/* rf_dagfuncs.c revision 1.13 */
1/* $NetBSD: rf_dagfuncs.c,v 1.13 2003/12/29 02:38:17 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* 30 * dagfuncs.c -- DAG node execution routines 31 * 32 * Rules: 33 * 1. Every DAG execution function must eventually cause node->status to 34 * get set to "good" or "bad", and "FinishNode" to be called. In the 35 * case of nodes that complete immediately (xor, NullNodeFunc, etc), 36 * the node execution function can do these two things directly. In 37 * the case of nodes that have to wait for some event (a disk read to 38 * complete, a lock to be released, etc) to occur before they can 39 * complete, this is typically achieved by having whatever module 40 * is doing the operation call GenericWakeupFunc upon completion. 41 * 2. DAG execution functions should check the status in the DAG header 42 * and NOP out their operations if the status is not "enable". 
However, 43 * execution functions that release resources must be sure to release 44 * them even when they NOP out the function that would use them. 45 * Functions that acquire resources should go ahead and acquire them 46 * even when they NOP, so that a downstream release node will not have 47 * to check to find out whether or not the acquire was suppressed. 48 */ 49 50#include <sys/cdefs.h> 51__KERNEL_RCSID(0, "$NetBSD: rf_dagfuncs.c,v 1.13 2003/12/29 02:38:17 oster Exp $"); 52 53#include <sys/param.h> 54#include <sys/ioctl.h> 55 56#include "rf_archs.h" 57#include "rf_raid.h" 58#include "rf_dag.h" 59#include "rf_layout.h" 60#include "rf_etimer.h" 61#include "rf_acctrace.h" 62#include "rf_diskqueue.h" 63#include "rf_dagfuncs.h" 64#include "rf_general.h" 65#include "rf_engine.h" 66#include "rf_dagutils.h" 67 68#include "rf_kintf.h" 69 70#if RF_INCLUDE_PARITYLOGGING > 0 71#include "rf_paritylog.h" 72#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 73 74int (*rf_DiskReadFunc) (RF_DagNode_t *); 75int (*rf_DiskWriteFunc) (RF_DagNode_t *); 76int (*rf_DiskReadUndoFunc) (RF_DagNode_t *); 77int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *); 78int (*rf_DiskUnlockFunc) (RF_DagNode_t *); 79int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *); 80int (*rf_RegularXorUndoFunc) (RF_DagNode_t *); 81int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *); 82int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *); 83 84/***************************************************************************************** 85 * main (only) configuration routine for this module 86 ****************************************************************************************/ 87int 88rf_ConfigureDAGFuncs(listp) 89 RF_ShutdownList_t **listp; 90{ 91 RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) || ((sizeof(long) == 4) && RF_LONGSHIFT == 2)); 92 rf_DiskReadFunc = rf_DiskReadFuncForThreads; 93 rf_DiskReadUndoFunc = rf_DiskUndoFunc; 94 rf_DiskWriteFunc = rf_DiskWriteFuncForThreads; 95 rf_DiskWriteUndoFunc = rf_DiskUndoFunc; 96 
rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads; 97 rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc; 98 rf_RegularXorUndoFunc = rf_NullNodeUndoFunc; 99 rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc; 100 rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc; 101 return (0); 102} 103 104 105 106/***************************************************************************************** 107 * the execution function associated with a terminate node 108 ****************************************************************************************/ 109int 110rf_TerminateFunc(node) 111 RF_DagNode_t *node; 112{ 113 RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes); 114 node->status = rf_good; 115 return (rf_FinishNode(node, RF_THREAD_CONTEXT)); 116} 117 118int 119rf_TerminateUndoFunc(node) 120 RF_DagNode_t *node; 121{ 122 return (0); 123} 124 125 126/***************************************************************************************** 127 * execution functions associated with a mirror node 128 * 129 * parameters: 130 * 131 * 0 - physical disk addres of data 132 * 1 - buffer for holding read data 133 * 2 - parity stripe ID 134 * 3 - flags 135 * 4 - physical disk address of mirror (parity) 136 * 137 ****************************************************************************************/ 138 139int 140rf_DiskReadMirrorIdleFunc(node) 141 RF_DagNode_t *node; 142{ 143 /* select the mirror copy with the shortest queue and fill in node 144 * parameters with physical disk address */ 145 146 rf_SelectMirrorDiskIdle(node); 147 return (rf_DiskReadFunc(node)); 148} 149 150#if (RF_INCLUDE_CHAINDECLUSTER > 0) || (RF_INCLUDE_INTERDECLUSTER > 0) || (RF_DEBUG_VALIDATE_DAG > 0) 151int 152rf_DiskReadMirrorPartitionFunc(node) 153 RF_DagNode_t *node; 154{ 155 /* select the mirror copy with the shortest queue and fill in node 156 * parameters with physical disk address */ 157 158 rf_SelectMirrorDiskPartition(node); 159 return (rf_DiskReadFunc(node)); 160} 161#endif 162 163int 
164rf_DiskReadMirrorUndoFunc(node) 165 RF_DagNode_t *node; 166{ 167 return (0); 168} 169 170 171 172#if RF_INCLUDE_PARITYLOGGING > 0 173/***************************************************************************************** 174 * the execution function associated with a parity log update node 175 ****************************************************************************************/ 176int 177rf_ParityLogUpdateFunc(node) 178 RF_DagNode_t *node; 179{ 180 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 181 caddr_t buf = (caddr_t) node->params[1].p; 182 RF_ParityLogData_t *logData; 183 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 184 RF_Etimer_t timer; 185 186 if (node->dagHdr->status == rf_enable) { 187 RF_ETIMER_START(timer); 188 logData = rf_CreateParityLogData(RF_UPDATE, pda, buf, 189 (RF_Raid_t *) (node->dagHdr->raidPtr), 190 node->wakeFunc, (void *) node, 191 node->dagHdr->tracerec, timer); 192 if (logData) 193 rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); 194 else { 195 RF_ETIMER_STOP(timer); 196 RF_ETIMER_EVAL(timer); 197 tracerec->plog_us += RF_ETIMER_VAL_US(timer); 198 (node->wakeFunc) (node, ENOMEM); 199 } 200 } 201 return (0); 202} 203 204 205/***************************************************************************************** 206 * the execution function associated with a parity log overwrite node 207 ****************************************************************************************/ 208int 209rf_ParityLogOverwriteFunc(node) 210 RF_DagNode_t *node; 211{ 212 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 213 caddr_t buf = (caddr_t) node->params[1].p; 214 RF_ParityLogData_t *logData; 215 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 216 RF_Etimer_t timer; 217 218 if (node->dagHdr->status == rf_enable) { 219 RF_ETIMER_START(timer); 220 logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, (RF_Raid_t *) (node->dagHdr->raidPtr), 221 node->wakeFunc, (void *) node, 
node->dagHdr->tracerec, timer); 222 if (logData) 223 rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); 224 else { 225 RF_ETIMER_STOP(timer); 226 RF_ETIMER_EVAL(timer); 227 tracerec->plog_us += RF_ETIMER_VAL_US(timer); 228 (node->wakeFunc) (node, ENOMEM); 229 } 230 } 231 return (0); 232} 233 234int 235rf_ParityLogUpdateUndoFunc(node) 236 RF_DagNode_t *node; 237{ 238 return (0); 239} 240 241int 242rf_ParityLogOverwriteUndoFunc(node) 243 RF_DagNode_t *node; 244{ 245 return (0); 246} 247#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 248 249/***************************************************************************************** 250 * the execution function associated with a NOP node 251 ****************************************************************************************/ 252int 253rf_NullNodeFunc(node) 254 RF_DagNode_t *node; 255{ 256 node->status = rf_good; 257 return (rf_FinishNode(node, RF_THREAD_CONTEXT)); 258} 259 260int 261rf_NullNodeUndoFunc(node) 262 RF_DagNode_t *node; 263{ 264 node->status = rf_undone; 265 return (rf_FinishNode(node, RF_THREAD_CONTEXT)); 266} 267 268 269/***************************************************************************************** 270 * the execution function associated with a disk-read node 271 ****************************************************************************************/ 272int 273rf_DiskReadFuncForThreads(node) 274 RF_DagNode_t *node; 275{ 276 RF_DiskQueueData_t *req; 277 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 278 caddr_t buf = (caddr_t) node->params[1].p; 279 RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; 280 unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); 281 unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); 282 unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); 283 unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); 284 RF_DiskQueueDataFlags_t flags = 0; 285 RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? 
RF_IO_TYPE_READ : RF_IO_TYPE_NOP; 286 RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 287 void *b_proc = NULL; 288 289 if (node->dagHdr->bp) 290 b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; 291 292 RF_ASSERT(!(lock && unlock)); 293 flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; 294 flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; 295 296 req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, 297 buf, parityStripeID, which_ru, 298 (int (*) (void *, int)) node->wakeFunc, 299 node, NULL, node->dagHdr->tracerec, 300 (void *) (node->dagHdr->raidPtr), flags, b_proc); 301 if (!req) { 302 (node->wakeFunc) (node, ENOMEM); 303 } else { 304 node->dagFuncData = (void *) req; 305 rf_DiskIOEnqueue(&(dqs[pda->col]), req, priority); 306 } 307 return (0); 308} 309 310 311/***************************************************************************************** 312 * the execution function associated with a disk-write node 313 ****************************************************************************************/ 314int 315rf_DiskWriteFuncForThreads(node) 316 RF_DagNode_t *node; 317{ 318 RF_DiskQueueData_t *req; 319 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 320 caddr_t buf = (caddr_t) node->params[1].p; 321 RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; 322 unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); 323 unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v); 324 unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v); 325 unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); 326 RF_DiskQueueDataFlags_t flags = 0; 327 RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? 
RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP; 328 RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 329 void *b_proc = NULL; 330 331 if (node->dagHdr->bp) 332 b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; 333 334 /* normal processing (rollaway or forward recovery) begins here */ 335 RF_ASSERT(!(lock && unlock)); 336 flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0; 337 flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0; 338 req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, 339 buf, parityStripeID, which_ru, 340 (int (*) (void *, int)) node->wakeFunc, 341 (void *) node, NULL, 342 node->dagHdr->tracerec, 343 (void *) (node->dagHdr->raidPtr), 344 flags, b_proc); 345 346 if (!req) { 347 (node->wakeFunc) (node, ENOMEM); 348 } else { 349 node->dagFuncData = (void *) req; 350 rf_DiskIOEnqueue(&(dqs[pda->col]), req, priority); 351 } 352 353 return (0); 354} 355/***************************************************************************************** 356 * the undo function for disk nodes 357 * Note: this is not a proper undo of a write node, only locks are released. 358 * old data is not restored to disk! 
359 ****************************************************************************************/ 360int 361rf_DiskUndoFunc(node) 362 RF_DagNode_t *node; 363{ 364 RF_DiskQueueData_t *req; 365 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 366 RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 367 368 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 369 0L, 0, NULL, 0L, 0, 370 (int (*) (void *, int)) node->wakeFunc, 371 (void *) node, 372 NULL, node->dagHdr->tracerec, 373 (void *) (node->dagHdr->raidPtr), 374 RF_UNLOCK_DISK_QUEUE, NULL); 375 if (!req) 376 (node->wakeFunc) (node, ENOMEM); 377 else { 378 node->dagFuncData = (void *) req; 379 rf_DiskIOEnqueue(&(dqs[pda->col]), req, RF_IO_NORMAL_PRIORITY); 380 } 381 382 return (0); 383} 384/***************************************************************************************** 385 * the execution function associated with an "unlock disk queue" node 386 ****************************************************************************************/ 387int 388rf_DiskUnlockFuncForThreads(node) 389 RF_DagNode_t *node; 390{ 391 RF_DiskQueueData_t *req; 392 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 393 RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 394 395 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 396 0L, 0, NULL, 0L, 0, 397 (int (*) (void *, int)) node->wakeFunc, 398 (void *) node, 399 NULL, node->dagHdr->tracerec, 400 (void *) (node->dagHdr->raidPtr), 401 RF_UNLOCK_DISK_QUEUE, NULL); 402 if (!req) 403 (node->wakeFunc) (node, ENOMEM); 404 else { 405 node->dagFuncData = (void *) req; 406 rf_DiskIOEnqueue(&(dqs[pda->col]), req, RF_IO_NORMAL_PRIORITY); 407 } 408 409 return (0); 410} 411/***************************************************************************************** 412 * Callback routine for DiskRead and DiskWrite nodes. 
When the disk op completes, 413 * the routine is called to set the node status and inform the execution engine that 414 * the node has fired. 415 ****************************************************************************************/ 416int 417rf_GenericWakeupFunc(node, status) 418 RF_DagNode_t *node; 419 int status; 420{ 421 switch (node->status) { 422 case rf_bwd1: 423 node->status = rf_bwd2; 424 if (node->dagFuncData) 425 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); 426 return (rf_DiskWriteFuncForThreads(node)); 427 case rf_fired: 428 if (status) 429 node->status = rf_bad; 430 else 431 node->status = rf_good; 432 break; 433 case rf_recover: 434 /* probably should never reach this case */ 435 if (status) 436 node->status = rf_panic; 437 else 438 node->status = rf_undone; 439 break; 440 default: 441 printf("rf_GenericWakeupFunc:"); 442 printf("node->status is %d,", node->status); 443 printf("status is %d \n", status); 444 RF_PANIC(); 445 break; 446 } 447 if (node->dagFuncData) 448 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); 449 return (rf_FinishNode(node, RF_INTR_CONTEXT)); 450} 451 452 453/***************************************************************************************** 454 * there are three distinct types of xor nodes 455 * A "regular xor" is used in the fault-free case where the access spans a complete 456 * stripe unit. It assumes that the result buffer is one full stripe unit in size, 457 * and uses the stripe-unit-offset values that it computes from the PDAs to determine 458 * where within the stripe unit to XOR each argument buffer. 459 * 460 * A "simple xor" is used in the fault-free case where the access touches only a portion 461 * of one (or two, in some cases) stripe unit(s). It assumes that all the argument 462 * buffers are of the same size and have the same stripe unit offset. 463 * 464 * A "recovery xor" is used in the degraded-mode case. 
It's similar to the regular 465 * xor function except that it takes the failed PDA as an additional parameter, and 466 * uses it to determine what portions of the argument buffers need to be xor'd into 467 * the result buffer, and where in the result buffer they should go. 468 ****************************************************************************************/ 469 470/* xor the params together and store the result in the result field. 471 * assume the result field points to a buffer that is the size of one SU, 472 * and use the pda params to determine where within the buffer to XOR 473 * the input buffers. 474 */ 475int 476rf_RegularXorFunc(node) 477 RF_DagNode_t *node; 478{ 479 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 480 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 481 RF_Etimer_t timer; 482 int i, retcode; 483 484 retcode = 0; 485 if (node->dagHdr->status == rf_enable) { 486 /* don't do the XOR if the input is the same as the output */ 487 RF_ETIMER_START(timer); 488 for (i = 0; i < node->numParams - 1; i += 2) 489 if (node->params[i + 1].p != node->results[0]) { 490 retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p, 491 (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp); 492 } 493 RF_ETIMER_STOP(timer); 494 RF_ETIMER_EVAL(timer); 495 tracerec->xor_us += RF_ETIMER_VAL_US(timer); 496 } 497 return (rf_GenericWakeupFunc(node, retcode)); /* call wake func 498 * explicitly since no 499 * I/O in this node */ 500} 501/* xor the inputs into the result buffer, ignoring placement issues */ 502int 503rf_SimpleXorFunc(node) 504 RF_DagNode_t *node; 505{ 506 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 507 int i, retcode = 0; 508 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 509 RF_Etimer_t timer; 510 511 if (node->dagHdr->status == rf_enable) { 512 RF_ETIMER_START(timer); 513 /* don't do the XOR if the input is the same as the output */ 514 
for (i = 0; i < node->numParams - 1; i += 2) 515 if (node->params[i + 1].p != node->results[0]) { 516 retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0], 517 rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector), 518 (struct buf *) node->dagHdr->bp); 519 } 520 RF_ETIMER_STOP(timer); 521 RF_ETIMER_EVAL(timer); 522 tracerec->xor_us += RF_ETIMER_VAL_US(timer); 523 } 524 return (rf_GenericWakeupFunc(node, retcode)); /* call wake func 525 * explicitly since no 526 * I/O in this node */ 527} 528/* this xor is used by the degraded-mode dag functions to recover lost data. 529 * the second-to-last parameter is the PDA for the failed portion of the access. 530 * the code here looks at this PDA and assumes that the xor target buffer is 531 * equal in size to the number of sectors in the failed PDA. It then uses 532 * the other PDAs in the parameter list to determine where within the target 533 * buffer the corresponding data should be xored. 534 */ 535int 536rf_RecoveryXorFunc(node) 537 RF_DagNode_t *node; 538{ 539 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; 540 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; 541 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; 542 int i, retcode = 0; 543 RF_PhysDiskAddr_t *pda; 544 int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); 545 char *srcbuf, *destbuf; 546 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 547 RF_Etimer_t timer; 548 549 if (node->dagHdr->status == rf_enable) { 550 RF_ETIMER_START(timer); 551 for (i = 0; i < node->numParams - 2; i += 2) 552 if (node->params[i + 1].p != node->results[0]) { 553 pda = (RF_PhysDiskAddr_t *) node->params[i].p; 554 srcbuf = (char *) node->params[i + 1].p; 555 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); 556 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - 
failedSUOffset); 557 retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp); 558 } 559 RF_ETIMER_STOP(timer); 560 RF_ETIMER_EVAL(timer); 561 tracerec->xor_us += RF_ETIMER_VAL_US(timer); 562 } 563 return (rf_GenericWakeupFunc(node, retcode)); 564} 565/***************************************************************************************** 566 * The next three functions are utilities used by the above xor-execution functions. 567 ****************************************************************************************/ 568 569 570/* 571 * this is just a glorified buffer xor. targbuf points to a buffer that is one full stripe unit 572 * in size. srcbuf points to a buffer that may be less than 1 SU, but never more. When the 573 * access described by pda is one SU in size (which by implication means it's SU-aligned), 574 * all that happens is (targbuf) <- (srcbuf ^ targbuf). When the access is less than one 575 * SU in size the XOR occurs on only the portion of targbuf identified in the pda. 576 */ 577 578int 579rf_XorIntoBuffer(raidPtr, pda, srcbuf, targbuf, bp) 580 RF_Raid_t *raidPtr; 581 RF_PhysDiskAddr_t *pda; 582 char *srcbuf; 583 char *targbuf; 584 void *bp; 585{ 586 char *targptr; 587 int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 588 int SUOffset = pda->startSector % sectPerSU; 589 int length, retcode = 0; 590 591 RF_ASSERT(pda->numSector <= sectPerSU); 592 593 targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset); 594 length = rf_RaidAddressToByte(raidPtr, pda->numSector); 595 retcode = rf_bxor(srcbuf, targptr, length, bp); 596 return (retcode); 597} 598/* it really should be the case that the buffer pointers (returned by malloc) 599 * are aligned to the natural word size of the machine, so this is the only 600 * case we optimize for. The length should always be a multiple of the sector 601 * size, so there should be no problem with leftover bytes at the end. 
602 */ 603int 604rf_bxor(src, dest, len, bp) 605 char *src; 606 char *dest; 607 int len; 608 void *bp; 609{ 610 unsigned mask = sizeof(long) - 1, retcode = 0; 611 612 if (!(((unsigned long) src) & mask) && !(((unsigned long) dest) & mask) && !(len & mask)) { 613 retcode = rf_longword_bxor((unsigned long *) src, (unsigned long *) dest, len >> RF_LONGSHIFT, bp); 614 } else { 615 RF_ASSERT(0); 616 } 617 return (retcode); 618} 619/* map a user buffer into kernel space, if necessary */ 620#define REMAP_VA(_bp,x,y) (y) = (x) 621 622/* When XORing in kernel mode, we need to map each user page to kernel space before we can access it. 623 * We don't want to assume anything about which input buffers are in kernel/user 624 * space, nor about their alignment, so in each loop we compute the maximum number 625 * of bytes that we can xor without crossing any page boundaries, and do only this many 626 * bytes before the next remap. 627 */ 628int 629rf_longword_bxor(src, dest, len, bp) 630 unsigned long *src; 631 unsigned long *dest; 632 int len; /* longwords */ 633 void *bp; 634{ 635 unsigned long *end = src + len; 636 unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ 637 unsigned long *pg_src, *pg_dest; /* per-page source/dest 638 * pointers */ 639 int longs_this_time;/* # longwords to xor in the current iteration */ 640 641 REMAP_VA(bp, src, pg_src); 642 REMAP_VA(bp, dest, pg_dest); 643 if (!pg_src || !pg_dest) 644 return (EFAULT); 645 646 while (len >= 4) { 647 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */ 648 src += longs_this_time; 649 dest += longs_this_time; 650 len -= longs_this_time; 651 while (longs_this_time >= 4) { 652 d0 = pg_dest[0]; 653 d1 = pg_dest[1]; 654 d2 = pg_dest[2]; 655 d3 = pg_dest[3]; 656 s0 = pg_src[0]; 657 s1 = pg_src[1]; 658 s2 = pg_src[2]; 659 s3 = pg_src[3]; 660 pg_dest[0] = d0 ^ s0; 661 pg_dest[1] = d1 ^ s1; 662 pg_dest[2] = d2 ^ s2; 663 pg_dest[3] = d3 ^ s3; 664 pg_src 
+= 4; 665 pg_dest += 4; 666 longs_this_time -= 4; 667 } 668 while (longs_this_time > 0) { /* cannot cross any page 669 * boundaries here */ 670 *pg_dest++ ^= *pg_src++; 671 longs_this_time--; 672 } 673 674 /* either we're done, or we've reached a page boundary on one 675 * (or possibly both) of the pointers */ 676 if (len) { 677 if (RF_PAGE_ALIGNED(src)) 678 REMAP_VA(bp, src, pg_src); 679 if (RF_PAGE_ALIGNED(dest)) 680 REMAP_VA(bp, dest, pg_dest); 681 if (!pg_src || !pg_dest) 682 return (EFAULT); 683 } 684 } 685 while (src < end) { 686 *pg_dest++ ^= *pg_src++; 687 src++; 688 dest++; 689 len--; 690 if (RF_PAGE_ALIGNED(src)) 691 REMAP_VA(bp, src, pg_src); 692 if (RF_PAGE_ALIGNED(dest)) 693 REMAP_VA(bp, dest, pg_dest); 694 } 695 RF_ASSERT(len == 0); 696 return (0); 697} 698 699#if 0 700/* 701 dst = a ^ b ^ c; 702 a may equal dst 703 see comment above longword_bxor 704*/ 705int 706rf_longword_bxor3(dst, a, b, c, len, bp) 707 unsigned long *dst; 708 unsigned long *a; 709 unsigned long *b; 710 unsigned long *c; 711 int len; /* length in longwords */ 712 void *bp; 713{ 714 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 715 unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest 716 * pointers */ 717 int longs_this_time;/* # longs to xor in the current iteration */ 718 char dst_is_a = 0; 719 720 REMAP_VA(bp, a, pg_a); 721 REMAP_VA(bp, b, pg_b); 722 REMAP_VA(bp, c, pg_c); 723 if (a == dst) { 724 pg_dst = pg_a; 725 dst_is_a = 1; 726 } else { 727 REMAP_VA(bp, dst, pg_dst); 728 } 729 730 /* align dest to cache line. Can't cross a pg boundary on dst here. 
*/ 731 while ((((unsigned long) pg_dst) & 0x1f)) { 732 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 733 dst++; 734 a++; 735 b++; 736 c++; 737 if (RF_PAGE_ALIGNED(a)) { 738 REMAP_VA(bp, a, pg_a); 739 if (!pg_a) 740 return (EFAULT); 741 } 742 if (RF_PAGE_ALIGNED(b)) { 743 REMAP_VA(bp, a, pg_b); 744 if (!pg_b) 745 return (EFAULT); 746 } 747 if (RF_PAGE_ALIGNED(c)) { 748 REMAP_VA(bp, a, pg_c); 749 if (!pg_c) 750 return (EFAULT); 751 } 752 len--; 753 } 754 755 while (len > 4) { 756 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT); 757 a += longs_this_time; 758 b += longs_this_time; 759 c += longs_this_time; 760 dst += longs_this_time; 761 len -= longs_this_time; 762 while (longs_this_time >= 4) { 763 a0 = pg_a[0]; 764 longs_this_time -= 4; 765 766 a1 = pg_a[1]; 767 a2 = pg_a[2]; 768 769 a3 = pg_a[3]; 770 pg_a += 4; 771 772 b0 = pg_b[0]; 773 b1 = pg_b[1]; 774 775 b2 = pg_b[2]; 776 b3 = pg_b[3]; 777 /* start dual issue */ 778 a0 ^= b0; 779 b0 = pg_c[0]; 780 781 pg_b += 4; 782 a1 ^= b1; 783 784 a2 ^= b2; 785 a3 ^= b3; 786 787 b1 = pg_c[1]; 788 a0 ^= b0; 789 790 b2 = pg_c[2]; 791 a1 ^= b1; 792 793 b3 = pg_c[3]; 794 a2 ^= b2; 795 796 pg_dst[0] = a0; 797 a3 ^= b3; 798 pg_dst[1] = a1; 799 pg_c += 4; 800 pg_dst[2] = a2; 801 pg_dst[3] = a3; 802 pg_dst += 4; 803 } 804 while (longs_this_time > 0) { /* cannot cross any page 805 * boundaries here */ 806 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 807 longs_this_time--; 808 } 809 810 if (len) { 811 if (RF_PAGE_ALIGNED(a)) { 812 REMAP_VA(bp, a, pg_a); 813 if (!pg_a) 814 return (EFAULT); 815 if (dst_is_a) 816 pg_dst = pg_a; 817 } 818 if (RF_PAGE_ALIGNED(b)) { 819 REMAP_VA(bp, b, pg_b); 820 if (!pg_b) 821 return (EFAULT); 822 } 823 if (RF_PAGE_ALIGNED(c)) { 824 REMAP_VA(bp, c, pg_c); 825 if (!pg_c) 826 return (EFAULT); 827 } 828 if (!dst_is_a) 829 if (RF_PAGE_ALIGNED(dst)) { 830 REMAP_VA(bp, dst, pg_dst); 831 if (!pg_dst) 832 return (EFAULT); 833 } 834 } 835 } 836 while 
(len) { 837 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 838 dst++; 839 a++; 840 b++; 841 c++; 842 if (RF_PAGE_ALIGNED(a)) { 843 REMAP_VA(bp, a, pg_a); 844 if (!pg_a) 845 return (EFAULT); 846 if (dst_is_a) 847 pg_dst = pg_a; 848 } 849 if (RF_PAGE_ALIGNED(b)) { 850 REMAP_VA(bp, b, pg_b); 851 if (!pg_b) 852 return (EFAULT); 853 } 854 if (RF_PAGE_ALIGNED(c)) { 855 REMAP_VA(bp, c, pg_c); 856 if (!pg_c) 857 return (EFAULT); 858 } 859 if (!dst_is_a) 860 if (RF_PAGE_ALIGNED(dst)) { 861 REMAP_VA(bp, dst, pg_dst); 862 if (!pg_dst) 863 return (EFAULT); 864 } 865 len--; 866 } 867 return (0); 868} 869 870int 871rf_bxor3(dst, a, b, c, len, bp) 872 unsigned char *dst; 873 unsigned char *a; 874 unsigned char *b; 875 unsigned char *c; 876 unsigned long len; 877 void *bp; 878{ 879 RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0); 880 881 return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a, 882 (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp)); 883} 884#endif 885