/* rf_dagfuncs.c revision 1.16 */
1/* $NetBSD: rf_dagfuncs.c,v 1.16 2003/12/30 23:40:20 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* 30 * dagfuncs.c -- DAG node execution routines 31 * 32 * Rules: 33 * 1. Every DAG execution function must eventually cause node->status to 34 * get set to "good" or "bad", and "FinishNode" to be called. In the 35 * case of nodes that complete immediately (xor, NullNodeFunc, etc), 36 * the node execution function can do these two things directly. In 37 * the case of nodes that have to wait for some event (a disk read to 38 * complete, a lock to be released, etc) to occur before they can 39 * complete, this is typically achieved by having whatever module 40 * is doing the operation call GenericWakeupFunc upon completion. 41 * 2. DAG execution functions should check the status in the DAG header 42 * and NOP out their operations if the status is not "enable". 
However,
 * execution functions that release resources must be sure to release
 * them even when they NOP out the function that would use them.
 * Functions that acquire resources should go ahead and acquire them
 * even when they NOP, so that a downstream release node will not have
 * to check to find out whether or not the acquire was suppressed.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_dagfuncs.c,v 1.16 2003/12/30 23:40:20 oster Exp $");

#include <sys/param.h>
#include <sys/ioctl.h>

#include "rf_archs.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_layout.h"
#include "rf_etimer.h"
#include "rf_acctrace.h"
#include "rf_diskqueue.h"
#include "rf_dagfuncs.h"
#include "rf_general.h"
#include "rf_engine.h"
#include "rf_dagutils.h"

#include "rf_kintf.h"

#if RF_INCLUDE_PARITYLOGGING > 0
#include "rf_paritylog.h"
#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */

/*
 * Dispatch table of DAG node do/undo routines.  These are function
 * pointers rather than direct calls; they are bound exactly once, in
 * rf_ConfigureDAGFuncs() below, and the rest of the driver calls
 * through them.
 */
int (*rf_DiskReadFunc) (RF_DagNode_t *);
int (*rf_DiskWriteFunc) (RF_DagNode_t *);
int (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
int (*rf_DiskUnlockFunc) (RF_DagNode_t *);
int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
int (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);

/*****************************************************************************
 * main (only) configuration routine for this module
 *
 * Binds the dispatch pointers above to the thread-context
 * implementations in this file.  The RF_ASSERT checks that
 * RF_LONGSHIFT agrees with sizeof(long), since the xor helpers later
 * in this file shift byte counts by RF_LONGSHIFT to get longword
 * counts.
 *
 * listp: shutdown list (unused here).  Always returns 0.
 ****************************************************************************/
int
rf_ConfigureDAGFuncs(RF_ShutdownList_t **listp)
{
	RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) ||
	    ((sizeof(long) == 4) && RF_LONGSHIFT == 2));
	rf_DiskReadFunc = rf_DiskReadFuncForThreads;
	rf_DiskReadUndoFunc = rf_DiskUndoFunc;
	rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
	rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
	rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
	rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc;
	rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
	rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
	rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
	return (0);
}



/*****************************************************************************
 * the execution function associated with a terminate node
 *
 * By the DAG rules (top of file), a terminate node fires only after
 * all commit nodes have fired (asserted below), so it can mark itself
 * good and finish immediately.
 ****************************************************************************/
int
rf_TerminateFunc(RF_DagNode_t *node)
{
	RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes);
	node->status = rf_good;
	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
}

/* a terminate node acquires nothing, so there is nothing to undo */
int
rf_TerminateUndoFunc(RF_DagNode_t *node)
{
	return (0);
}


/*****************************************************************************
 * execution functions associated with a mirror node
 *
 * parameters:
 *
 * 0 - physical disk address of data
 * 1 - buffer for holding read data
 * 2 - parity stripe ID
 * 3 - flags
 * 4 - physical disk address of mirror (parity)
 *
 ****************************************************************************/

int
rf_DiskReadMirrorIdleFunc(RF_DagNode_t *node)
{
	/* select the mirror copy with the shortest queue and fill in node
	 * parameters with physical disk address, then issue the read as a
	 * normal disk-read node */

	rf_SelectMirrorDiskIdle(node);
	return (rf_DiskReadFunc(node));
}

#if (RF_INCLUDE_CHAINDECLUSTER > 0) || (RF_INCLUDE_INTERDECLUSTER > 0) || (RF_DEBUG_VALIDATE_DAG > 0)
int
rf_DiskReadMirrorPartitionFunc(RF_DagNode_t *node)
{
	/* select the mirror copy with the shortest queue and fill in node
	 * parameters with physical disk address, then issue the read as a
	 * normal disk-read node */

	rf_SelectMirrorDiskPartition(node);
	return (rf_DiskReadFunc(node));
}
#endif

/* mirror selection acquires no resources, so there is nothing to undo */
int
rf_DiskReadMirrorUndoFunc(RF_DagNode_t *node)
{
	return (0);
}



#if RF_INCLUDE_PARITYLOGGING > 0
/*****************************************************************************
 * the execution function associated with a parity log update node
 *
 * params[0]: physical disk address of the data being logged
 * params[1]: buffer holding that data
 *
 * Queues an "update" record on the parity log.  On success the log
 * module is responsible for waking the node (node->wakeFunc was handed
 * to rf_CreateParityLogData); on allocation failure we charge the
 * elapsed time to the trace record and wake the node with ENOMEM
 * ourselves.  NOPs when the DAG is not enabled.
 ****************************************************************************/
int
rf_ParityLogUpdateFunc(RF_DagNode_t *node)
{
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_ParityLogData_t *logData;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		logData = rf_CreateParityLogData(RF_UPDATE, pda, buf,
		    (RF_Raid_t *) (node->dagHdr->raidPtr),
		    node->wakeFunc, (void *) node,
		    node->dagHdr->tracerec, timer);
		if (logData)
			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
		else {
			/* allocation failed: account the time and wake the
			 * node with an error so the DAG engine sees it */
			RF_ETIMER_STOP(timer);
			RF_ETIMER_EVAL(timer);
			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
			(node->wakeFunc) (node, ENOMEM);
		}
	}
	return (0);
}


/*****************************************************************************
 * the execution function associated with a parity log overwrite node
 *
 * Identical to rf_ParityLogUpdateFunc() except that the record is
 * created with RF_OVERWRITE instead of RF_UPDATE.
 ****************************************************************************/
int
rf_ParityLogOverwriteFunc(RF_DagNode_t *node)
{
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_ParityLogData_t *logData;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf,
		    (RF_Raid_t *) (node->dagHdr->raidPtr),
		    node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer);
		if (logData)
			rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
		else {
			/* allocation failed: account the time and wake the
			 * node with an error so the DAG engine sees it */
			RF_ETIMER_STOP(timer);
			RF_ETIMER_EVAL(timer);
			tracerec->plog_us += RF_ETIMER_VAL_US(timer);
			(node->wakeFunc) (node, ENOMEM);
		}
	}
	return (0);
}

/* parity log nodes acquire no resources needing explicit undo */
int
rf_ParityLogUpdateUndoFunc(RF_DagNode_t *node)
{
	return (0);
}

int
rf_ParityLogOverwriteUndoFunc(RF_DagNode_t *node)
{
	return (0);
}
#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */

/*****************************************************************************
 * the execution function associated with a NOP node
 *
 * completes immediately: mark good and finish.
 ****************************************************************************/
int
rf_NullNodeFunc(RF_DagNode_t *node)
{
	node->status = rf_good;
	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
}

/* generic undo for nodes with nothing to roll back: mark undone, finish */
int
rf_NullNodeUndoFunc(RF_DagNode_t *node)
{
	node->status = rf_undone;
	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
}


/*****************************************************************************
 * the execution function associated with a disk-read node
 *
 * params[0]: physical disk address; params[1]: destination buffer;
 * params[2]: parity stripe ID; params[3]: packed flags word decoded by
 * the RF_EXTRACT_* macros (priority, lock flag, unlock flag, RU).
 *
 * Builds a disk queue request and enqueues it on the target disk's
 * queue; node->wakeFunc fires when the I/O completes.  If the DAG is
 * not enabled the request is still enqueued, but as a NOP, so queue
 * lock/unlock side effects still happen (see the rules at the top of
 * the file).
 ****************************************************************************/
int
rf_DiskReadFuncForThreads(RF_DagNode_t *node)
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
	RF_DiskQueueDataFlags_t flags = 0;
	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_READ : RF_IO_TYPE_NOP;
	RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
	void *b_proc = NULL;

	if (node->dagHdr->bp)
		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;

	/* a single request may lock or unlock the queue, never both */
	RF_ASSERT(!(lock && unlock));
	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;

	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
	    buf, parityStripeID, which_ru,
	    (int (*) (void *, int)) node->wakeFunc,
	    node, NULL, node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr), flags, b_proc);
	if (!req) {
		/* could not allocate the request; wake the node with an
		 * error rather than hanging the DAG */
		(node->wakeFunc) (node, ENOMEM);
	} else {
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->col]), req, priority);
	}
	return (0);
}


/*****************************************************************************
 * the execution function associated with a disk-write node
 *
 * same parameter layout and behavior as the disk-read node above,
 * except the I/O type is RF_IO_TYPE_WRITE.
 ****************************************************************************/
int
rf_DiskWriteFuncForThreads(RF_DagNode_t *node)
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	caddr_t buf = (caddr_t) node->params[1].p;
	RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
	unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
	unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
	unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
	unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
	RF_DiskQueueDataFlags_t flags = 0;
	RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP;
	RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
	void *b_proc = NULL;

	if (node->dagHdr->bp)
		b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;

	/* normal processing (rollaway or forward recovery) begins here */
	RF_ASSERT(!(lock && unlock));
	flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
	flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
	req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
	    buf, parityStripeID, which_ru,
	    (int (*) (void *, int)) node->wakeFunc,
	    (void *) node, NULL,
	    node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr),
	    flags, b_proc);

	if (!req) {
		(node->wakeFunc) (node, ENOMEM);
	} else {
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->col]), req, priority);
	}

	return (0);
}
/*****************************************************************************
 * the undo function for disk nodes
 * Note: this is not a proper undo of a write node, only locks are released.
 * old data is not restored to disk!
 *
 * enqueues a NOP request carrying RF_UNLOCK_DISK_QUEUE so that any
 * queue lock taken by the forward node is released.
 ****************************************************************************/
int
rf_DiskUndoFunc(RF_DagNode_t *node)
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;

	req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
	    0L, 0, NULL, 0L, 0,
	    (int (*) (void *, int)) node->wakeFunc,
	    (void *) node,
	    NULL, node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr),
	    RF_UNLOCK_DISK_QUEUE, NULL);
	if (!req)
		(node->wakeFunc) (node, ENOMEM);
	else {
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->col]), req, RF_IO_NORMAL_PRIORITY);
	}

	return (0);
}
/*****************************************************************************
 * the execution function associated with an "unlock disk queue" node
 *
 * same mechanism as rf_DiskUndoFunc(): a NOP request with
 * RF_UNLOCK_DISK_QUEUE set.
 ****************************************************************************/
int
rf_DiskUnlockFuncForThreads(RF_DagNode_t *node)
{
	RF_DiskQueueData_t *req;
	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
	RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;

	req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP,
	    0L, 0, NULL, 0L, 0,
	    (int (*) (void *, int)) node->wakeFunc,
	    (void *) node,
	    NULL, node->dagHdr->tracerec,
	    (void *) (node->dagHdr->raidPtr),
	    RF_UNLOCK_DISK_QUEUE, NULL);
	if (!req)
		(node->wakeFunc) (node, ENOMEM);
	else {
		node->dagFuncData = (void *) req;
		rf_DiskIOEnqueue(&(dqs[pda->col]), req, RF_IO_NORMAL_PRIORITY);
	}

	return (0);
}
/*****************************************************************************
 * Callback routine for DiskRead and DiskWrite nodes. When the disk
 * op completes, the routine is called to set the node status and
 * inform the execution engine that the node has fired.
 *
 * status: 0 on success, nonzero on error (becomes rf_bad/rf_panic).
 * Special case rf_bwd1: backward recovery -- the old request is freed
 * and the node is re-issued as a write (status advances to rf_bwd2)
 * without finishing the node yet.
 ****************************************************************************/
int
rf_GenericWakeupFunc(RF_DagNode_t *node, int status)
{

	switch (node->status) {
	case rf_bwd1:
		node->status = rf_bwd2;
		if (node->dagFuncData)
			rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
		return (rf_DiskWriteFuncForThreads(node));
	case rf_fired:
		if (status)
			node->status = rf_bad;
		else
			node->status = rf_good;
		break;
	case rf_recover:
		/* probably should never reach this case */
		if (status)
			node->status = rf_panic;
		else
			node->status = rf_undone;
		break;
	default:
		printf("rf_GenericWakeupFunc:");
		printf("node->status is %d,", node->status);
		printf("status is %d \n", status);
		RF_PANIC();
		break;
	}
	/* the disk queue request is done with; release it before handing
	 * the node back to the engine */
	if (node->dagFuncData)
		rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
	return (rf_FinishNode(node, RF_INTR_CONTEXT));
}


/*****************************************************************************
 * there are three distinct types of xor nodes:

 * A "regular xor" is used in the fault-free case where the access
 * spans a complete stripe unit.
It assumes that the result buffer is
 * one full stripe unit in size, and uses the stripe-unit-offset
 * values that it computes from the PDAs to determine where within the
 * stripe unit to XOR each argument buffer.
 *
 * A "simple xor" is used in the fault-free case where the access
 * touches only a portion of one (or two, in some cases) stripe
 * unit(s). It assumes that all the argument buffers are of the same
 * size and have the same stripe unit offset.
 *
 * A "recovery xor" is used in the degraded-mode case. It's similar
 * to the regular xor function except that it takes the failed PDA as
 * an additional parameter, and uses it to determine what portions of
 * the argument buffers need to be xor'd into the result buffer, and
 * where in the result buffer they should go.
 ****************************************************************************/

/* xor the params together and store the result in the result field.
 * assume the result field points to a buffer that is the size of one
 * SU, and use the pda params to determine where within the buffer to
 * XOR the input buffers.
 *
 * params come in (pda, buffer) pairs; the final param is the raidPtr.
 * NOTE(review): retcode keeps only the status of the LAST
 * rf_XorIntoBuffer() call, so an earlier failure can be masked by a
 * later success -- confirm this is intended.
 */
int
rf_RegularXorFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	int i, retcode;

	retcode = 0;
	if (node->dagHdr->status == rf_enable) {
		/* don't do the XOR if the input is the same as the output */
		RF_ETIMER_START(timer);
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p,
				    (char *) node->params[i + 1].p, (char *) node->results[0], node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
							 * explicitly since no
							 * I/O in this node */
}
/* xor the inputs into the result buffer, ignoring placement issues
 *
 * same (pda, buffer) pair layout as rf_RegularXorFunc; the byte count
 * for each xor comes from the pair's pda->numSector.
 * NOTE(review): as above, only the last rf_bxor() status survives in
 * retcode.
 */
int
rf_SimpleXorFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	int i, retcode = 0;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		/* don't do the XOR if the input is the same as the output */
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0],
				    rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector),
				    (struct buf *) node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
							 * explicitly since no
							 * I/O in this node */
}
/* this xor is used by the degraded-mode dag functions to recover lost
 * data.  the second-to-last parameter is the PDA for the failed
 * portion of the access. the code here looks at this PDA and assumes
 * that the xor target buffer is equal in size to the number of
 * sectors in the failed PDA. It then uses the other PDAs in the
 * parameter list to determine where within the target buffer the
 * corresponding data should be xored.
 *
 * destbuf is offset by (suoffset - failedSUOffset) bytes so each
 * source lands at the position it occupies relative to the failed
 * region.
 */
int
rf_RecoveryXorFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
	int i, retcode = 0;
	RF_PhysDiskAddr_t *pda;
	int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
	char *srcbuf, *destbuf;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		for (i = 0; i < node->numParams - 2; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
				srcbuf = (char *) node->params[i + 1].p;
				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
				retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), node->dagHdr->bp);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));
}
/*****************************************************************************
 * The next three functions are utilities used by the above
 * xor-execution functions.
 ****************************************************************************/


/*
 * this is just a glorified buffer xor.  targbuf points to a buffer
 * that is one full stripe unit in size. srcbuf points to a buffer
 * that may be less than 1 SU, but never more. When the access
 * described by pda is one SU in size (which by implication means it's
 * SU-aligned), all that happens is (targbuf) <- (srcbuf ^ targbuf).
 * When the access is less than one SU in size the XOR occurs on only
 * the portion of targbuf identified in the pda.
 *
 * returns the status of the underlying rf_bxor() call.
 */

int
rf_XorIntoBuffer(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda,
    char *srcbuf, char *targbuf, void *bp)
{
	char *targptr;
	int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	int SUOffset = pda->startSector % sectPerSU;	/* offset of the access
							 * within its SU */
	int length, retcode = 0;

	RF_ASSERT(pda->numSector <= sectPerSU);

	targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset);
	length = rf_RaidAddressToByte(raidPtr, pda->numSector);
	retcode = rf_bxor(srcbuf, targptr, length, bp);
	return (retcode);
}
/* it really should be the case that the buffer pointers (returned by
 * malloc) are aligned to the natural word size of the machine, so
 * this is the only case we optimize for. The length should always be
 * a multiple of the sector size, so there should be no problem with
 * leftover bytes at the end.
 *
 * len is in bytes; src, dest and len must all be longword-aligned or
 * the RF_ASSERT(0) fires -- unaligned xor is not supported here.
 */
int
rf_bxor(char *src, char *dest, int len, void *bp)
{
	unsigned mask = sizeof(long) - 1, retcode = 0;

	if (!(((unsigned long) src) & mask) &&
	    !(((unsigned long) dest) & mask) && !(len & mask)) {
		retcode = rf_longword_bxor((unsigned long *) src,
		    (unsigned long *) dest,
		    len >> RF_LONGSHIFT, bp);
	} else {
		RF_ASSERT(0);
	}
	return (retcode);
}

/* When XORing in kernel mode, we need to map each user page to kernel
 * space before we can access it.
We don't want to assume anything 604 * about which input buffers are in kernel/user space, nor about their 605 * alignment, so in each loop we compute the maximum number of bytes 606 * that we can xor without crossing any page boundaries, and do only 607 * this many bytes before the next remap. 608 * 609 * len - is in longwords 610 */ 611int 612rf_longword_bxor(unsigned long *src, unsigned long *dest, int len, void *bp) 613{ 614 unsigned long *end = src + len; 615 unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ 616 unsigned long *pg_src, *pg_dest; /* per-page source/dest pointers */ 617 int longs_this_time;/* # longwords to xor in the current iteration */ 618 619 pg_src = src; 620 pg_dest = dest; 621 if (!pg_src || !pg_dest) 622 return (EFAULT); 623 624 while (len >= 4) { 625 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */ 626 src += longs_this_time; 627 dest += longs_this_time; 628 len -= longs_this_time; 629 while (longs_this_time >= 4) { 630 d0 = pg_dest[0]; 631 d1 = pg_dest[1]; 632 d2 = pg_dest[2]; 633 d3 = pg_dest[3]; 634 s0 = pg_src[0]; 635 s1 = pg_src[1]; 636 s2 = pg_src[2]; 637 s3 = pg_src[3]; 638 pg_dest[0] = d0 ^ s0; 639 pg_dest[1] = d1 ^ s1; 640 pg_dest[2] = d2 ^ s2; 641 pg_dest[3] = d3 ^ s3; 642 pg_src += 4; 643 pg_dest += 4; 644 longs_this_time -= 4; 645 } 646 while (longs_this_time > 0) { /* cannot cross any page 647 * boundaries here */ 648 *pg_dest++ ^= *pg_src++; 649 longs_this_time--; 650 } 651 652 /* either we're done, or we've reached a page boundary on one 653 * (or possibly both) of the pointers */ 654 if (len) { 655 if (RF_PAGE_ALIGNED(src)) 656 pg_src = src; 657 if (RF_PAGE_ALIGNED(dest)) 658 pg_dest = dest; 659 if (!pg_src || !pg_dest) 660 return (EFAULT); 661 } 662 } 663 while (src < end) { 664 *pg_dest++ ^= *pg_src++; 665 src++; 666 dest++; 667 len--; 668 if (RF_PAGE_ALIGNED(src)) 669 pg_src = src; 670 if (RF_PAGE_ALIGNED(dest)) 671 pg_dest = dest; 672 } 673 
RF_ASSERT(len == 0); 674 return (0); 675} 676 677#if 0 678/* 679 dst = a ^ b ^ c; 680 a may equal dst 681 see comment above longword_bxor 682 len is length in longwords 683*/ 684int 685rf_longword_bxor3(unsigned long *dst, unsigned long *a, unsigned long *b, 686 unsigned long *c, int len, void *bp) 687{ 688 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 689 unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest 690 * pointers */ 691 int longs_this_time;/* # longs to xor in the current iteration */ 692 char dst_is_a = 0; 693 694 pg_a = a; 695 pg_b = b; 696 pg_c = c; 697 if (a == dst) { 698 pg_dst = pg_a; 699 dst_is_a = 1; 700 } else { 701 pg_dst = dst; 702 } 703 704 /* align dest to cache line. Can't cross a pg boundary on dst here. */ 705 while ((((unsigned long) pg_dst) & 0x1f)) { 706 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 707 dst++; 708 a++; 709 b++; 710 c++; 711 if (RF_PAGE_ALIGNED(a)) { 712 pg_a = a; 713 if (!pg_a) 714 return (EFAULT); 715 } 716 if (RF_PAGE_ALIGNED(b)) { 717 pg_b = a; 718 if (!pg_b) 719 return (EFAULT); 720 } 721 if (RF_PAGE_ALIGNED(c)) { 722 pg_c = a; 723 if (!pg_c) 724 return (EFAULT); 725 } 726 len--; 727 } 728 729 while (len > 4) { 730 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT); 731 a += longs_this_time; 732 b += longs_this_time; 733 c += longs_this_time; 734 dst += longs_this_time; 735 len -= longs_this_time; 736 while (longs_this_time >= 4) { 737 a0 = pg_a[0]; 738 longs_this_time -= 4; 739 740 a1 = pg_a[1]; 741 a2 = pg_a[2]; 742 743 a3 = pg_a[3]; 744 pg_a += 4; 745 746 b0 = pg_b[0]; 747 b1 = pg_b[1]; 748 749 b2 = pg_b[2]; 750 b3 = pg_b[3]; 751 /* start dual issue */ 752 a0 ^= b0; 753 b0 = pg_c[0]; 754 755 pg_b += 4; 756 a1 ^= b1; 757 758 a2 ^= b2; 759 a3 ^= b3; 760 761 b1 = pg_c[1]; 762 a0 ^= b0; 763 764 b2 = pg_c[2]; 765 a1 ^= b1; 766 767 b3 = pg_c[3]; 768 a2 ^= b2; 769 770 pg_dst[0] = a0; 771 a3 ^= b3; 772 pg_dst[1] = a1; 773 pg_c += 4; 774 
pg_dst[2] = a2; 775 pg_dst[3] = a3; 776 pg_dst += 4; 777 } 778 while (longs_this_time > 0) { /* cannot cross any page 779 * boundaries here */ 780 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 781 longs_this_time--; 782 } 783 784 if (len) { 785 if (RF_PAGE_ALIGNED(a)) { 786 pg_a = a; 787 if (!pg_a) 788 return (EFAULT); 789 if (dst_is_a) 790 pg_dst = pg_a; 791 } 792 if (RF_PAGE_ALIGNED(b)) { 793 pg_b = b; 794 if (!pg_b) 795 return (EFAULT); 796 } 797 if (RF_PAGE_ALIGNED(c)) { 798 pg_c = c; 799 if (!pg_c) 800 return (EFAULT); 801 } 802 if (!dst_is_a) 803 if (RF_PAGE_ALIGNED(dst)) { 804 pg_dst = dst; 805 if (!pg_dst) 806 return (EFAULT); 807 } 808 } 809 } 810 while (len) { 811 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 812 dst++; 813 a++; 814 b++; 815 c++; 816 if (RF_PAGE_ALIGNED(a)) { 817 pg_a = a; 818 if (!pg_a) 819 return (EFAULT); 820 if (dst_is_a) 821 pg_dst = pg_a; 822 } 823 if (RF_PAGE_ALIGNED(b)) { 824 pg_b = b; 825 if (!pg_b) 826 return (EFAULT); 827 } 828 if (RF_PAGE_ALIGNED(c)) { 829 pg_c = c; 830 if (!pg_c) 831 return (EFAULT); 832 } 833 if (!dst_is_a) 834 if (RF_PAGE_ALIGNED(dst)) { 835 pg_dst = dst; 836 if (!pg_dst) 837 return (EFAULT); 838 } 839 len--; 840 } 841 return (0); 842} 843 844int 845rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b, 846 unsigned char *c, unsigned long len, void *bp) 847{ 848 RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0); 849 850 return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a, 851 (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp)); 852} 853#endif 854