/* rf_dagfuncs.c revision 1.18 */
1/* $NetBSD: rf_dagfuncs.c,v 1.18 2004/01/10 17:04:44 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/* 30 * dagfuncs.c -- DAG node execution routines 31 * 32 * Rules: 33 * 1. Every DAG execution function must eventually cause node->status to 34 * get set to "good" or "bad", and "FinishNode" to be called. In the 35 * case of nodes that complete immediately (xor, NullNodeFunc, etc), 36 * the node execution function can do these two things directly. In 37 * the case of nodes that have to wait for some event (a disk read to 38 * complete, a lock to be released, etc) to occur before they can 39 * complete, this is typically achieved by having whatever module 40 * is doing the operation call GenericWakeupFunc upon completion. 41 * 2. DAG execution functions should check the status in the DAG header 42 * and NOP out their operations if the status is not "enable". 
However,
 * execution functions that release resources must be sure to release
 * them even when they NOP out the function that would use them.
 * Functions that acquire resources should go ahead and acquire them
 * even when they NOP, so that a downstream release node will not have
 * to check to find out whether or not the acquire was suppressed.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_dagfuncs.c,v 1.18 2004/01/10 17:04:44 oster Exp $");

#include <sys/param.h>
#include <sys/ioctl.h>

#include "rf_archs.h"
#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_layout.h"
#include "rf_etimer.h"
#include "rf_acctrace.h"
#include "rf_diskqueue.h"
#include "rf_dagfuncs.h"
#include "rf_general.h"
#include "rf_engine.h"
#include "rf_dagutils.h"

#include "rf_kintf.h"

#if RF_INCLUDE_PARITYLOGGING > 0
#include "rf_paritylog.h"
#endif				/* RF_INCLUDE_PARITYLOGGING > 0 */

/*
 * Global dispatch table of DAG node do/undo functions.  These are
 * filled in exactly once, by rf_ConfigureDAGFuncs() below, and are
 * referenced by the DAG-construction code elsewhere in RAIDframe.
 */
int     (*rf_DiskReadFunc) (RF_DagNode_t *);
int     (*rf_DiskWriteFunc) (RF_DagNode_t *);
int     (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
int     (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
int     (*rf_DiskUnlockFunc) (RF_DagNode_t *);
int     (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
int     (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
int     (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
int     (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);

/*****************************************************************************
 * main (only) configuration routine for this module
 ****************************************************************************/
int
rf_ConfigureDAGFuncs(RF_ShutdownList_t **listp)
{
	/*
	 * The long-word XOR code (rf_longword_bxor et al.) shifts byte
	 * counts by RF_LONGSHIFT to get longword counts, so RF_LONGSHIFT
	 * must agree with sizeof(long) on this platform.
	 */
	RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) ||
	    ((sizeof(long) == 4) && RF_LONGSHIFT == 2));
	rf_DiskReadFunc = rf_DiskReadFuncForThreads;
	rf_DiskReadUndoFunc = rf_DiskUndoFunc;
	rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
	rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
	rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
	rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc;
	rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
	rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
	rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
	return (0);
}



/*****************************************************************************
 * the execution function associated with a terminate node
 ****************************************************************************/
int
rf_TerminateFunc(RF_DagNode_t *node)
{
	/* by the time we terminate, every commit node must have fired */
	RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes);
	node->status = rf_good;
	return (rf_FinishNode(node, RF_THREAD_CONTEXT));
}

/* terminate nodes have nothing to undo */
int
rf_TerminateUndoFunc(RF_DagNode_t *node)
{
	return (0);
}


/*****************************************************************************
 * execution functions associated with a mirror node
 *
 * parameters:
 *
 * 0 - physical disk address of data
 * 1 - buffer for holding read data
 * 2 - parity stripe ID
 * 3 - flags
 * 4 - physical disk address of mirror (parity)
 *
 ****************************************************************************/

int
rf_DiskReadMirrorIdleFunc(RF_DagNode_t *node)
{
	/* select the mirror copy with the shortest queue and fill in node
	 * parameters with physical disk address */

	rf_SelectMirrorDiskIdle(node);
	return (rf_DiskReadFunc(node));
}

#if (RF_INCLUDE_CHAINDECLUSTER > 0) || (RF_INCLUDE_INTERDECLUSTER > 0) || (RF_DEBUG_VALIDATE_DAG > 0)
int
rf_DiskReadMirrorPartitionFunc(RF_DagNode_t *node)
{
	/* select the mirror copy with the shortest queue and fill in node
	 * parameters with physical disk address */

	rf_SelectMirrorDiskPartition(node);
	return (rf_DiskReadFunc(node));
}
#endif

/* mirror selection acquires nothing, so there is nothing to undo */
int
rf_DiskReadMirrorUndoFunc(RF_DagNode_t *node)
{
	return (0);
}



#if RF_INCLUDE_PARITYLOGGING > 0
168/***************************************************************************** 169 * the execution function associated with a parity log update node 170 ****************************************************************************/ 171int 172rf_ParityLogUpdateFunc(RF_DagNode_t *node) 173{ 174 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 175 caddr_t buf = (caddr_t) node->params[1].p; 176 RF_ParityLogData_t *logData; 177 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 178 RF_Etimer_t timer; 179 180 if (node->dagHdr->status == rf_enable) { 181 RF_ETIMER_START(timer); 182 logData = rf_CreateParityLogData(RF_UPDATE, pda, buf, 183 (RF_Raid_t *) (node->dagHdr->raidPtr), 184 node->wakeFunc, (void *) node, 185 node->dagHdr->tracerec, timer); 186 if (logData) 187 rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); 188 else { 189 RF_ETIMER_STOP(timer); 190 RF_ETIMER_EVAL(timer); 191 tracerec->plog_us += RF_ETIMER_VAL_US(timer); 192 (node->wakeFunc) (node, ENOMEM); 193 } 194 } 195 return (0); 196} 197 198 199/***************************************************************************** 200 * the execution function associated with a parity log overwrite node 201 ****************************************************************************/ 202int 203rf_ParityLogOverwriteFunc(RF_DagNode_t *node) 204{ 205 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 206 caddr_t buf = (caddr_t) node->params[1].p; 207 RF_ParityLogData_t *logData; 208 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; 209 RF_Etimer_t timer; 210 211 if (node->dagHdr->status == rf_enable) { 212 RF_ETIMER_START(timer); 213 logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf, 214(RF_Raid_t *) (node->dagHdr->raidPtr), 215 node->wakeFunc, (void *) node, node->dagHdr->tracerec, timer); 216 if (logData) 217 rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE); 218 else { 219 RF_ETIMER_STOP(timer); 220 RF_ETIMER_EVAL(timer); 221 tracerec->plog_us += 
RF_ETIMER_VAL_US(timer); 222 (node->wakeFunc) (node, ENOMEM); 223 } 224 } 225 return (0); 226} 227 228int 229rf_ParityLogUpdateUndoFunc(RF_DagNode_t *node) 230{ 231 return (0); 232} 233 234int 235rf_ParityLogOverwriteUndoFunc(RF_DagNode_t *node) 236{ 237 return (0); 238} 239#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 240 241/***************************************************************************** 242 * the execution function associated with a NOP node 243 ****************************************************************************/ 244int 245rf_NullNodeFunc(RF_DagNode_t *node) 246{ 247 node->status = rf_good; 248 return (rf_FinishNode(node, RF_THREAD_CONTEXT)); 249} 250 251int 252rf_NullNodeUndoFunc(RF_DagNode_t *node) 253{ 254 node->status = rf_undone; 255 return (rf_FinishNode(node, RF_THREAD_CONTEXT)); 256} 257 258 259/***************************************************************************** 260 * the execution function associated with a disk-read node 261 ****************************************************************************/ 262int 263rf_DiskReadFuncForThreads(RF_DagNode_t *node) 264{ 265 RF_DiskQueueData_t *req; 266 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 267 caddr_t buf = (caddr_t) node->params[1].p; 268 RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; 269 unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); 270 unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); 271 RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? 
RF_IO_TYPE_READ : RF_IO_TYPE_NOP; 272 RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 273 void *b_proc = NULL; 274 275 if (node->dagHdr->bp) 276 b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; 277 278 req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, 279 buf, parityStripeID, which_ru, 280 (int (*) (void *, int)) node->wakeFunc, 281 node, NULL, node->dagHdr->tracerec, 282 (void *) (node->dagHdr->raidPtr), 0, b_proc); 283 if (!req) { 284 (node->wakeFunc) (node, ENOMEM); 285 } else { 286 node->dagFuncData = (void *) req; 287 rf_DiskIOEnqueue(&(dqs[pda->col]), req, priority); 288 } 289 return (0); 290} 291 292 293/***************************************************************************** 294 * the execution function associated with a disk-write node 295 ****************************************************************************/ 296int 297rf_DiskWriteFuncForThreads(RF_DagNode_t *node) 298{ 299 RF_DiskQueueData_t *req; 300 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 301 caddr_t buf = (caddr_t) node->params[1].p; 302 RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v; 303 unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v); 304 unsigned which_ru = RF_EXTRACT_RU(node->params[3].v); 305 RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ? 
RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP; 306 RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 307 void *b_proc = NULL; 308 309 if (node->dagHdr->bp) 310 b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc; 311 312 /* normal processing (rollaway or forward recovery) begins here */ 313 req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector, 314 buf, parityStripeID, which_ru, 315 (int (*) (void *, int)) node->wakeFunc, 316 (void *) node, NULL, 317 node->dagHdr->tracerec, 318 (void *) (node->dagHdr->raidPtr), 319 0, b_proc); 320 321 if (!req) { 322 (node->wakeFunc) (node, ENOMEM); 323 } else { 324 node->dagFuncData = (void *) req; 325 rf_DiskIOEnqueue(&(dqs[pda->col]), req, priority); 326 } 327 328 return (0); 329} 330/***************************************************************************** 331 * the undo function for disk nodes 332 * Note: this is not a proper undo of a write node, only locks are released. 333 * old data is not restored to disk! 334 ****************************************************************************/ 335int 336rf_DiskUndoFunc(RF_DagNode_t *node) 337{ 338 RF_DiskQueueData_t *req; 339 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 340 RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 341 342 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 343 0L, 0, NULL, 0L, 0, 344 (int (*) (void *, int)) node->wakeFunc, 345 (void *) node, 346 NULL, node->dagHdr->tracerec, 347 (void *) (node->dagHdr->raidPtr), 348 RF_UNLOCK_DISK_QUEUE, NULL); 349 if (!req) 350 (node->wakeFunc) (node, ENOMEM); 351 else { 352 node->dagFuncData = (void *) req; 353 rf_DiskIOEnqueue(&(dqs[pda->col]), req, RF_IO_NORMAL_PRIORITY); 354 } 355 356 return (0); 357} 358/***************************************************************************** 359 * the execution function associated with an "unlock disk queue" node 360 ****************************************************************************/ 361int 
362rf_DiskUnlockFuncForThreads(RF_DagNode_t *node) 363{ 364 RF_DiskQueueData_t *req; 365 RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; 366 RF_DiskQueue_t *dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues; 367 368 req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 369 0L, 0, NULL, 0L, 0, 370 (int (*) (void *, int)) node->wakeFunc, 371 (void *) node, 372 NULL, node->dagHdr->tracerec, 373 (void *) (node->dagHdr->raidPtr), 374 RF_UNLOCK_DISK_QUEUE, NULL); 375 if (!req) 376 (node->wakeFunc) (node, ENOMEM); 377 else { 378 node->dagFuncData = (void *) req; 379 rf_DiskIOEnqueue(&(dqs[pda->col]), req, RF_IO_NORMAL_PRIORITY); 380 } 381 382 return (0); 383} 384/***************************************************************************** 385 * Callback routine for DiskRead and DiskWrite nodes. When the disk 386 * op completes, the routine is called to set the node status and 387 * inform the execution engine that the node has fired. 388 ****************************************************************************/ 389int 390rf_GenericWakeupFunc(RF_DagNode_t *node, int status) 391{ 392 393 switch (node->status) { 394 case rf_bwd1: 395 node->status = rf_bwd2; 396 if (node->dagFuncData) 397 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); 398 return (rf_DiskWriteFuncForThreads(node)); 399 case rf_fired: 400 if (status) 401 node->status = rf_bad; 402 else 403 node->status = rf_good; 404 break; 405 case rf_recover: 406 /* probably should never reach this case */ 407 if (status) 408 node->status = rf_panic; 409 else 410 node->status = rf_undone; 411 break; 412 default: 413 printf("rf_GenericWakeupFunc:"); 414 printf("node->status is %d,", node->status); 415 printf("status is %d \n", status); 416 RF_PANIC(); 417 break; 418 } 419 if (node->dagFuncData) 420 rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData); 421 return (rf_FinishNode(node, RF_INTR_CONTEXT)); 422} 423 424 
425/***************************************************************************** 426 * there are three distinct types of xor nodes: 427 428 * A "regular xor" is used in the fault-free case where the access 429 * spans a complete stripe unit. It assumes that the result buffer is 430 * one full stripe unit in size, and uses the stripe-unit-offset 431 * values that it computes from the PDAs to determine where within the 432 * stripe unit to XOR each argument buffer. 433 * 434 * A "simple xor" is used in the fault-free case where the access 435 * touches only a portion of one (or two, in some cases) stripe 436 * unit(s). It assumes that all the argument buffers are of the same 437 * size and have the same stripe unit offset. 438 * 439 * A "recovery xor" is used in the degraded-mode case. It's similar 440 * to the regular xor function except that it takes the failed PDA as 441 * an additional parameter, and uses it to determine what portions of 442 * the argument buffers need to be xor'd into the result buffer, and 443 * where in the result buffer they should go. 444 ****************************************************************************/ 445 446/* xor the params together and store the result in the result field. 447 * assume the result field points to a buffer that is the size of one 448 * SU, and use the pda params to determine where within the buffer to 449 * XOR the input buffers. 
 */
int
rf_RegularXorFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;
	int     i, retcode;

	retcode = 0;
	if (node->dagHdr->status == rf_enable) {
		/* don't do the XOR if the input is the same as the output */
		RF_ETIMER_START(timer);
		/* params come in (pda, buf) pairs; the trailing param is the
		 * raidPtr.  NB: retcode keeps only the return value of the
		 * final rf_XorIntoBuffer call -- earlier errors are
		 * overwritten. */
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				retcode = rf_XorIntoBuffer(raidPtr, (RF_PhysDiskAddr_t *) node->params[i].p,
				    (char *) node->params[i + 1].p, (char *) node->results[0]);
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
							 * explicitly since no
							 * I/O in this node */
}
/* xor the inputs into the result buffer, ignoring placement issues */
int
rf_SimpleXorFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	int     i, retcode = 0;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		/* don't do the XOR if the input is the same as the output */
		/* as in rf_RegularXorFunc, retcode reflects only the last
		 * rf_bxor call */
		for (i = 0; i < node->numParams - 1; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				retcode = rf_bxor((char *) node->params[i + 1].p, (char *) node->results[0],
				    rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[i].p)->numSector));
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
							 * explicitly since no
							 * I/O in this node */
}
/* this xor is used by the degraded-mode dag functions to recover lost
 * data.  the second-to-last parameter is the PDA for the failed
 * portion of the access.  the code here looks at this PDA and assumes
 * that the xor target buffer is equal in size to the number of
 * sectors in the failed PDA.  It then uses the other PDAs in the
 * parameter list to determine where within the target buffer the
 * corresponding data should be xored. */
int
rf_RecoveryXorFunc(RF_DagNode_t *node)
{
	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
	int     i, retcode = 0;
	RF_PhysDiskAddr_t *pda;
	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
	char   *srcbuf, *destbuf;
	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
	RF_Etimer_t timer;

	if (node->dagHdr->status == rf_enable) {
		RF_ETIMER_START(timer);
		for (i = 0; i < node->numParams - 2; i += 2)
			if (node->params[i + 1].p != node->results[0]) {
				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
				srcbuf = (char *) node->params[i + 1].p;
				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
				/* place this pda's data at its offset
				 * relative to the failed region */
				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
				retcode = rf_bxor(srcbuf, destbuf, rf_RaidAddressToByte(raidPtr, pda->numSector));
			}
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
	}
	return (rf_GenericWakeupFunc(node, retcode));
}
/*****************************************************************************
 * The next three functions are utilities used by the above
 * xor-execution functions.
 ****************************************************************************/


/*
 * this is just a glorified buffer xor.  targbuf points to a buffer
 * that is one full stripe unit in size.  srcbuf points to a buffer
 * that may be less than 1 SU, but never more.  When the access
 * described by pda is one SU in size (which by implication means it's
 * SU-aligned), all that happens is (targbuf) <- (srcbuf ^ targbuf).
 * When the access is less than one SU in size the XOR occurs on only
 * the portion of targbuf identified in the pda. */

int
rf_XorIntoBuffer(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda,
		 char *srcbuf, char *targbuf)
{
	char   *targptr;
	int     sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	int     SUOffset = pda->startSector % sectPerSU;
	int     length, retcode = 0;

	RF_ASSERT(pda->numSector <= sectPerSU);

	targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset);
	length = rf_RaidAddressToByte(raidPtr, pda->numSector);
	retcode = rf_bxor(srcbuf, targptr, length);
	return (retcode);
}
/* it really should be the case that the buffer pointers (returned by
 * malloc) are aligned to the natural word size of the machine, so
 * this is the only case we optimize for.  The length should always be
 * a multiple of the sector size, so there should be no problem with
 * leftover bytes at the end. */
int
rf_bxor(char *src, char *dest, int len)
{
	unsigned mask = sizeof(long) - 1, retcode = 0;

	/* only the fully long-aligned case is handled; anything else
	 * asserts */
	if (!(((unsigned long) src) & mask) &&
	    !(((unsigned long) dest) & mask) && !(len & mask)) {
		retcode = rf_longword_bxor((unsigned long *) src,
		    (unsigned long *) dest,
		    len >> RF_LONGSHIFT);
	} else {
		RF_ASSERT(0);
	}
	return (retcode);
}

/* When XORing in kernel mode, we need to map each user page to kernel
 * space before we can access it.
We don't want to assume anything 590 * about which input buffers are in kernel/user space, nor about their 591 * alignment, so in each loop we compute the maximum number of bytes 592 * that we can xor without crossing any page boundaries, and do only 593 * this many bytes before the next remap. 594 * 595 * len - is in longwords 596 */ 597int 598rf_longword_bxor(unsigned long *src, unsigned long *dest, int len) 599{ 600 unsigned long *end = src + len; 601 unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */ 602 unsigned long *pg_src, *pg_dest; /* per-page source/dest pointers */ 603 int longs_this_time;/* # longwords to xor in the current iteration */ 604 605 pg_src = src; 606 pg_dest = dest; 607 if (!pg_src || !pg_dest) 608 return (EFAULT); 609 610 while (len >= 4) { 611 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src), RF_BLIP(pg_dest)) >> RF_LONGSHIFT); /* note len in longwords */ 612 src += longs_this_time; 613 dest += longs_this_time; 614 len -= longs_this_time; 615 while (longs_this_time >= 4) { 616 d0 = pg_dest[0]; 617 d1 = pg_dest[1]; 618 d2 = pg_dest[2]; 619 d3 = pg_dest[3]; 620 s0 = pg_src[0]; 621 s1 = pg_src[1]; 622 s2 = pg_src[2]; 623 s3 = pg_src[3]; 624 pg_dest[0] = d0 ^ s0; 625 pg_dest[1] = d1 ^ s1; 626 pg_dest[2] = d2 ^ s2; 627 pg_dest[3] = d3 ^ s3; 628 pg_src += 4; 629 pg_dest += 4; 630 longs_this_time -= 4; 631 } 632 while (longs_this_time > 0) { /* cannot cross any page 633 * boundaries here */ 634 *pg_dest++ ^= *pg_src++; 635 longs_this_time--; 636 } 637 638 /* either we're done, or we've reached a page boundary on one 639 * (or possibly both) of the pointers */ 640 if (len) { 641 if (RF_PAGE_ALIGNED(src)) 642 pg_src = src; 643 if (RF_PAGE_ALIGNED(dest)) 644 pg_dest = dest; 645 if (!pg_src || !pg_dest) 646 return (EFAULT); 647 } 648 } 649 while (src < end) { 650 *pg_dest++ ^= *pg_src++; 651 src++; 652 dest++; 653 len--; 654 if (RF_PAGE_ALIGNED(src)) 655 pg_src = src; 656 if (RF_PAGE_ALIGNED(dest)) 657 pg_dest = dest; 658 } 659 
RF_ASSERT(len == 0); 660 return (0); 661} 662 663#if 0 664/* 665 dst = a ^ b ^ c; 666 a may equal dst 667 see comment above longword_bxor 668 len is length in longwords 669*/ 670int 671rf_longword_bxor3(unsigned long *dst, unsigned long *a, unsigned long *b, 672 unsigned long *c, int len, void *bp) 673{ 674 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 675 unsigned long *pg_a, *pg_b, *pg_c, *pg_dst; /* per-page source/dest 676 * pointers */ 677 int longs_this_time;/* # longs to xor in the current iteration */ 678 char dst_is_a = 0; 679 680 pg_a = a; 681 pg_b = b; 682 pg_c = c; 683 if (a == dst) { 684 pg_dst = pg_a; 685 dst_is_a = 1; 686 } else { 687 pg_dst = dst; 688 } 689 690 /* align dest to cache line. Can't cross a pg boundary on dst here. */ 691 while ((((unsigned long) pg_dst) & 0x1f)) { 692 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 693 dst++; 694 a++; 695 b++; 696 c++; 697 if (RF_PAGE_ALIGNED(a)) { 698 pg_a = a; 699 if (!pg_a) 700 return (EFAULT); 701 } 702 if (RF_PAGE_ALIGNED(b)) { 703 pg_b = a; 704 if (!pg_b) 705 return (EFAULT); 706 } 707 if (RF_PAGE_ALIGNED(c)) { 708 pg_c = a; 709 if (!pg_c) 710 return (EFAULT); 711 } 712 len--; 713 } 714 715 while (len > 4) { 716 longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a), RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >> RF_LONGSHIFT); 717 a += longs_this_time; 718 b += longs_this_time; 719 c += longs_this_time; 720 dst += longs_this_time; 721 len -= longs_this_time; 722 while (longs_this_time >= 4) { 723 a0 = pg_a[0]; 724 longs_this_time -= 4; 725 726 a1 = pg_a[1]; 727 a2 = pg_a[2]; 728 729 a3 = pg_a[3]; 730 pg_a += 4; 731 732 b0 = pg_b[0]; 733 b1 = pg_b[1]; 734 735 b2 = pg_b[2]; 736 b3 = pg_b[3]; 737 /* start dual issue */ 738 a0 ^= b0; 739 b0 = pg_c[0]; 740 741 pg_b += 4; 742 a1 ^= b1; 743 744 a2 ^= b2; 745 a3 ^= b3; 746 747 b1 = pg_c[1]; 748 a0 ^= b0; 749 750 b2 = pg_c[2]; 751 a1 ^= b1; 752 753 b3 = pg_c[3]; 754 a2 ^= b2; 755 756 pg_dst[0] = a0; 757 a3 ^= b3; 758 pg_dst[1] = a1; 759 pg_c += 4; 760 
pg_dst[2] = a2; 761 pg_dst[3] = a3; 762 pg_dst += 4; 763 } 764 while (longs_this_time > 0) { /* cannot cross any page 765 * boundaries here */ 766 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 767 longs_this_time--; 768 } 769 770 if (len) { 771 if (RF_PAGE_ALIGNED(a)) { 772 pg_a = a; 773 if (!pg_a) 774 return (EFAULT); 775 if (dst_is_a) 776 pg_dst = pg_a; 777 } 778 if (RF_PAGE_ALIGNED(b)) { 779 pg_b = b; 780 if (!pg_b) 781 return (EFAULT); 782 } 783 if (RF_PAGE_ALIGNED(c)) { 784 pg_c = c; 785 if (!pg_c) 786 return (EFAULT); 787 } 788 if (!dst_is_a) 789 if (RF_PAGE_ALIGNED(dst)) { 790 pg_dst = dst; 791 if (!pg_dst) 792 return (EFAULT); 793 } 794 } 795 } 796 while (len) { 797 *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++; 798 dst++; 799 a++; 800 b++; 801 c++; 802 if (RF_PAGE_ALIGNED(a)) { 803 pg_a = a; 804 if (!pg_a) 805 return (EFAULT); 806 if (dst_is_a) 807 pg_dst = pg_a; 808 } 809 if (RF_PAGE_ALIGNED(b)) { 810 pg_b = b; 811 if (!pg_b) 812 return (EFAULT); 813 } 814 if (RF_PAGE_ALIGNED(c)) { 815 pg_c = c; 816 if (!pg_c) 817 return (EFAULT); 818 } 819 if (!dst_is_a) 820 if (RF_PAGE_ALIGNED(dst)) { 821 pg_dst = dst; 822 if (!pg_dst) 823 return (EFAULT); 824 } 825 len--; 826 } 827 return (0); 828} 829 830int 831rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b, 832 unsigned char *c, unsigned long len, void *bp) 833{ 834 RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7) == 0); 835 836 return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a, 837 (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT, bp)); 838} 839#endif 840