1/* $NetBSD: rf_nwayxor.c,v 1.10 2006/10/12 01:31:51 christos Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland, Daniel Stodolsky 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/************************************************************ 30 * 31 * nwayxor.c -- code to do N-way xors for reconstruction 32 * 33 * nWayXorN xors N input buffers into the destination buffer. 34 * adapted from danner's longword_bxor code. 35 * 36 ************************************************************/ 37 38#include <sys/cdefs.h> 39__KERNEL_RCSID(0, "$NetBSD: rf_nwayxor.c,v 1.10 2006/10/12 01:31:51 christos Exp $"); 40 41#include "rf_nwayxor.h" 42#include "rf_shutdown.h" 43 44static int callcount[10]; 45static void rf_ShutdownNWayXor(void *); 46 47static void 48rf_ShutdownNWayXor(void *ignored) 49{ 50 int i; 51 52 if (rf_showXorCallCounts == 0) 53 return; 54 printf("Call counts for n-way xor routines: "); 55 for (i = 0; i < 10; i++) 56 printf("%d ", callcount[i]); 57 printf("\n"); 58} 59 60int 61rf_ConfigureNWayXor(RF_ShutdownList_t **listp) 62{ 63 int i; 64 65 for (i = 0; i < 10; i++) 66 callcount[i] = 0; 67 rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL); 68 return (0); 69} 70 71void 72rf_nWayXor1(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) 73{ 74 unsigned long *src = (unsigned long *) src_rbs[0]->buffer; 75 unsigned long *dest = (unsigned long *) dest_rb->buffer; 76 unsigned long *end = src + len; 77 unsigned long d0, d1, d2, d3, s0, s1, s2, s3; 78 79 callcount[1]++; 80 while (len >= 4) { 81 d0 = dest[0]; 82 d1 = dest[1]; 83 d2 = dest[2]; 84 d3 = dest[3]; 85 s0 = src[0]; 86 s1 = src[1]; 87 s2 = src[2]; 88 s3 = src[3]; 89 dest[0] = d0 ^ s0; 90 dest[1] = d1 ^ s1; 91 dest[2] = d2 ^ s2; 92 dest[3] = d3 ^ s3; 93 src += 4; 94 dest += 4; 95 len -= 4; 96 } 97 while (src < end) { 98 *dest++ ^= *src++; 99 } 100} 101 102void 103rf_nWayXor2(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) 104{ 105 unsigned long *dst = (unsigned long *) dest_rb->buffer; 106 unsigned long *a = dst; 107 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 108 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 109 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 110 111 callcount[2]++; 112 /* align dest to cache line */ 113 while ((((unsigned long) dst) & 0x1f)) { 114 *dst++ = *a++ ^ *b++ ^ *c++; 115 len--; 116 } 117 while (len > 4) { 118 a0 = a[0]; 119 len -= 4; 120 121 a1 = a[1]; 122 a2 = a[2]; 123 124 a3 = a[3]; 125 a += 4; 126 127 b0 = b[0]; 128 b1 = b[1]; 129 130 b2 = b[2]; 131 b3 = b[3]; 132 /* start dual issue */ 133 a0 ^= b0; 134 b0 = c[0]; 135 136 b += 4; 137 a1 ^= b1; 138 139 a2 ^= b2; 140 a3 ^= b3; 141 142 b1 = c[1]; 143 a0 ^= b0; 144 145 b2 = c[2]; 146 a1 ^= b1; 147 148 b3 = c[3]; 149 a2 ^= b2; 150 151 dst[0] = a0; 152 a3 ^= b3; 153 dst[1] = a1; 154 c += 4; 155 dst[2] = a2; 156 dst[3] = a3; 157 dst += 4; 158 } 159 while (len) { 160 *dst++ = *a++ ^ *b++ ^ *c++; 161 len--; 162 } 163} 164/* note that first arg is not incremented but 2nd arg is */ 165#define LOAD_FIRST(_dst,_b) \ 166 a0 = _dst[0]; len -= 4; \ 167 a1 = _dst[1]; \ 168 a2 = _dst[2]; \ 169 a3 = _dst[3]; \ 170 b0 = _b[0]; \ 171 b1 = _b[1]; \ 172 b2 = _b[2]; \ 173 b3 = _b[3]; _b += 4; 174 175/* note: arg is incremented */ 176#define XOR_AND_LOAD_NEXT(_n) \ 177 a0 ^= b0; b0 = _n[0]; \ 178 a1 ^= b1; b1 = _n[1]; \ 179 a2 ^= b2; b2 = _n[2]; \ 180 a3 ^= b3; b3 = _n[3]; \ 181 _n += 4; 182 183/* arg is incremented */ 184#define XOR_AND_STORE(_dst) \ 185 a0 ^= b0; _dst[0] = a0; \ 186 a1 ^= b1; _dst[1] = a1; \ 187 a2 ^= b2; _dst[2] = a2; \ 188 a3 ^= b3; _dst[3] = a3; \ 189 _dst += 4; 190 191 192void 193rf_nWayXor3(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) 194{ 195 unsigned long *dst = (unsigned long *) dest_rb->buffer; 196 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 197 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 198 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 199 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 200 201 callcount[3]++; 202 /* align dest to cache line */ 203 while ((((unsigned long) dst) & 0x1f)) { 204 *dst++ ^= *b++ ^ *c++ ^ *d++; 205 len--; 206 } 207 while (len > 4) { 208 LOAD_FIRST(dst, b); 209 XOR_AND_LOAD_NEXT(c); 210 XOR_AND_LOAD_NEXT(d); 211 XOR_AND_STORE(dst); 212 } 213 while (len) { 214 *dst++ ^= *b++ ^ *c++ ^ *d++; 215 len--; 216 } 217} 218 219void 220rf_nWayXor4(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) 221{ 222 unsigned long *dst = (unsigned long *) dest_rb->buffer; 223 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 224 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 225 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 226 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 227 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 228 229 callcount[4]++; 230 /* align dest to cache line */ 231 while ((((unsigned long) dst) & 0x1f)) { 232 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; 233 len--; 234 } 235 while (len > 4) { 236 LOAD_FIRST(dst, b); 237 XOR_AND_LOAD_NEXT(c); 238 XOR_AND_LOAD_NEXT(d); 239 XOR_AND_LOAD_NEXT(e); 240 XOR_AND_STORE(dst); 241 } 242 while (len) { 243 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; 244 len--; 245 } 246} 247 248void 249rf_nWayXor5(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) 250{ 251 unsigned long *dst = (unsigned long *) dest_rb->buffer; 252 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 253 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 254 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 255 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 256 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 257 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 258 259 callcount[5]++; 260 /* align dest to cache line */ 261 while ((((unsigned long) dst) & 0x1f)) { 262 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; 263 len--; 264 } 265 while (len > 4) { 266 LOAD_FIRST(dst, b); 267 XOR_AND_LOAD_NEXT(c); 268 XOR_AND_LOAD_NEXT(d); 269 XOR_AND_LOAD_NEXT(e); 270 XOR_AND_LOAD_NEXT(f); 271 XOR_AND_STORE(dst); 272 } 273 while (len) { 274 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; 275 len--; 276 } 277} 278 279void 280rf_nWayXor6(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) 281{ 282 unsigned long *dst = (unsigned long *) dest_rb->buffer; 283 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 284 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 285 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 286 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 287 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 288 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 289 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 290 291 callcount[6]++; 292 /* align dest to cache line */ 293 while ((((unsigned long) dst) & 0x1f)) { 294 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; 295 len--; 296 } 297 while (len > 4) { 298 LOAD_FIRST(dst, b); 299 XOR_AND_LOAD_NEXT(c); 300 XOR_AND_LOAD_NEXT(d); 301 XOR_AND_LOAD_NEXT(e); 302 XOR_AND_LOAD_NEXT(f); 303 XOR_AND_LOAD_NEXT(g); 304 XOR_AND_STORE(dst); 305 } 306 while (len) { 307 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; 308 len--; 309 } 310} 311 312void 313rf_nWayXor7(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) 314{ 315 unsigned long *dst = (unsigned long *) dest_rb->buffer; 316 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 317 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 318 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 319 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 320 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 321 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 322 unsigned long *h = (unsigned long *) src_rbs[6]->buffer; 323 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 324 325 callcount[7]++; 326 /* align dest to cache line */ 327 while ((((unsigned long) dst) & 0x1f)) { 328 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; 329 len--; 330 } 331 while (len > 4) { 332 LOAD_FIRST(dst, b); 333 XOR_AND_LOAD_NEXT(c); 334 XOR_AND_LOAD_NEXT(d); 335 XOR_AND_LOAD_NEXT(e); 336 XOR_AND_LOAD_NEXT(f); 337 XOR_AND_LOAD_NEXT(g); 338 XOR_AND_LOAD_NEXT(h); 339 XOR_AND_STORE(dst); 340 } 341 while (len) { 342 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; 343 len--; 344 } 345} 346 347void 348rf_nWayXor8(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) 349{ 350 unsigned long *dst = (unsigned long *) dest_rb->buffer; 351 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 352 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 353 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 354 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 355 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 356 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 357 unsigned long *h = (unsigned long *) src_rbs[6]->buffer; 358 unsigned long *i = (unsigned long *) src_rbs[7]->buffer; 359 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 360 361 callcount[8]++; 362 /* align dest to cache line */ 363 while ((((unsigned long) dst) & 0x1f)) { 364 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; 365 len--; 366 } 367 while (len > 4) { 368 LOAD_FIRST(dst, b); 369 XOR_AND_LOAD_NEXT(c); 370 XOR_AND_LOAD_NEXT(d); 371 XOR_AND_LOAD_NEXT(e); 372 XOR_AND_LOAD_NEXT(f); 373 XOR_AND_LOAD_NEXT(g); 374 XOR_AND_LOAD_NEXT(h); 375 XOR_AND_LOAD_NEXT(i); 376 XOR_AND_STORE(dst); 377 } 378 while (len) { 379 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; 380 len--; 381 } 382} 383 384 385void 386rf_nWayXor9(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) 387{ 388 unsigned long *dst = (unsigned long *) dest_rb->buffer; 389 unsigned long *b = (unsigned long *) src_rbs[0]->buffer; 390 unsigned long *c = (unsigned long *) src_rbs[1]->buffer; 391 unsigned long *d = (unsigned long *) src_rbs[2]->buffer; 392 unsigned long *e = (unsigned long *) src_rbs[3]->buffer; 393 unsigned long *f = (unsigned long *) src_rbs[4]->buffer; 394 unsigned long *g = (unsigned long *) src_rbs[5]->buffer; 395 unsigned long *h = (unsigned long *) src_rbs[6]->buffer; 396 unsigned long *i = (unsigned long *) src_rbs[7]->buffer; 397 unsigned long *j = (unsigned long *) src_rbs[8]->buffer; 398 unsigned long a0, a1, a2, a3, b0, b1, b2, b3; 399 400 callcount[9]++; 401 /* align dest to cache line */ 402 while ((((unsigned long) dst) & 0x1f)) { 403 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; 404 len--; 405 } 406 while (len > 4) { 407 LOAD_FIRST(dst, b); 408 XOR_AND_LOAD_NEXT(c); 409 XOR_AND_LOAD_NEXT(d); 410 XOR_AND_LOAD_NEXT(e); 411 XOR_AND_LOAD_NEXT(f); 412 XOR_AND_LOAD_NEXT(g); 413 XOR_AND_LOAD_NEXT(h); 414 XOR_AND_LOAD_NEXT(i); 415 XOR_AND_LOAD_NEXT(j); 416 XOR_AND_STORE(dst); 417 } 418 while (len) { 419 *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; 420 len--; 421 } 422} 423