1/*	$NetBSD: rf_nwayxor.c,v 1.10 2006/10/12 01:31:51 christos Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland, Daniel Stodolsky
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29/************************************************************
30 *
31 * nwayxor.c -- code to do N-way xors for reconstruction
32 *
33 * nWayXorN xors N input buffers into the destination buffer.
34 * adapted from danner's longword_bxor code.
35 *
36 ************************************************************/
37
38#include <sys/cdefs.h>
39__KERNEL_RCSID(0, "$NetBSD: rf_nwayxor.c,v 1.10 2006/10/12 01:31:51 christos Exp $");
40
41#include "rf_nwayxor.h"
42#include "rf_shutdown.h"
43
/*
 * Call counters for the n-way xor routines: rf_nWayXorN() increments
 * callcount[N].  Within this file only slots 1 through 9 are ever
 * incremented; slot 0 is zeroed and printed but never bumped.
 */
static int callcount[10];
/* Prints the counters above; defined below and registered by rf_ConfigureNWayXor(). */
static void rf_ShutdownNWayXor(void *);
46
47static void
48rf_ShutdownNWayXor(void *ignored)
49{
50	int     i;
51
52	if (rf_showXorCallCounts == 0)
53		return;
54	printf("Call counts for n-way xor routines:  ");
55	for (i = 0; i < 10; i++)
56		printf("%d ", callcount[i]);
57	printf("\n");
58}
59
60int
61rf_ConfigureNWayXor(RF_ShutdownList_t **listp)
62{
63	int     i;
64
65	for (i = 0; i < 10; i++)
66		callcount[i] = 0;
67	rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL);
68	return (0);
69}
70
71void
72rf_nWayXor1(RF_ReconBuffer_t **src_rbs,	RF_ReconBuffer_t *dest_rb, int len)
73{
74	unsigned long *src = (unsigned long *) src_rbs[0]->buffer;
75	unsigned long *dest = (unsigned long *) dest_rb->buffer;
76	unsigned long *end = src + len;
77	unsigned long d0, d1, d2, d3, s0, s1, s2, s3;
78
79	callcount[1]++;
80	while (len >= 4) {
81		d0 = dest[0];
82		d1 = dest[1];
83		d2 = dest[2];
84		d3 = dest[3];
85		s0 = src[0];
86		s1 = src[1];
87		s2 = src[2];
88		s3 = src[3];
89		dest[0] = d0 ^ s0;
90		dest[1] = d1 ^ s1;
91		dest[2] = d2 ^ s2;
92		dest[3] = d3 ^ s3;
93		src += 4;
94		dest += 4;
95		len -= 4;
96	}
97	while (src < end) {
98		*dest++ ^= *src++;
99	}
100}
101
102void
103rf_nWayXor2(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
104{
105	unsigned long *dst = (unsigned long *) dest_rb->buffer;
106	unsigned long *a = dst;
107	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
108	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
109	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
110
111	callcount[2]++;
112	/* align dest to cache line */
113	while ((((unsigned long) dst) & 0x1f)) {
114		*dst++ = *a++ ^ *b++ ^ *c++;
115		len--;
116	}
117	while (len > 4) {
118		a0 = a[0];
119		len -= 4;
120
121		a1 = a[1];
122		a2 = a[2];
123
124		a3 = a[3];
125		a += 4;
126
127		b0 = b[0];
128		b1 = b[1];
129
130		b2 = b[2];
131		b3 = b[3];
132		/* start dual issue */
133		a0 ^= b0;
134		b0 = c[0];
135
136		b += 4;
137		a1 ^= b1;
138
139		a2 ^= b2;
140		a3 ^= b3;
141
142		b1 = c[1];
143		a0 ^= b0;
144
145		b2 = c[2];
146		a1 ^= b1;
147
148		b3 = c[3];
149		a2 ^= b2;
150
151		dst[0] = a0;
152		a3 ^= b3;
153		dst[1] = a1;
154		c += 4;
155		dst[2] = a2;
156		dst[3] = a3;
157		dst += 4;
158	}
159	while (len) {
160		*dst++ = *a++ ^ *b++ ^ *c++;
161		len--;
162	}
163}
/*
 * LOAD_FIRST -- start one 4-word step.
 *
 * Loads _dst[0..3] into a0..a3 and _b[0..3] into b0..b3, and subtracts
 * 4 from len.  Note that the first arg is not incremented but the 2nd
 * arg is (by 4).
 *
 * a0..a3, b0..b3 and len must be declared in the invoking scope.  Both
 * arguments are expanded several times, so pass plain pointer
 * variables.  The do/while (0) wrapper makes an invocation a single
 * statement, so it is safe in an unbraced if/else.
 */
#define LOAD_FIRST(_dst,_b) \
do {                          \
  a0 = (_dst)[0]; len -= 4;   \
  a1 = (_dst)[1];             \
  a2 = (_dst)[2];             \
  a3 = (_dst)[3];             \
  b0 = (_b)[0];               \
  b1 = (_b)[1];               \
  b2 = (_b)[2];               \
  b3 = (_b)[3];  (_b) += 4;   \
} while (0)
174
/*
 * XOR_AND_LOAD_NEXT -- fold the loaded words in and fetch the next four.
 *
 * Xors b0..b3 into a0..a3, then reloads b0..b3 from _n[0..3].
 * Note: the arg is incremented (by 4).
 *
 * a0..a3 and b0..b3 must be declared in the invoking scope.  The
 * argument is expanded several times, so pass a plain pointer variable.
 * The do/while (0) wrapper makes an invocation a single statement.
 */
#define XOR_AND_LOAD_NEXT(_n) \
do {                            \
  a0 ^= b0; b0 = (_n)[0];       \
  a1 ^= b1; b1 = (_n)[1];       \
  a2 ^= b2; b2 = (_n)[2];       \
  a3 ^= b3; b3 = (_n)[3];       \
  (_n) += 4;                    \
} while (0)
182
/*
 * XOR_AND_STORE -- finish one 4-word step.
 *
 * Xors b0..b3 into a0..a3 and stores the results to _dst[0..3].
 * The arg is incremented (by 4).
 *
 * a0..a3 and b0..b3 must be declared in the invoking scope.  The
 * argument is expanded several times, so pass a plain pointer variable.
 * The do/while (0) wrapper makes an invocation a single statement.
 */
#define XOR_AND_STORE(_dst)       \
do {                                \
  a0 ^= b0; (_dst)[0] = a0;         \
  a1 ^= b1; (_dst)[1] = a1;         \
  a2 ^= b2; (_dst)[2] = a2;         \
  a3 ^= b3; (_dst)[3] = a3;         \
  (_dst) += 4;                      \
} while (0)
190
191
192void
193rf_nWayXor3(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
194{
195	unsigned long *dst = (unsigned long *) dest_rb->buffer;
196	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
197	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
198	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
199	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
200
201	callcount[3]++;
202	/* align dest to cache line */
203	while ((((unsigned long) dst) & 0x1f)) {
204		*dst++ ^= *b++ ^ *c++ ^ *d++;
205		len--;
206	}
207	while (len > 4) {
208		LOAD_FIRST(dst, b);
209		XOR_AND_LOAD_NEXT(c);
210		XOR_AND_LOAD_NEXT(d);
211		XOR_AND_STORE(dst);
212	}
213	while (len) {
214		*dst++ ^= *b++ ^ *c++ ^ *d++;
215		len--;
216	}
217}
218
219void
220rf_nWayXor4(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
221{
222	unsigned long *dst = (unsigned long *) dest_rb->buffer;
223	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
224	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
225	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
226	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
227	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
228
229	callcount[4]++;
230	/* align dest to cache line */
231	while ((((unsigned long) dst) & 0x1f)) {
232		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
233		len--;
234	}
235	while (len > 4) {
236		LOAD_FIRST(dst, b);
237		XOR_AND_LOAD_NEXT(c);
238		XOR_AND_LOAD_NEXT(d);
239		XOR_AND_LOAD_NEXT(e);
240		XOR_AND_STORE(dst);
241	}
242	while (len) {
243		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
244		len--;
245	}
246}
247
248void
249rf_nWayXor5(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
250{
251	unsigned long *dst = (unsigned long *) dest_rb->buffer;
252	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
253	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
254	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
255	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
256	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
257	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
258
259	callcount[5]++;
260	/* align dest to cache line */
261	while ((((unsigned long) dst) & 0x1f)) {
262		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
263		len--;
264	}
265	while (len > 4) {
266		LOAD_FIRST(dst, b);
267		XOR_AND_LOAD_NEXT(c);
268		XOR_AND_LOAD_NEXT(d);
269		XOR_AND_LOAD_NEXT(e);
270		XOR_AND_LOAD_NEXT(f);
271		XOR_AND_STORE(dst);
272	}
273	while (len) {
274		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
275		len--;
276	}
277}
278
279void
280rf_nWayXor6(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
281{
282	unsigned long *dst = (unsigned long *) dest_rb->buffer;
283	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
284	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
285	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
286	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
287	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
288	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
289	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
290
291	callcount[6]++;
292	/* align dest to cache line */
293	while ((((unsigned long) dst) & 0x1f)) {
294		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
295		len--;
296	}
297	while (len > 4) {
298		LOAD_FIRST(dst, b);
299		XOR_AND_LOAD_NEXT(c);
300		XOR_AND_LOAD_NEXT(d);
301		XOR_AND_LOAD_NEXT(e);
302		XOR_AND_LOAD_NEXT(f);
303		XOR_AND_LOAD_NEXT(g);
304		XOR_AND_STORE(dst);
305	}
306	while (len) {
307		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
308		len--;
309	}
310}
311
312void
313rf_nWayXor7(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
314{
315	unsigned long *dst = (unsigned long *) dest_rb->buffer;
316	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
317	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
318	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
319	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
320	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
321	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
322	unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
323	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
324
325	callcount[7]++;
326	/* align dest to cache line */
327	while ((((unsigned long) dst) & 0x1f)) {
328		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
329		len--;
330	}
331	while (len > 4) {
332		LOAD_FIRST(dst, b);
333		XOR_AND_LOAD_NEXT(c);
334		XOR_AND_LOAD_NEXT(d);
335		XOR_AND_LOAD_NEXT(e);
336		XOR_AND_LOAD_NEXT(f);
337		XOR_AND_LOAD_NEXT(g);
338		XOR_AND_LOAD_NEXT(h);
339		XOR_AND_STORE(dst);
340	}
341	while (len) {
342		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
343		len--;
344	}
345}
346
347void
348rf_nWayXor8(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
349{
350	unsigned long *dst = (unsigned long *) dest_rb->buffer;
351	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
352	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
353	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
354	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
355	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
356	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
357	unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
358	unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
359	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
360
361	callcount[8]++;
362	/* align dest to cache line */
363	while ((((unsigned long) dst) & 0x1f)) {
364		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
365		len--;
366	}
367	while (len > 4) {
368		LOAD_FIRST(dst, b);
369		XOR_AND_LOAD_NEXT(c);
370		XOR_AND_LOAD_NEXT(d);
371		XOR_AND_LOAD_NEXT(e);
372		XOR_AND_LOAD_NEXT(f);
373		XOR_AND_LOAD_NEXT(g);
374		XOR_AND_LOAD_NEXT(h);
375		XOR_AND_LOAD_NEXT(i);
376		XOR_AND_STORE(dst);
377	}
378	while (len) {
379		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
380		len--;
381	}
382}
383
384
385void
386rf_nWayXor9(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len)
387{
388	unsigned long *dst = (unsigned long *) dest_rb->buffer;
389	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
390	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
391	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
392	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
393	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
394	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
395	unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
396	unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
397	unsigned long *j = (unsigned long *) src_rbs[8]->buffer;
398	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
399
400	callcount[9]++;
401	/* align dest to cache line */
402	while ((((unsigned long) dst) & 0x1f)) {
403		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
404		len--;
405	}
406	while (len > 4) {
407		LOAD_FIRST(dst, b);
408		XOR_AND_LOAD_NEXT(c);
409		XOR_AND_LOAD_NEXT(d);
410		XOR_AND_LOAD_NEXT(e);
411		XOR_AND_LOAD_NEXT(f);
412		XOR_AND_LOAD_NEXT(g);
413		XOR_AND_LOAD_NEXT(h);
414		XOR_AND_LOAD_NEXT(i);
415		XOR_AND_LOAD_NEXT(j);
416		XOR_AND_STORE(dst);
417	}
418	while (len) {
419		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
420		len--;
421	}
422}
423