linpack.c revision 6651:beb4b5f48c82
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#include <errno.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <sys/time.h>
33#include <unistd.h>
34#include <externs.h>
35#include <fp.h>
36#include <fps_ereport.h>
37#include <fpstestmsg.h>
38#include <linpack.h>
39#include <sunperf.h>
40
41double fabs(double x);
42
43extern void	___pl_dss_set_chip_cache_(int *cache_size);
44static double dran(int iseed[4]);
45static int LINSUB(REAL * residn, REAL * resid,
46    REAL * eps, REAL * x11, REAL * xn1, int fps_verbose_msg);
47static int MATGEN(REAL a[], int lda, int n, REAL b[], REAL * norma);
48static REAL EPSLON(REAL x);
49static void MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]);
50
51extern int errno;
52static int LAPACK_ECACHE_SIZE = 8 * 1024 * 1024;
53static int MAT_SIZE;
54
55/*
56 * LINPACK(int Stress, int unit, struct fps_test_ereport *report,
57 * int fps_verbose_msg)
58 * performs the single and double precision lapack test. If an
59 * error is found, relevant data is collected and stored in report.
60 */
61int
62LINPACK(int Stress, int unit, struct fps_test_ereport *report,
63    int fps_verbose_msg)
64{
65	char err_data[MAX_INFO_SIZE];
66	char l_buf[64];
67	int c_index;
68	int ret;
69	REAL eps;
70	REAL resid;
71	REAL residn;
72	REAL x11;
73	REAL xn1;
74	REAL EPS;
75	REAL RESID;
76	REAL RESIDN;
77	REAL X11;
78	REAL XN1;
79	uint64_t expected[5];
80	uint64_t observed[5];
81
82#ifdef  FPS_LAPA_UNK
83#ifndef DP
84	if (Stress > 1000)
85			return (0);
86#endif /* DP */
87#endif /* FPS_LAPA_UNK */
88
89	if (Stress > 10000)
90		return (0);
91
92	/*
93	 * make sure is no dependency on the E$ size Without this call the
94	 * computed results will depend on the size of the E$ (
95	 * sos10/libsunperf ) IIIi computed results != IV+/IV/III+/III ...
96	 */
97	___pl_dss_set_chip_cache_(&LAPACK_ECACHE_SIZE);
98
99	c_index = Stress;
100
101	if (2000 == c_index)
102		c_index = 1001;
103	if (3000 == c_index)
104		c_index = 1002;
105	if (4016 == c_index)
106		c_index = 1003;
107	if (5000 == c_index)
108		c_index = 1004;
109	if (6000 == c_index)
110		c_index = 1005;
111	if (7016 == c_index)
112		c_index = 1006;
113	if (8034 == c_index)
114		c_index = 1007;
115	if (9000 == c_index)
116		c_index = 1008;
117	if (10000 == c_index)
118		c_index = 1009;
119
120	(void) snprintf(l_buf, 63, "%s(%d,cpu=%d)", PREC, Stress, unit);
121	fps_msg(fps_verbose_msg, gettext(FPSM_02), l_buf, unit);
122
123	MAT_SIZE = Stress;
124	ret = LINSUB(&residn, &resid, &eps, &x11, &xn1, fps_verbose_msg);
125
126	if (2 == ret) {
127		if (errno == EAGAIN || errno == ENOMEM)
128			_exit(FPU_SYSCALL_TRYAGAIN);
129		else
130			_exit(FPU_SYSCALL_FAIL);
131	}
132
133#ifdef  FPS_LAPA_UNK
134	RESIDN  = RESID   = X11 = XN1 = 0.0000000000000000e+00;
135
136#ifdef DP
137	EPS = 2.2204460492503131e-16;
138#else /* DP */
139	EPS = 1.1920928955078125e-07;
140#endif /* DP */
141
142#else /* FPS_LAPA_UNK */
143
144	RESIDN = LinpValsA[c_index].residn;
145	RESID = LinpValsA[c_index].resid;
146	EPS = LinpValsA[c_index].eps;
147	X11 = LinpValsA[c_index].x11;
148	XN1 = LinpValsA[c_index].xn1;
149
150#endif /* FPS_LAPA_UNK */
151
152	if ((residn == RESIDN) && (resid == RESID) && (eps == EPS) &&
153	    (x11 == X11) && (xn1 == XN1)) {
154
155		return (0);
156	} else {
157		snprintf(err_data, sizeof (err_data),
158		    "\nExpected: %.16e, %.16e, %.16e, %.16e, %.16e"
159		    "\nObserved: %.16e, %.16e, %.16e, %.16e, %.16e",
160		    RESIDN, RESID, EPS, X11, XN1, residn, resid, eps, x11, xn1);
161
162
163#ifdef	DP
164		observed[0] = *(uint64_t *)&residn;
165		observed[1] = *(uint64_t *)&resid;
166		observed[2] = *(uint64_t *)&eps;
167		observed[3] = *(uint64_t *)&x11;
168		observed[4] = *(uint64_t *)&xn1;
169		expected[0] = *(uint64_t *)&RESIDN;
170		expected[1] = *(uint64_t *)&RESID;
171		expected[2] = *(uint64_t *)&EPS;
172		expected[3] = *(uint64_t *)&X11;
173		expected[4] = *(uint64_t *)&XN1;
174
175		setup_fps_test_struct(IS_EREPORT_INFO, report,
176		    6317, &observed, &expected, 5, 5, err_data);
177#else
178		observed[0] = (uint64_t)(*(uint32_t *)&residn);
179		observed[1] = (uint64_t)(*(uint32_t *)&resid);
180		observed[2] = (uint64_t)(*(uint32_t *)&eps);
181		observed[3] = (uint64_t)(*(uint32_t *)&x11);
182		observed[4] = (uint64_t)(*(uint32_t *)&xn1);
183		expected[0] = (uint64_t)(*(uint32_t *)&RESIDN);
184		expected[1] = (uint64_t)(*(uint32_t *)&RESID);
185		expected[2] = (uint64_t)(*(uint32_t *)&EPS);
186		expected[3] = (uint64_t)(*(uint32_t *)&X11);
187		expected[4] = (uint64_t)(*(uint32_t *)&XN1);
188
189		setup_fps_test_struct(IS_EREPORT_INFO, report,
190		    6316, &observed, &expected, 5, 5, err_data);
191#endif
192
193		return (-1);
194	}
195}
196
197/*
198 * LINSUB(REAL *residn, REAL *resid, REAL *eps,
199 * REAL *x11, REAL *xn1, int fps_verbose_msg)begins
200 * the lapack calculation calls.
201 */
202static int
203LINSUB(REAL *residn, REAL *resid,
204	REAL *eps, REAL *x11, REAL *xn1,
205	int fps_verbose_msg)
206{
207	int i;
208	int lda;
209	int n;
210	int nr_malloc;
211	REAL *a;
212	REAL abs;
213	REAL *b;
214	REAL norma;
215	REAL normx;
216	REAL *x;
217	struct timeval timeout;
218	long info;
219	long *ipvt;
220
221	timeout.tv_sec = 0;
222	timeout.tv_usec = 10000; /* microseconds, 10ms */
223	nr_malloc = 0;
224
225mallocAgain:
226
227	a = (REAL *) malloc((MAT_SIZE + 8) * (MAT_SIZE + 1) *
228	    (size_t)sizeof (REAL));
229	b = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL));
230	x = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL));
231
232	ipvt = (long *)malloc(MAT_SIZE * (size_t)sizeof (long));
233
234	if ((NULL == a) || (NULL == b) ||
235	    (NULL == x) || (NULL == ipvt)) {
236		if (NULL != a)
237			free(a);
238		if (NULL != b)
239			free(b);
240		if (NULL != x)
241			free(x);
242		if (NULL != ipvt)
243			free(ipvt);
244
245		/* sleep 10 ms. wait for 100 ms */
246		if (nr_malloc++ < 11) {
247			(void) select(1, NULL, NULL, NULL, &timeout);
248			goto mallocAgain;
249		}
250		fps_msg(fps_verbose_msg,
251		    "Malloc failed in lapack, matrix size %d",
252		    MAT_SIZE);
253
254		return (2);
255	}
256	lda = MAT_SIZE + 8;
257	n = MAT_SIZE;
258
259	(void) MATGEN(a, lda, n, b, &norma);
260	GEFA(n, n, a, lda, ipvt, &info);
261	GESL('N', n, 1, a, lda, ipvt, b, n, &info);
262	free(ipvt);
263
264	for (i = 0; i < n; i++) {
265		x[i] = b[i];
266	}
267
268	(void) MATGEN((REAL *) a, lda, n, b, &norma);
269
270	for (i = 0; i < n; i++) {
271		b[i] = -b[i];
272	}
273
274	MXPY(n, b, n, lda, x, (REAL *) a);
275	free(a);
276
277	*resid = 0.0;
278	normx = 0.0;
279
280	for (i = 0; i < n; i++) {
281		abs = (REAL)fabs((double)b[i]);
282		*resid = (*resid > abs) ? *resid : abs;
283		abs = (REAL)fabs((double)x[i]);
284		normx = (normx > abs) ? normx : abs;
285	}
286
287	free(b);
288
289	*eps = EPSLON((REAL) LP_ONE);
290
291	*residn = *resid / (n * norma * normx * (*eps));
292
293	*x11 = x[0] - 1;
294	*xn1 = x[n - 1] - 1;
295
296	free(x);
297
298	return (0);
299}
300
301/*
302 * dran(int iseed[4]) returns a random real number from a
303 * uniform (0,1) distribution.
304 */
305static double
306dran(int iseed[4])
307{
308	double r;
309	double value;
310	int ipw2;
311	int it1;
312	int it2;
313	int it3;
314	int it4;
315	int m1;
316	int m2;
317	int m3;
318	int m4;
319
320	/* Set constants */
321	m1 = 494;
322	m2 = 322;
323	m3 = 2508;
324	m4 = 2549;
325	ipw2 = 4096;
326	r = 1.0 / ipw2;
327
328	/* multiply the seed by the multiplier modulo 2**48 */
329	it4 = iseed[3] * m4;
330	it3 = it4 / ipw2;
331	it4 = it4 - ipw2 * it3;
332	it3 = it3 + iseed[2] * m4 + iseed[3] * m3;
333	it2 = it3 / ipw2;
334	it3 = it3 - ipw2 * it2;
335	it2 = it2 + iseed[1] * m4 + iseed[2] * m3 + iseed[3] * m2;
336	it1 = it2 / ipw2;
337	it2 = it2 - ipw2 * it1;
338	it1 = it1 + iseed[0] * m4 + iseed[1] * m3 + iseed[2] * m2 +
339	    iseed[3] * m1;
340	it1 = it1 % ipw2;
341
342	/* return updated seed */
343	iseed[0] = it1;
344	iseed[1] = it2;
345	iseed[2] = it3;
346	iseed[3] = it4;
347
348	/* convert 48-bit integer to a real number in the interval (0,1) */
349	value = r * ((double)it1 + r * ((double)it2 + r * ((double)it3 +
350	    r * ((double)it4))));
351
352	return (value);
353}
354
355/*
356 * MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma)
357 * generates matrix a and b.
358 */
359
360#define	ALPHA 1.68750
361static int
362MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma)
363{
364	int		i;
365	int		init[4];
366	int		j;
367	REAL	value;
368
369	init[0] = 1;
370	init[1] = 2;
371	init[2] = 3;
372	init[3] = 1325;
373	*norma = LP_ZERO;
374	for (j = 0; j < n; j++) {
375		for (i = 0; i < n; i++) {
376#ifdef FPS_LAPA_UNK
377			a[lda*j+i] =
378			    (i < j) ? (double)(i+1) : (double)(j+ALPHA);
379			if (fabs(a[lda*j+i]) > *norma)
380				*norma = fabs(a[lda*j+i]);
381			} /* i */
382#else
383			value = (REAL) dran(init) - 0.5;
384			a[lda * j + i] = value;
385			value = fabs(value);
386			if (value > *norma) {
387				*norma = value;
388			}
389		} /* i */
390#endif /* FPS_LAPA_UNK */
391	} /* j */
392
393
394	for (i = 0; i < n; i++) {
395		b[i] = LP_ZERO;
396	}
397	for (j = 0; j < n; j++) {
398		for (i = 0; i < n; i++) {
399			b[i] = b[i] + a[lda * j + i];
400		}
401	}
402
403	return (0);
404}
405
406/*
407 * IAMAX(int n, REAL dx[])finds the index of element
408 * having maximum absolute value.
409 */
410int
411IAMAX(int n, REAL dx[])
412{
413	double abs;
414	double dmax;
415	int i;
416	int itemp;
417
418	if (n < 1)
419		return (-1);
420	if (n == 1)
421		return (0);
422
423	itemp = 0;
424	dmax = fabs((double)dx[0]);
425
426	for (i = 1; i < n; i++) {
427		abs = fabs((double)dx[i]);
428		if (abs > dmax) {
429			itemp = i;
430			dmax = abs;
431		}
432	}
433
434	return (itemp);
435}
436
437/*
438 * EPSLON(REAL x) estimates unit roundoff in
439 * quantities of size x.
440 */
441static REAL
442EPSLON(REAL x)
443{
444	REAL a;
445	REAL abs;
446	REAL b;
447	REAL c;
448	REAL eps;
449
450	a = 4.0e0 / 3.0e0;
451	eps = LP_ZERO;
452
453	while (eps == LP_ZERO) {
454		b = a - LP_ONE;
455		c = b + b + b;
456		eps = (REAL)fabs((double)(c - LP_ONE));
457	}
458
459	abs = (REAL)fabs((double)x);
460
461	return (eps * abs);
462}
463
464/*
465 * MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
466 * multiplies matrix m times vector x and add the result to
467 * vector y.
468 */
469static void
470MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
471{
472	int i;
473	int j;
474	int jmin;
475
476	/* cleanup odd vector */
477	j = n2 % 2;
478	if (j >= 1) {
479		j = j - 1;
480		for (i = 0; i < n1; i++)
481			y[i] = (y[i]) + x[j] * m[ldm * j + i];
482	}
483
484	/* cleanup odd group of two vectors */
485	j = n2 % 4;
486	if (j >= 2) {
487		j = j - 1;
488		for (i = 0; i < n1; i++)
489			y[i] = ((y[i])
490			    + x[j - 1] * m[ldm * (j - 1) + i])
491			    + x[j] * m[ldm * j + i];
492	}
493
494	/* cleanup odd group of four vectors */
495	j = n2 % 8;
496	if (j >= 4) {
497		j = j - 1;
498		for (i = 0; i < n1; i++)
499			y[i] = ((((y[i])
500			    + x[j - 3] * m[ldm * (j - 3) + i])
501			    + x[j - 2] * m[ldm * (j - 2) + i])
502			    + x[j - 1] * m[ldm * (j - 1) + i])
503			    + x[j] * m[ldm * j + i];
504	}
505
506	/* cleanup odd group of eight vectors */
507	j = n2 % 16;
508	if (j >= 8) {
509		j = j - 1;
510		for (i = 0; i < n1; i++)
511			y[i] = ((((((((y[i])
512			    + x[j - 7] * m[ldm * (j - 7) + i])
513			    + x[j - 6] * m[ldm * (j - 6) + i])
514			    + x[j - 5] * m[ldm * (j - 5) + i])
515			    + x[j - 4] * m[ldm * (j - 4) + i])
516			    + x[j - 3] * m[ldm * (j - 3) + i])
517			    + x[j - 2] * m[ldm * (j - 2) + i])
518			    + x[j - 1] * m[ldm * (j - 1) + i])
519			    + x[j] * m[ldm * j + i];
520	}
521
522	/* main loop - groups of sixteen vectors */
523	jmin = (n2 % 16) + 16;
524	for (j = jmin - 1; j < n2; j = j + 16) {
525		for (i = 0; i < n1; i++)
526			y[i] = ((((((((((((((((y[i])
527			    + x[j - 15] * m[ldm * (j - 15) + i])
528			    + x[j - 14] * m[ldm * (j - 14) + i])
529			    + x[j - 13] * m[ldm * (j - 13) + i])
530			    + x[j - 12] * m[ldm * (j - 12) + i])
531			    + x[j - 11] * m[ldm * (j - 11) + i])
532			    + x[j - 10] * m[ldm * (j - 10) + i])
533			    + x[j - 9] * m[ldm * (j - 9) + i])
534			    + x[j - 8] * m[ldm * (j - 8) + i])
535			    + x[j - 7] * m[ldm * (j - 7) + i])
536			    + x[j - 6] * m[ldm * (j - 6) + i])
537			    + x[j - 5] * m[ldm * (j - 5) + i])
538			    + x[j - 4] * m[ldm * (j - 4) + i])
539			    + x[j - 3] * m[ldm * (j - 3) + i])
540			    + x[j - 2] * m[ldm * (j - 2) + i])
541			    + x[j - 1] * m[ldm * (j - 1) + i])
542			    + x[j] * m[ldm * j + i];
543	}
544}
545