/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include #include #include #include #include double fabs(double x); extern void ___pl_dss_set_chip_cache_(int *cache_size); static double dran(int iseed[4]); static int LINSUB(REAL * residn, REAL * resid, REAL * eps, REAL * x11, REAL * xn1, int fps_verbose_msg); static int MATGEN(REAL a[], int lda, int n, REAL b[], REAL * norma); static REAL EPSLON(REAL x); static void MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]); extern int errno; static int MAT_SIZE; #ifndef __lint static int LAPACK_ECACHE_SIZE = 8 * 1024 * 1024; #endif /* * LINPACK(int Stress, int unit, struct fps_test_ereport *report, * int fps_verbose_msg) * performs the single and double precision lapack test. If an * error is found, relevant data is collected and stored in report. */ int LINPACK(int Stress, int unit, struct fps_test_ereport *report, int fps_verbose_msg) { char err_data[MAX_INFO_SIZE]; char l_buf[64]; int c_index; int ret; REAL eps; REAL resid; REAL residn; REAL x11; REAL xn1; REAL EPS; REAL RESID; REAL RESIDN; REAL X11; REAL XN1; uint64_t expected[5]; uint64_t observed[5]; #ifdef FPS_LAPA_UNK #ifndef DP if (Stress > 1000) return (0); #endif /* DP */ #endif /* FPS_LAPA_UNK */ if (Stress > 10000) return (0); /* * make sure is no dependency on the E$ size Without this call the * computed results will depend on the size of the E$ ( * sos10/libsunperf ) IIIi computed results != IV+/IV/III+/III ... */ #ifndef __lint ___pl_dss_set_chip_cache_(&LAPACK_ECACHE_SIZE); #endif c_index = Stress; if (2000 == c_index) c_index = 1001; if (3000 == c_index) c_index = 1002; if (4016 == c_index) c_index = 1003; if (5000 == c_index) c_index = 1004; if (6000 == c_index) c_index = 1005; if (7016 == c_index) c_index = 1006; if (8034 == c_index) c_index = 1007; if (9000 == c_index) c_index = 1008; if (10000 == c_index) c_index = 1009; (void) snprintf(l_buf, 63, "%s(%d,cpu=%d)", PREC, Stress, unit); fps_msg(fps_verbose_msg, gettext(FPSM_02), l_buf, unit); MAT_SIZE = Stress; ret = LINSUB(&residn, &resid, &eps, &x11, &xn1, fps_verbose_msg); if (2 == ret) { if (errno == EAGAIN || errno == ENOMEM) _exit(FPU_SYSCALL_TRYAGAIN); else _exit(FPU_SYSCALL_FAIL); } #ifdef FPS_LAPA_UNK RESIDN = RESID = X11 = XN1 = 0.0000000000000000e+00; #ifdef DP EPS = 2.2204460492503131e-16; #else /* DP */ EPS = 1.1920928955078125e-07; #endif /* DP */ #else /* FPS_LAPA_UNK */ RESIDN = LinpValsA[c_index].residn; RESID = LinpValsA[c_index].resid; EPS = LinpValsA[c_index].eps; X11 = LinpValsA[c_index].x11; XN1 = LinpValsA[c_index].xn1; #endif /* FPS_LAPA_UNK */ if ((residn == RESIDN) && (resid == RESID) && (eps == EPS) && (x11 == X11) && (xn1 == XN1)) { return (0); } else { (void) snprintf(err_data, sizeof (err_data), "\nExpected: %.16e, %.16e, %.16e, %.16e, %.16e" "\nObserved: %.16e, %.16e, %.16e, %.16e, %.16e", RESIDN, RESID, EPS, X11, XN1, residn, resid, eps, x11, xn1); #ifdef DP observed[0] = *(uint64_t *)&residn; observed[1] = *(uint64_t *)&resid; observed[2] = *(uint64_t *)&eps; observed[3] = *(uint64_t *)&x11; observed[4] = *(uint64_t *)&xn1; expected[0] = *(uint64_t *)&RESIDN; expected[1] = *(uint64_t *)&RESID; expected[2] = *(uint64_t *)&EPS; expected[3] = *(uint64_t *)&X11; expected[4] = *(uint64_t *)&XN1; setup_fps_test_struct(IS_EREPORT_INFO, report, 6317, &observed, &expected, 5, 5, err_data); #else observed[0] = (uint64_t)(*(uint32_t *)&residn); observed[1] = (uint64_t)(*(uint32_t *)&resid); observed[2] = (uint64_t)(*(uint32_t *)&eps); observed[3] = (uint64_t)(*(uint32_t *)&x11); observed[4] = (uint64_t)(*(uint32_t *)&xn1); expected[0] = (uint64_t)(*(uint32_t *)&RESIDN); expected[1] = (uint64_t)(*(uint32_t *)&RESID); expected[2] = (uint64_t)(*(uint32_t *)&EPS); expected[3] = (uint64_t)(*(uint32_t *)&X11); expected[4] = (uint64_t)(*(uint32_t *)&XN1); setup_fps_test_struct(IS_EREPORT_INFO, report, 6316, &observed, &expected, 5, 5, err_data); #endif return (-1); } } /* * LINSUB(REAL *residn, REAL *resid, REAL *eps, * REAL *x11, REAL *xn1, int fps_verbose_msg)begins * the lapack calculation calls. */ static int LINSUB(REAL *residn, REAL *resid, REAL *eps, REAL *x11, REAL *xn1, int fps_verbose_msg) { int i; int lda; int n; int nr_malloc; REAL *a; REAL abs; REAL *b; REAL norma; REAL normx; REAL *x; struct timeval timeout; long *ipvt; #ifndef __lint long info; #endif timeout.tv_sec = 0; timeout.tv_usec = 10000; /* microseconds, 10ms */ nr_malloc = 0; mallocAgain: a = (REAL *) malloc((MAT_SIZE + 8) * (MAT_SIZE + 1) * (size_t)sizeof (REAL)); b = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL)); x = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL)); ipvt = (long *)malloc(MAT_SIZE * (size_t)sizeof (long)); if ((NULL == a) || (NULL == b) || (NULL == x) || (NULL == ipvt)) { if (NULL != a) free(a); if (NULL != b) free(b); if (NULL != x) free(x); if (NULL != ipvt) free(ipvt); /* sleep 10 ms. wait for 100 ms */ if (nr_malloc++ < 11) { (void) select(1, NULL, NULL, NULL, &timeout); goto mallocAgain; } fps_msg(fps_verbose_msg, "Malloc failed in lapack, matrix size %d", MAT_SIZE); return (2); } lda = MAT_SIZE + 8; n = MAT_SIZE; (void) MATGEN(a, lda, n, b, &norma); #ifndef __lint GEFA(n, n, a, lda, ipvt, &info); GESL('N', n, 1, a, lda, ipvt, b, n, &info); #endif free(ipvt); for (i = 0; i < n; i++) { x[i] = b[i]; } (void) MATGEN((REAL *) a, lda, n, b, &norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } MXPY(n, b, n, lda, x, (REAL *) a); free(a); *resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { abs = (REAL)fabs((double)b[i]); *resid = (*resid > abs) ? *resid : abs; abs = (REAL)fabs((double)x[i]); normx = (normx > abs) ? normx : abs; } free(b); *eps = EPSLON((REAL) LP_ONE); *residn = *resid / (n * norma * normx * (*eps)); *x11 = x[0] - 1; *xn1 = x[n - 1] - 1; free(x); return (0); } /* * dran(int iseed[4]) returns a random real number from a * uniform (0,1) distribution. */ static double dran(int iseed[4]) { double r; double value; int ipw2; int it1; int it2; int it3; int it4; int m1; int m2; int m3; int m4; /* Set constants */ m1 = 494; m2 = 322; m3 = 2508; m4 = 2549; ipw2 = 4096; r = 1.0 / ipw2; /* multiply the seed by the multiplier modulo 2**48 */ it4 = iseed[3] * m4; it3 = it4 / ipw2; it4 = it4 - ipw2 * it3; it3 = it3 + iseed[2] * m4 + iseed[3] * m3; it2 = it3 / ipw2; it3 = it3 - ipw2 * it2; it2 = it2 + iseed[1] * m4 + iseed[2] * m3 + iseed[3] * m2; it1 = it2 / ipw2; it2 = it2 - ipw2 * it1; it1 = it1 + iseed[0] * m4 + iseed[1] * m3 + iseed[2] * m2 + iseed[3] * m1; it1 = it1 % ipw2; /* return updated seed */ iseed[0] = it1; iseed[1] = it2; iseed[2] = it3; iseed[3] = it4; /* convert 48-bit integer to a real number in the interval (0,1) */ value = r * ((double)it1 + r * ((double)it2 + r * ((double)it3 + r * ((double)it4)))); return (value); } /* * MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma) * generates matrix a and b. */ #define ALPHA 1.68750 static int MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma) { int i; int init[4]; int j; REAL value; init[0] = 1; init[1] = 2; init[2] = 3; init[3] = 1325; *norma = LP_ZERO; for (j = 0; j < n; j++) { for (i = 0; i < n; i++) { #ifdef FPS_LAPA_UNK a[lda*j+i] = (i < j) ? (double)(i+1) : (double)(j+ALPHA); if (fabs(a[lda*j+i]) > *norma) *norma = fabs(a[lda*j+i]); } /* i */ #else value = (REAL) dran(init) - 0.5; a[lda * j + i] = value; value = fabs(value); if (value > *norma) { *norma = value; } } /* i */ #endif /* FPS_LAPA_UNK */ } /* j */ for (i = 0; i < n; i++) { b[i] = LP_ZERO; } for (j = 0; j < n; j++) { for (i = 0; i < n; i++) { b[i] = b[i] + a[lda * j + i]; } } return (0); } /* * EPSLON(REAL x) estimates unit roundoff in * quantities of size x. */ static REAL EPSLON(REAL x) { REAL a; REAL abs; REAL b; REAL c; REAL eps; a = 4.0e0 / 3.0e0; eps = LP_ZERO; while (eps == LP_ZERO) { b = a - LP_ONE; c = b + b + b; eps = (REAL)fabs((double)(c - LP_ONE)); } abs = (REAL)fabs((double)x); return (eps * abs); } /* * MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]) * multiplies matrix m times vector x and add the result to * vector y. */ static void MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]) { int i; int j; int jmin; /* cleanup odd vector */ j = n2 % 2; if (j >= 1) { j = j - 1; for (i = 0; i < n1; i++) y[i] = (y[i]) + x[j] * m[ldm * j + i]; } /* cleanup odd group of two vectors */ j = n2 % 4; if (j >= 2) { j = j - 1; for (i = 0; i < n1; i++) y[i] = ((y[i]) + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i]; } /* cleanup odd group of four vectors */ j = n2 % 8; if (j >= 4) { j = j - 1; for (i = 0; i < n1; i++) y[i] = ((((y[i]) + x[j - 3] * m[ldm * (j - 3) + i]) + x[j - 2] * m[ldm * (j - 2) + i]) + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i]; } /* cleanup odd group of eight vectors */ j = n2 % 16; if (j >= 8) { j = j - 1; for (i = 0; i < n1; i++) y[i] = ((((((((y[i]) + x[j - 7] * m[ldm * (j - 7) + i]) + x[j - 6] * m[ldm * (j - 6) + i]) + x[j - 5] * m[ldm * (j - 5) + i]) + x[j - 4] * m[ldm * (j - 4) + i]) + x[j - 3] * m[ldm * (j - 3) + i]) + x[j - 2] * m[ldm * (j - 2) + i]) + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i]; } /* main loop - groups of sixteen vectors */ jmin = (n2 % 16) + 16; for (j = jmin - 1; j < n2; j = j + 16) { for (i = 0; i < n1; i++) y[i] = ((((((((((((((((y[i]) + x[j - 15] * m[ldm * (j - 15) + i]) + x[j - 14] * m[ldm * (j - 14) + i]) + x[j - 13] * m[ldm * (j - 13) + i]) + x[j - 12] * m[ldm * (j - 12) + i]) + x[j - 11] * m[ldm * (j - 11) + i]) + x[j - 10] * m[ldm * (j - 10) + i]) + x[j - 9] * m[ldm * (j - 9) + i]) + x[j - 8] * m[ldm * (j - 8) + i]) + x[j - 7] * m[ldm * (j - 7) + i]) + x[j - 6] * m[ldm * (j - 6) + i]) + x[j - 5] * m[ldm * (j - 5) + i]) + x[j - 4] * m[ldm * (j - 4) + i]) + x[j - 3] * m[ldm * (j - 3) + i]) + x[j - 2] * m[ldm * (j - 2) + i]) + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i]; } }