linpack.c revision 6651:beb4b5f48c82
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22/* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27#pragma ident "%Z%%M% %I% %E% SMI" 28 29#include <errno.h> 30#include <stdio.h> 31#include <stdlib.h> 32#include <sys/time.h> 33#include <unistd.h> 34#include <externs.h> 35#include <fp.h> 36#include <fps_ereport.h> 37#include <fpstestmsg.h> 38#include <linpack.h> 39#include <sunperf.h> 40 41double fabs(double x); 42 43extern void ___pl_dss_set_chip_cache_(int *cache_size); 44static double dran(int iseed[4]); 45static int LINSUB(REAL * residn, REAL * resid, 46 REAL * eps, REAL * x11, REAL * xn1, int fps_verbose_msg); 47static int MATGEN(REAL a[], int lda, int n, REAL b[], REAL * norma); 48static REAL EPSLON(REAL x); 49static void MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]); 50 51extern int errno; 52static int LAPACK_ECACHE_SIZE = 8 * 1024 * 1024; 53static int MAT_SIZE; 54 55/* 56 * LINPACK(int Stress, int unit, struct fps_test_ereport *report, 57 * int fps_verbose_msg) 58 * performs the single and double precision lapack test. If an 59 * error is found, relevant data is collected and stored in report. 60 */ 61int 62LINPACK(int Stress, int unit, struct fps_test_ereport *report, 63 int fps_verbose_msg) 64{ 65 char err_data[MAX_INFO_SIZE]; 66 char l_buf[64]; 67 int c_index; 68 int ret; 69 REAL eps; 70 REAL resid; 71 REAL residn; 72 REAL x11; 73 REAL xn1; 74 REAL EPS; 75 REAL RESID; 76 REAL RESIDN; 77 REAL X11; 78 REAL XN1; 79 uint64_t expected[5]; 80 uint64_t observed[5]; 81 82#ifdef FPS_LAPA_UNK 83#ifndef DP 84 if (Stress > 1000) 85 return (0); 86#endif /* DP */ 87#endif /* FPS_LAPA_UNK */ 88 89 if (Stress > 10000) 90 return (0); 91 92 /* 93 * make sure is no dependency on the E$ size Without this call the 94 * computed results will depend on the size of the E$ ( 95 * sos10/libsunperf ) IIIi computed results != IV+/IV/III+/III ... 96 */ 97 ___pl_dss_set_chip_cache_(&LAPACK_ECACHE_SIZE); 98 99 c_index = Stress; 100 101 if (2000 == c_index) 102 c_index = 1001; 103 if (3000 == c_index) 104 c_index = 1002; 105 if (4016 == c_index) 106 c_index = 1003; 107 if (5000 == c_index) 108 c_index = 1004; 109 if (6000 == c_index) 110 c_index = 1005; 111 if (7016 == c_index) 112 c_index = 1006; 113 if (8034 == c_index) 114 c_index = 1007; 115 if (9000 == c_index) 116 c_index = 1008; 117 if (10000 == c_index) 118 c_index = 1009; 119 120 (void) snprintf(l_buf, 63, "%s(%d,cpu=%d)", PREC, Stress, unit); 121 fps_msg(fps_verbose_msg, gettext(FPSM_02), l_buf, unit); 122 123 MAT_SIZE = Stress; 124 ret = LINSUB(&residn, &resid, &eps, &x11, &xn1, fps_verbose_msg); 125 126 if (2 == ret) { 127 if (errno == EAGAIN || errno == ENOMEM) 128 _exit(FPU_SYSCALL_TRYAGAIN); 129 else 130 _exit(FPU_SYSCALL_FAIL); 131 } 132 133#ifdef FPS_LAPA_UNK 134 RESIDN = RESID = X11 = XN1 = 0.0000000000000000e+00; 135 136#ifdef DP 137 EPS = 2.2204460492503131e-16; 138#else /* DP */ 139 EPS = 1.1920928955078125e-07; 140#endif /* DP */ 141 142#else /* FPS_LAPA_UNK */ 143 144 RESIDN = LinpValsA[c_index].residn; 145 RESID = LinpValsA[c_index].resid; 146 EPS = LinpValsA[c_index].eps; 147 X11 = LinpValsA[c_index].x11; 148 XN1 = LinpValsA[c_index].xn1; 149 150#endif /* FPS_LAPA_UNK */ 151 152 if ((residn == RESIDN) && (resid == RESID) && (eps == EPS) && 153 (x11 == X11) && (xn1 == XN1)) { 154 155 return (0); 156 } else { 157 snprintf(err_data, sizeof (err_data), 158 "\nExpected: %.16e, %.16e, %.16e, %.16e, %.16e" 159 "\nObserved: %.16e, %.16e, %.16e, %.16e, %.16e", 160 RESIDN, RESID, EPS, X11, XN1, residn, resid, eps, x11, xn1); 161 162 163#ifdef DP 164 observed[0] = *(uint64_t *)&residn; 165 observed[1] = *(uint64_t *)&resid; 166 observed[2] = *(uint64_t *)&eps; 167 observed[3] = *(uint64_t *)&x11; 168 observed[4] = *(uint64_t *)&xn1; 169 expected[0] = *(uint64_t *)&RESIDN; 170 expected[1] = *(uint64_t *)&RESID; 171 expected[2] = *(uint64_t *)&EPS; 172 expected[3] = *(uint64_t *)&X11; 173 expected[4] = *(uint64_t *)&XN1; 174 175 setup_fps_test_struct(IS_EREPORT_INFO, report, 176 6317, &observed, &expected, 5, 5, err_data); 177#else 178 observed[0] = (uint64_t)(*(uint32_t *)&residn); 179 observed[1] = (uint64_t)(*(uint32_t *)&resid); 180 observed[2] = (uint64_t)(*(uint32_t *)&eps); 181 observed[3] = (uint64_t)(*(uint32_t *)&x11); 182 observed[4] = (uint64_t)(*(uint32_t *)&xn1); 183 expected[0] = (uint64_t)(*(uint32_t *)&RESIDN); 184 expected[1] = (uint64_t)(*(uint32_t *)&RESID); 185 expected[2] = (uint64_t)(*(uint32_t *)&EPS); 186 expected[3] = (uint64_t)(*(uint32_t *)&X11); 187 expected[4] = (uint64_t)(*(uint32_t *)&XN1); 188 189 setup_fps_test_struct(IS_EREPORT_INFO, report, 190 6316, &observed, &expected, 5, 5, err_data); 191#endif 192 193 return (-1); 194 } 195} 196 197/* 198 * LINSUB(REAL *residn, REAL *resid, REAL *eps, 199 * REAL *x11, REAL *xn1, int fps_verbose_msg)begins 200 * the lapack calculation calls. 201 */ 202static int 203LINSUB(REAL *residn, REAL *resid, 204 REAL *eps, REAL *x11, REAL *xn1, 205 int fps_verbose_msg) 206{ 207 int i; 208 int lda; 209 int n; 210 int nr_malloc; 211 REAL *a; 212 REAL abs; 213 REAL *b; 214 REAL norma; 215 REAL normx; 216 REAL *x; 217 struct timeval timeout; 218 long info; 219 long *ipvt; 220 221 timeout.tv_sec = 0; 222 timeout.tv_usec = 10000; /* microseconds, 10ms */ 223 nr_malloc = 0; 224 225mallocAgain: 226 227 a = (REAL *) malloc((MAT_SIZE + 8) * (MAT_SIZE + 1) * 228 (size_t)sizeof (REAL)); 229 b = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL)); 230 x = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL)); 231 232 ipvt = (long *)malloc(MAT_SIZE * (size_t)sizeof (long)); 233 234 if ((NULL == a) || (NULL == b) || 235 (NULL == x) || (NULL == ipvt)) { 236 if (NULL != a) 237 free(a); 238 if (NULL != b) 239 free(b); 240 if (NULL != x) 241 free(x); 242 if (NULL != ipvt) 243 free(ipvt); 244 245 /* sleep 10 ms. wait for 100 ms */ 246 if (nr_malloc++ < 11) { 247 (void) select(1, NULL, NULL, NULL, &timeout); 248 goto mallocAgain; 249 } 250 fps_msg(fps_verbose_msg, 251 "Malloc failed in lapack, matrix size %d", 252 MAT_SIZE); 253 254 return (2); 255 } 256 lda = MAT_SIZE + 8; 257 n = MAT_SIZE; 258 259 (void) MATGEN(a, lda, n, b, &norma); 260 GEFA(n, n, a, lda, ipvt, &info); 261 GESL('N', n, 1, a, lda, ipvt, b, n, &info); 262 free(ipvt); 263 264 for (i = 0; i < n; i++) { 265 x[i] = b[i]; 266 } 267 268 (void) MATGEN((REAL *) a, lda, n, b, &norma); 269 270 for (i = 0; i < n; i++) { 271 b[i] = -b[i]; 272 } 273 274 MXPY(n, b, n, lda, x, (REAL *) a); 275 free(a); 276 277 *resid = 0.0; 278 normx = 0.0; 279 280 for (i = 0; i < n; i++) { 281 abs = (REAL)fabs((double)b[i]); 282 *resid = (*resid > abs) ? *resid : abs; 283 abs = (REAL)fabs((double)x[i]); 284 normx = (normx > abs) ? normx : abs; 285 } 286 287 free(b); 288 289 *eps = EPSLON((REAL) LP_ONE); 290 291 *residn = *resid / (n * norma * normx * (*eps)); 292 293 *x11 = x[0] - 1; 294 *xn1 = x[n - 1] - 1; 295 296 free(x); 297 298 return (0); 299} 300 301/* 302 * dran(int iseed[4]) returns a random real number from a 303 * uniform (0,1) distribution. 304 */ 305static double 306dran(int iseed[4]) 307{ 308 double r; 309 double value; 310 int ipw2; 311 int it1; 312 int it2; 313 int it3; 314 int it4; 315 int m1; 316 int m2; 317 int m3; 318 int m4; 319 320 /* Set constants */ 321 m1 = 494; 322 m2 = 322; 323 m3 = 2508; 324 m4 = 2549; 325 ipw2 = 4096; 326 r = 1.0 / ipw2; 327 328 /* multiply the seed by the multiplier modulo 2**48 */ 329 it4 = iseed[3] * m4; 330 it3 = it4 / ipw2; 331 it4 = it4 - ipw2 * it3; 332 it3 = it3 + iseed[2] * m4 + iseed[3] * m3; 333 it2 = it3 / ipw2; 334 it3 = it3 - ipw2 * it2; 335 it2 = it2 + iseed[1] * m4 + iseed[2] * m3 + iseed[3] * m2; 336 it1 = it2 / ipw2; 337 it2 = it2 - ipw2 * it1; 338 it1 = it1 + iseed[0] * m4 + iseed[1] * m3 + iseed[2] * m2 + 339 iseed[3] * m1; 340 it1 = it1 % ipw2; 341 342 /* return updated seed */ 343 iseed[0] = it1; 344 iseed[1] = it2; 345 iseed[2] = it3; 346 iseed[3] = it4; 347 348 /* convert 48-bit integer to a real number in the interval (0,1) */ 349 value = r * ((double)it1 + r * ((double)it2 + r * ((double)it3 + 350 r * ((double)it4)))); 351 352 return (value); 353} 354 355/* 356 * MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma) 357 * generates matrix a and b. 358 */ 359 360#define ALPHA 1.68750 361static int 362MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma) 363{ 364 int i; 365 int init[4]; 366 int j; 367 REAL value; 368 369 init[0] = 1; 370 init[1] = 2; 371 init[2] = 3; 372 init[3] = 1325; 373 *norma = LP_ZERO; 374 for (j = 0; j < n; j++) { 375 for (i = 0; i < n; i++) { 376#ifdef FPS_LAPA_UNK 377 a[lda*j+i] = 378 (i < j) ? (double)(i+1) : (double)(j+ALPHA); 379 if (fabs(a[lda*j+i]) > *norma) 380 *norma = fabs(a[lda*j+i]); 381 } /* i */ 382#else 383 value = (REAL) dran(init) - 0.5; 384 a[lda * j + i] = value; 385 value = fabs(value); 386 if (value > *norma) { 387 *norma = value; 388 } 389 } /* i */ 390#endif /* FPS_LAPA_UNK */ 391 } /* j */ 392 393 394 for (i = 0; i < n; i++) { 395 b[i] = LP_ZERO; 396 } 397 for (j = 0; j < n; j++) { 398 for (i = 0; i < n; i++) { 399 b[i] = b[i] + a[lda * j + i]; 400 } 401 } 402 403 return (0); 404} 405 406/* 407 * IAMAX(int n, REAL dx[])finds the index of element 408 * having maximum absolute value. 409 */ 410int 411IAMAX(int n, REAL dx[]) 412{ 413 double abs; 414 double dmax; 415 int i; 416 int itemp; 417 418 if (n < 1) 419 return (-1); 420 if (n == 1) 421 return (0); 422 423 itemp = 0; 424 dmax = fabs((double)dx[0]); 425 426 for (i = 1; i < n; i++) { 427 abs = fabs((double)dx[i]); 428 if (abs > dmax) { 429 itemp = i; 430 dmax = abs; 431 } 432 } 433 434 return (itemp); 435} 436 437/* 438 * EPSLON(REAL x) estimates unit roundoff in 439 * quantities of size x. 440 */ 441static REAL 442EPSLON(REAL x) 443{ 444 REAL a; 445 REAL abs; 446 REAL b; 447 REAL c; 448 REAL eps; 449 450 a = 4.0e0 / 3.0e0; 451 eps = LP_ZERO; 452 453 while (eps == LP_ZERO) { 454 b = a - LP_ONE; 455 c = b + b + b; 456 eps = (REAL)fabs((double)(c - LP_ONE)); 457 } 458 459 abs = (REAL)fabs((double)x); 460 461 return (eps * abs); 462} 463 464/* 465 * MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]) 466 * multiplies matrix m times vector x and add the result to 467 * vector y. 468 */ 469static void 470MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]) 471{ 472 int i; 473 int j; 474 int jmin; 475 476 /* cleanup odd vector */ 477 j = n2 % 2; 478 if (j >= 1) { 479 j = j - 1; 480 for (i = 0; i < n1; i++) 481 y[i] = (y[i]) + x[j] * m[ldm * j + i]; 482 } 483 484 /* cleanup odd group of two vectors */ 485 j = n2 % 4; 486 if (j >= 2) { 487 j = j - 1; 488 for (i = 0; i < n1; i++) 489 y[i] = ((y[i]) 490 + x[j - 1] * m[ldm * (j - 1) + i]) 491 + x[j] * m[ldm * j + i]; 492 } 493 494 /* cleanup odd group of four vectors */ 495 j = n2 % 8; 496 if (j >= 4) { 497 j = j - 1; 498 for (i = 0; i < n1; i++) 499 y[i] = ((((y[i]) 500 + x[j - 3] * m[ldm * (j - 3) + i]) 501 + x[j - 2] * m[ldm * (j - 2) + i]) 502 + x[j - 1] * m[ldm * (j - 1) + i]) 503 + x[j] * m[ldm * j + i]; 504 } 505 506 /* cleanup odd group of eight vectors */ 507 j = n2 % 16; 508 if (j >= 8) { 509 j = j - 1; 510 for (i = 0; i < n1; i++) 511 y[i] = ((((((((y[i]) 512 + x[j - 7] * m[ldm * (j - 7) + i]) 513 + x[j - 6] * m[ldm * (j - 6) + i]) 514 + x[j - 5] * m[ldm * (j - 5) + i]) 515 + x[j - 4] * m[ldm * (j - 4) + i]) 516 + x[j - 3] * m[ldm * (j - 3) + i]) 517 + x[j - 2] * m[ldm * (j - 2) + i]) 518 + x[j - 1] * m[ldm * (j - 1) + i]) 519 + x[j] * m[ldm * j + i]; 520 } 521 522 /* main loop - groups of sixteen vectors */ 523 jmin = (n2 % 16) + 16; 524 for (j = jmin - 1; j < n2; j = j + 16) { 525 for (i = 0; i < n1; i++) 526 y[i] = ((((((((((((((((y[i]) 527 + x[j - 15] * m[ldm * (j - 15) + i]) 528 + x[j - 14] * m[ldm * (j - 14) + i]) 529 + x[j - 13] * m[ldm * (j - 13) + i]) 530 + x[j - 12] * m[ldm * (j - 12) + i]) 531 + x[j - 11] * m[ldm * (j - 11) + i]) 532 + x[j - 10] * m[ldm * (j - 10) + i]) 533 + x[j - 9] * m[ldm * (j - 9) + i]) 534 + x[j - 8] * m[ldm * (j - 8) + i]) 535 + x[j - 7] * m[ldm * (j - 7) + i]) 536 + x[j - 6] * m[ldm * (j - 6) + i]) 537 + x[j - 5] * m[ldm * (j - 5) + i]) 538 + x[j - 4] * m[ldm * (j - 4) + i]) 539 + x[j - 3] * m[ldm * (j - 3) + i]) 540 + x[j - 2] * m[ldm * (j - 2) + i]) 541 + x[j - 1] * m[ldm * (j - 1) + i]) 542 + x[j] * m[ldm * j + i]; 543 } 544} 545