1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms 5 * of the Common Development and Distribution License 6 * (the "License"). You may not use this file except 7 * in compliance with the License. 8 * 9 * You can obtain a copy of the license at 10 * src/OPENSOLARIS.LICENSE 11 * or http://www.opensolaris.org/os/licensing. 12 * See the License for the specific language governing 13 * permissions and limitations under the License. 14 * 15 * When distributing Covered Code, include this CDDL 16 * HEADER in each file and include the License file at 17 * usr/src/OPENSOLARIS.LICENSE. If applicable, 18 * add the following below this CDDL HEADER, with the 19 * fields enclosed by brackets "[]" replaced with your 20 * own identifying information: Portions Copyright [yyyy] 21 * [name of copyright owner] 22 * 23 * CDDL HEADER END 24 */ 25 26/* 27 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31/* 32 * routine to benchmark cache-to-cache transfer times... uses 33 * solaris features to find and bind to cpus in the current 34 * processor set, so not likely to work elsewhere. 35 */ 36 37 38#include <unistd.h> 39#include <stdlib.h> 40#include <stdio.h> 41#include <fcntl.h> 42#include <string.h> 43#include <sys/processor.h> 44#include <sys/types.h> 45#include <stdio.h> 46#include <errno.h> 47#include <sys/pset.h> 48 49#include "libmicro.h" 50 51static long opts = 1024*512; 52 53typedef struct { 54 long **ts_data; 55 long ts_result; 56 pthread_mutex_t ts_lock; 57} tsd_t; 58 59static unsigned int ncpu = 1024; 60 61static tsd_t *thread_data[1024]; 62static processorid_t cpus[1024]; 63 64int traverse_ptrchain(long **, int, int); 65 66int 67benchmark_init() 68{ 69 lm_tsdsize = sizeof (tsd_t); 70 71 (void) sprintf(lm_optstr, "s:"); 72 73 (void) sprintf(lm_usage, 74 " [-s size] size of access area in bytes" 75 " (default %ld)\n" 76 "notes: measures cache to cache transfer times on Solaris\n", 77 opts); 78 79 (void) sprintf(lm_header, "%8s", "size"); 80 81 return (0); 82} 83 84int 85benchmark_optswitch(int opt, char *optarg) 86{ 87 switch (opt) { 88 case 's': 89 opts = sizetoint(optarg); 90 break; 91 default: 92 return (-1); 93 } 94 95 return (0); 96} 97 98int 99benchmark_initrun() 100{ 101 if (pset_info(PS_MYID, NULL, &ncpu, cpus) < 0) { 102 perror("pset_info"); 103 return (1); 104 } 105 106 return (0); 107} 108 109int 110benchmark_initworker(void *tsd) 111{ 112 tsd_t *ts = (tsd_t *)tsd; 113 int i, j; 114 processorid_t cpu; 115 116 ts->ts_data = malloc(opts); 117 118 if (ts->ts_data == NULL) { 119 return (1); 120 } 121 122 (void) pthread_mutex_init(&ts->ts_lock, NULL); 123 124 125 if (processor_bind(P_LWPID, P_MYID, 126 cpu = cpus[(pthread_self() - 1) % ncpu], 127 NULL) < 0) { 128 perror("processor_bind:"); 129 return (1); 130 } 131 132 (void) printf("# thread %d using processor %d\n", pthread_self(), cpu); 133 134 /* 135 * use lmbench style backwards stride 136 */ 137 138 for (i = 0; i < opts / sizeof (long); i++) { 139 j = i - 128; 140 if (j < 0) 141 j = j + opts / sizeof (long); 142 ts->ts_data[i] = (long *)&(ts->ts_data[j]); 143 } 144 145 thread_data[pthread_self() - 1] = ts; 146 147 return (0); 148} 149 150/* 151 * here we go in order for each thread, causing inherent serialization 152 * this is normally not a good idea, but in this case we're trying to 153 * measure cache-to-cache transfer times, and if we run threads in 154 * parallel we're likely to see saturation effects rather than cache-to-cache, 155 * esp. on wimpy memory platforms like P4. 156 */ 157 158 159/*ARGSUSED*/ 160int 161benchmark(void *tsd, result_t *res) 162{ 163 tsd_t *ts; 164 int i, j; 165 int count = opts / 128 / sizeof (long); 166 167 for (j = 0; j < lm_optB; j++) 168 for (i = 0; i < lm_optT; i++) { 169 ts = thread_data[i]; 170 (void) pthread_mutex_lock(&ts->ts_lock); 171 ts->ts_result += traverse_ptrchain( 172 (long **)ts->ts_data, count, 0); 173 (void) pthread_mutex_unlock(&ts->ts_lock); 174 } 175 176 res->re_count = lm_optB * lm_optT * count; 177 178 return (0); 179} 180 181int 182traverse_ptrchain(long **ptr, int count, int value) 183{ 184 int i; 185 186 for (i = 0; i < count; i += 10) { 187 *ptr = *ptr + value; 188 ptr = (long **)*ptr; 189 *ptr = *ptr + value; 190 ptr = (long **)*ptr; 191 *ptr = *ptr + value; 192 ptr = (long **)*ptr; 193 *ptr = *ptr + value; 194 ptr = (long **)*ptr; 195 *ptr = *ptr + value; 196 ptr = (long **)*ptr; 197 *ptr = *ptr + value; 198 ptr = (long **)*ptr; 199 *ptr = *ptr + value; 200 ptr = (long **)*ptr; 201 *ptr = *ptr + value; 202 ptr = (long **)*ptr; 203 *ptr = *ptr + value; 204 ptr = (long **)*ptr; 205 *ptr = *ptr + value; 206 ptr = (long **)*ptr; 207 *ptr = *ptr + value; 208 } 209 return ((int)*ptr); /* bogus return */ 210} 211 212 213char * 214benchmark_result() 215{ 216 static char result[256]; 217 218 (void) sprintf(result, "%8ld ", opts); 219 220 221 return (result); 222} 223