1/* 2 * Copyright (c) 2009, 2010, 2011, ETH Zurich. 3 * All rights reserved. 4 * 5 * This file is distributed under the terms in the attached LICENSE file. 6 * If you do not find this file, copies can be found by writing to: 7 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group. 8 */ 9 10#include <stdlib.h> 11#include <stdio.h> 12#include <assert.h> 13#include <stdint.h> 14#include <omp.h> 15#include <arch/x86/barrelfish_kpi/asm_inlines_arch.h> 16 17#define GANG_SCHEDULING 18#undef MEASURE_SYNC 19#define MEASURE 20 21#define WORK_PERIOD 5000000000UL 22#define STACK_SIZE (64 * 1024) 23 24int main(int argc, char *argv[]) 25{ 26 uint64_t now, start; 27 volatile uint64_t workcnt, workload = 0; 28 int64_t workmax = 1000; 29 int64_t i; 30 31 if(argc == 1) { 32 printf("calibrating...\n"); 33 34 do { 35 workload = 0; 36 workmax *= 2; 37 38 start = rdtsc(); 39 40#pragma omp parallel private(i,workload) 41 for(i = 0; i < workmax; i++) { 42#pragma omp barrier 43 workload++; 44 } 45 46 now = rdtsc(); 47 } while(now - start < WORK_PERIOD); 48 49 printf("workmax = %ld\n", workmax); 50 return 0; 51 } else { 52 workmax = atol(argv[1]); 53 } 54 55 int nthreads = omp_get_max_threads(); 56 57 if(argc == 3) { 58 nthreads = atoi(argv[2]); 59 assert(!"REVISE!!!"); 60 bomp_bomp_init(nthreads); 61 omp_set_num_threads(nthreads); 62 } 63 64 printf("threads %d, workmax %ld, CPUs %d\n", nthreads, workmax, 65 omp_get_num_procs()); 66 67#ifdef MEASURE_SYNC 68 uint64_t waits[16] = { 69 0, 1000, 1000000, 1000000000, 500, 5000000, 5000000000, 3000000, 70 0, 1000, 1000000, 1000000000, 500, 5000000, 5000000000, 3000000 71 }; 72 uint64_t ts[16][10]; 73 74 printf("before sync:\n"); 75 76#pragma omp parallel private(workcnt) 77 { 78 for(int j = 0; j < waits[omp_get_thread_num()]; j++) { 79 workcnt++; 80 } 81 82 for(int j = 0; j < 10; j++) { 83 ts[omp_get_thread_num()][j] = rdtsc(); 84 } 85 } 86 87 for(int j = 0; j < 10; j++) { 88 printf("timestamp %d: ", j); 89 90 for(int n = 1; n < nthreads; n++) { 91 printf("%ld ", ts[n][j] - ts[n - 1][j]); 92 } 93 94 printf("\n"); 95 } 96 97 printf("after sync:\n"); 98 99#pragma omp parallel 100 { 101 bomp_synchronize(); 102 103 for(int j = 0; j < 10; j++) { 104 ts[omp_get_thread_num()][j] = rdtsc(); 105 } 106 } 107 108 for(int j = 0; j < 10; j++) { 109 printf("timestamp %d: ", j); 110 111 for(int n = 1; n < nthreads; n++) { 112 printf("%ld ", ts[n][j] - ts[n - 1][j]); 113 } 114 115 printf("\n"); 116 } 117#endif 118 119#ifdef GANG_SCHEDULING 120#pragma omp parallel 121 { 122 // bomp_synchronize(); 123 } 124#endif 125 126 start = rdtsc(); 127 128#ifdef MEASURE 129# define MAXTHREADS 16 130# define WORKMAX 10000 131 132 static uint64_t starta[MAXTHREADS][WORKMAX]; 133 static uint64_t end1[MAXTHREADS][WORKMAX]; 134 static uint64_t end2[MAXTHREADS][WORKMAX]; 135#endif 136 137 // Do some work 138#pragma omp parallel private(workcnt,i) 139 for(i = 0; i < workmax; i++) { 140#ifdef MEASURE 141 starta[omp_get_thread_num()][i < WORKMAX ? i : WORKMAX] = rdtsc(); 142#endif 143 workcnt++; 144#ifdef MEASURE 145 end1[omp_get_thread_num()][i < WORKMAX ? i : WORKMAX] = rdtsc(); 146#endif 147 148#pragma omp barrier 149 150#ifdef MEASURE 151 end2[omp_get_thread_num()][i < WORKMAX ? i : WORKMAX] = rdtsc(); 152#endif 153 } 154 155 now = rdtsc(); 156 157#ifdef MEASURE 158 printf("avg compute time: "); 159 for(int n = 0; n < nthreads; n++) { 160 uint64_t sum = 0, min = end1[0][0], max = 0; 161 162 for(i = 0; i < WORKMAX; i++) { 163 uint64_t val = end1[n][i] - starta[n][i]; 164 sum += val; 165 min = val < min ? val : min; 166 max = val > max ? val : max; 167 } 168 169 printf("%lu(%lu,%lu) ", sum / WORKMAX, min, max); 170 } 171 printf("\n"); 172 173#if 0 174 printf("wait time dump:\n"); 175 for(i = 0; i < WORKMAX; i++) { 176 for(int n = 0; n < nthreads; n++) { 177 uint64_t val = end2[n][i] - end1[n][i]; 178 printf("%lu ", val); 179 } 180 printf("\n"); 181 } 182#endif 183 184 printf("avg wait time: "); 185 for(int n = 0; n < nthreads; n++) { 186 uint64_t sum = 0, min = end2[0][0], max = 0; 187 188 for(i = 0; i < WORKMAX; i++) { 189 uint64_t val = end2[n][i] - end1[n][i]; 190 sum += val; 191 min = val < min ? val : min; 192 max = val > max ? val : max; 193 } 194 195 printf("%lu(%lu,%lu) ", sum / WORKMAX, min, max); 196 } 197 printf("\n"); 198#endif 199 200 printf("%s: threads %d, compute time %lu ticks\n", argv[0], nthreads, now - start); 201 202 for(;;); 203 return 0; 204} 205