1/* Copyright (C) 2021-2023 Free Software Foundation, Inc. 2 Contributed by Oracle. 3 4 This file is part of GNU Binutils. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, 51 Franklin Street - Fifth Floor, Boston, 19 MA 02110-1301, USA. */ 20 21/* 22* ----------------------------------------------------------------------------- 23* This program implements the multiplication of an m by n matrix with a vector 24* of length n. The Posix Threads parallel programming model is used to 25* parallelize the core matrix-vector multiplication algorithm. 26* ----------------------------------------------------------------------------- 27*/ 28 29#include "mydefs.h" 30 31int main (int argc, char **argv) 32{ 33 bool verbose = false; 34 35 thread_data *thread_data_arguments; 36 pthread_t *pthread_ids; 37 38 int64_t remainder_rows; 39 int64_t rows_per_thread; 40 int64_t active_threads; 41 42 int64_t number_of_rows; 43 int64_t number_of_columns; 44 int64_t number_of_threads; 45 int64_t repeat_count; 46 47 double **A; 48 double *b; 49 double *c; 50 double *ref; 51 52 int64_t errors; 53 54/* 55* ----------------------------------------------------------------------------- 56* Start the ball rolling - Get the user options and parse them. 57* ----------------------------------------------------------------------------- 58*/ 59 (void) get_user_options ( 60 argc, 61 argv, 62 &number_of_rows, 63 &number_of_columns, 64 &repeat_count, 65 &number_of_threads, 66 &verbose); 67 68 if (verbose) printf ("Verbose mode enabled\n"); 69 70/* 71* ----------------------------------------------------------------------------- 72* Allocate storage for all data structures. 73* ----------------------------------------------------------------------------- 74*/ 75 (void) allocate_data ( 76 number_of_threads, number_of_rows, 77 number_of_columns, &A, &b, &c, &ref, 78 &thread_data_arguments, &pthread_ids); 79 80 if (verbose) printf ("Allocated data structures\n"); 81 82/* 83* ----------------------------------------------------------------------------- 84* Initialize the data. 85* ----------------------------------------------------------------------------- 86*/ 87 (void) init_data (number_of_rows, number_of_columns, A, b, c, ref); 88 89 if (verbose) printf ("Initialized matrix and vectors\n"); 90 91/* 92* ----------------------------------------------------------------------------- 93* Determine the main workload settings. 94* ----------------------------------------------------------------------------- 95*/ 96 (void) get_workload_stats ( 97 number_of_threads, number_of_rows, 98 number_of_columns, &rows_per_thread, 99 &remainder_rows, &active_threads); 100 101 if (verbose) printf ("Defined workload distribution\n"); 102 103 for (int64_t TID=active_threads; TID<number_of_threads; TID++) 104 { 105 thread_data_arguments[TID].do_work = false; 106 } 107 for (int64_t TID=0; TID<active_threads; TID++) 108 { 109 thread_data_arguments[TID].thread_id = TID; 110 thread_data_arguments[TID].verbose = verbose; 111 thread_data_arguments[TID].do_work = true; 112 thread_data_arguments[TID].repeat_count = repeat_count; 113 114 (void) determine_work_per_thread ( 115 TID, rows_per_thread, remainder_rows, 116 &thread_data_arguments[TID].row_index_start, 117 &thread_data_arguments[TID].row_index_end); 118 119 thread_data_arguments[TID].m = number_of_rows; 120 thread_data_arguments[TID].n = number_of_columns; 121 thread_data_arguments[TID].b = b; 122 thread_data_arguments[TID].c = c; 123 thread_data_arguments[TID].A = A; 124 } 125 126 if (verbose) printf ("Assigned work to threads\n"); 127 128/* 129* ----------------------------------------------------------------------------- 130* Create and execute the threads. Note that this means that there will be 131* <t+1> threads, with <t> the number of threads specified on the commandline, 132* or the default if the -t option was not used. 133* 134* Per the pthread_create () call, the threads start executing right away. 135* ----------------------------------------------------------------------------- 136*/ 137 for (int TID=0; TID<active_threads; TID++) 138 { 139 if (pthread_create (&pthread_ids[TID], NULL, driver_mxv, 140 (void *) &thread_data_arguments[TID]) != 0) 141 { 142 printf ("Error creating thread %d\n", TID); 143 perror ("pthread_create"); exit (-1); 144 } 145 else 146 { 147 if (verbose) printf ("Thread %d has been created\n", TID); 148 } 149 } 150/* 151* ----------------------------------------------------------------------------- 152* Wait for all threads to finish. 153* ----------------------------------------------------------------------------- 154*/ 155 for (int TID=0; TID<active_threads; TID++) 156 { 157 pthread_join (pthread_ids[TID], NULL); 158 } 159 160 if (verbose) 161 { 162 printf ("Matrix vector multiplication has completed\n"); 163 printf ("Verify correctness of result\n"); 164 } 165 166/* 167* ----------------------------------------------------------------------------- 168* Check the numerical results. 169* ----------------------------------------------------------------------------- 170*/ 171 if ((errors = check_results (number_of_rows, number_of_columns, 172 c, ref)) == 0) 173 { 174 if (verbose) printf ("Error check passed\n"); 175 } 176 else 177 { 178 printf ("Error: %ld differences in the results detected\n", errors); 179 } 180 181/* 182* ----------------------------------------------------------------------------- 183* Print a summary of the execution. 184* ----------------------------------------------------------------------------- 185*/ 186 print_all_results (number_of_rows, number_of_columns, number_of_threads, 187 errors); 188 189/* 190* ----------------------------------------------------------------------------- 191* Release the allocated memory and end execution. 192* ----------------------------------------------------------------------------- 193*/ 194 free (A); 195 free (b); 196 free (c); 197 free (ref); 198 free (pthread_ids); 199 200 return (0); 201} 202 203/* 204* ----------------------------------------------------------------------------- 205* Parse user options and set variables accordingly. In case of an error, print 206* a message, but do not bail out yet. In this way we can catch multiple input 207* errors. 208* ----------------------------------------------------------------------------- 209*/ 210int get_user_options (int argc, char *argv[], 211 int64_t *number_of_rows, 212 int64_t *number_of_columns, 213 int64_t *repeat_count, 214 int64_t *number_of_threads, 215 bool *verbose) 216{ 217 int opt; 218 int errors = 0; 219 int64_t default_number_of_threads = 1; 220 int64_t default_rows = 2000; 221 int64_t default_columns = 3000; 222 int64_t default_repeat_count = 200; 223 bool default_verbose = false; 224 225 *number_of_rows = default_rows; 226 *number_of_columns = default_columns; 227 *number_of_threads = default_number_of_threads; 228 *repeat_count = default_repeat_count; 229 *verbose = default_verbose; 230 231 while ((opt = getopt (argc, argv, "m:n:r:t:vh")) != -1) 232 { 233 switch (opt) 234 { 235 case 'm': 236 *number_of_rows = atol (optarg); 237 break; 238 case 'n': 239 *number_of_columns = atol (optarg); 240 break; 241 case 'r': 242 *repeat_count = atol (optarg); 243 break; 244 case 't': 245 *number_of_threads = atol (optarg); 246 break; 247 case 'v': 248 *verbose = true; 249 break; 250 case 'h': 251 default: 252 printf ("Usage: %s " \ 253 "[-m <number of rows>] " \ 254 "[-n <number of columns] [-r <repeat count>] " \ 255 "[-t <number of threads] [-v] [-h]\n", argv[0]); 256 printf ("\t-m - number of rows, default = %ld\n", 257 default_rows); 258 printf ("\t-n - number of columns, default = %ld\n", 259 default_columns); 260 printf ("\t-r - the number of times the algorithm is " \ 261 "repeatedly executed, default = %ld\n", 262 default_repeat_count); 263 printf ("\t-t - the number of threads used, default = %ld\n", 264 default_number_of_threads); 265 printf ("\t-v - enable verbose mode, %s by default\n", 266 (default_verbose) ? "on" : "off"); 267 printf ("\t-h - print this usage overview and exit\n"); 268 269 exit (0); 270 break; 271 } 272 } 273 274/* 275* ----------------------------------------------------------------------------- 276* Check for errors and bail out in case of problems. 277* ----------------------------------------------------------------------------- 278*/ 279 if (*number_of_rows <= 0) 280 { 281 errors++; 282 printf ("Error: The number of rows is %ld but should be strictly " \ 283 "positive\n", *number_of_rows); 284 } 285 if (*number_of_columns <= 0) 286 { 287 errors++; 288 printf ("Error: The number of columns is %ld but should be strictly " \ 289 "positive\n", *number_of_columns); 290 } 291 if (*repeat_count <= 0) 292 { 293 errors++; 294 printf ("Error: The repeat count is %ld but should be strictly " \ 295 "positive\n", *repeat_count); 296 } 297 if (*number_of_threads <= 0) 298 { 299 errors++; 300 printf ("Error: The number of threads is %ld but should be strictly " \ 301 "positive\n", *number_of_threads); 302 } 303 if (errors != 0) 304 { 305 printf ("There are %d input error (s)\n", errors); exit (-1); 306 } 307 308 return (errors); 309} 310 311/* 312* ----------------------------------------------------------------------------- 313* Print a summary of the execution status. 314* ----------------------------------------------------------------------------- 315*/ 316void print_all_results (int64_t number_of_rows, 317 int64_t number_of_columns, 318 int64_t number_of_threads, 319 int64_t errors) 320{ 321 printf ("mxv: error check %s - rows = %ld columns = %ld threads = %ld\n", 322 (errors == 0) ? "passed" : "failed", 323 number_of_rows, number_of_columns, number_of_threads); 324} 325 326/* 327* ----------------------------------------------------------------------------- 328* Check whether the computations produced the correct results. 329* ----------------------------------------------------------------------------- 330*/ 331int64_t check_results (int64_t m, int64_t n, double *c, double *ref) 332{ 333 char *marker; 334 int64_t errors = 0; 335 double relerr; 336 double TOL = 100.0 * DBL_EPSILON; 337 double SMALL = 100.0 * DBL_MIN; 338 339 if ((marker=(char *)malloc (m*sizeof (char))) == NULL) 340 { 341 perror ("array marker"); 342 exit (-1); 343 } 344 345 for (int64_t i=0; i<m; i++) 346 { 347 if (fabs (ref[i]) > SMALL) 348 { 349 relerr = fabs ((c[i]-ref[i])/ref[i]); 350 } 351 else 352 { 353 relerr = fabs ((c[i]-ref[i])); 354 } 355 if (relerr <= TOL) 356 { 357 marker[i] = ' '; 358 } 359 else 360 { 361 errors++; 362 marker[i] = '*'; 363 } 364 } 365 if (errors > 0) 366 { 367 printf ("Found %ld differences in results for m = %ld n = %ld:\n", 368 errors,m,n); 369 for (int64_t i=0; i<m; i++) 370 printf (" %c c[%ld] = %f ref[%ld] = %f\n",marker[i],i,c[i],i,ref[i]); 371 } 372 373 return (errors); 374} 375