1/* Copyright (C) 2021-2023 Free Software Foundation, Inc.
2   Contributed by Oracle.
3
4   This file is part of GNU Binutils.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, 51 Franklin Street - Fifth Floor, Boston,
19   MA 02110-1301, USA.  */
20
21/*
22* -----------------------------------------------------------------------------
23* This program implements the multiplication of an m by n matrix with a vector
24* of length n.  The Posix Threads parallel programming model is used to
25* parallelize the core matrix-vector multiplication algorithm.
26* -----------------------------------------------------------------------------
27*/
28
#include "mydefs.h"

#include <errno.h>
#include <inttypes.h>
30
31int main (int argc, char **argv)
32{
33  bool verbose = false;
34
35  thread_data *thread_data_arguments;
36  pthread_t   *pthread_ids;
37
38  int64_t remainder_rows;
39  int64_t rows_per_thread;
40  int64_t active_threads;
41
42  int64_t number_of_rows;
43  int64_t number_of_columns;
44  int64_t number_of_threads;
45  int64_t repeat_count;
46
47  double  **A;
48  double  *b;
49  double  *c;
50  double  *ref;
51
52  int64_t errors;
53
54/*
55* -----------------------------------------------------------------------------
56* Start the ball rolling - Get the user options and parse them.
57* -----------------------------------------------------------------------------
58*/
59  (void) get_user_options (
60			argc,
61			argv,
62			&number_of_rows,
63			&number_of_columns,
64			&repeat_count,
65			&number_of_threads,
66			&verbose);
67
68  if (verbose) printf ("Verbose mode enabled\n");
69
70/*
71* -----------------------------------------------------------------------------
72* Allocate storage for all data structures.
73* -----------------------------------------------------------------------------
74*/
75  (void) allocate_data (
76		number_of_threads, number_of_rows,
77		number_of_columns, &A, &b, &c, &ref,
78		&thread_data_arguments, &pthread_ids);
79
80  if (verbose) printf ("Allocated data structures\n");
81
82/*
83* -----------------------------------------------------------------------------
84* Initialize the data.
85* -----------------------------------------------------------------------------
86*/
87  (void) init_data (number_of_rows, number_of_columns, A, b, c, ref);
88
89  if (verbose) printf ("Initialized matrix and vectors\n");
90
91/*
92* -----------------------------------------------------------------------------
93* Determine the main workload settings.
94* -----------------------------------------------------------------------------
95*/
96  (void) get_workload_stats (
97		number_of_threads, number_of_rows,
98		number_of_columns, &rows_per_thread,
99		&remainder_rows, &active_threads);
100
101  if (verbose) printf ("Defined workload distribution\n");
102
103  for (int64_t TID=active_threads; TID<number_of_threads; TID++)
104    {
105      thread_data_arguments[TID].do_work      = false;
106    }
107  for (int64_t TID=0; TID<active_threads; TID++)
108    {
109      thread_data_arguments[TID].thread_id    = TID;
110      thread_data_arguments[TID].verbose      = verbose;
111      thread_data_arguments[TID].do_work      = true;
112      thread_data_arguments[TID].repeat_count = repeat_count;
113
114      (void) determine_work_per_thread (
115		TID, rows_per_thread, remainder_rows,
116		&thread_data_arguments[TID].row_index_start,
117		&thread_data_arguments[TID].row_index_end);
118
119      thread_data_arguments[TID].m = number_of_rows;
120      thread_data_arguments[TID].n = number_of_columns;
121      thread_data_arguments[TID].b = b;
122      thread_data_arguments[TID].c = c;
123      thread_data_arguments[TID].A = A;
124    }
125
126  if (verbose) printf ("Assigned work to threads\n");
127
128/*
129* -----------------------------------------------------------------------------
130* Create and execute the threads.  Note that this means that there will be
131* <t+1> threads, with <t> the number of threads specified on the commandline,
132* or the default if the -t option was not used.
133*
134* Per the pthread_create () call, the threads start executing right away.
135* -----------------------------------------------------------------------------
136*/
137  for (int TID=0; TID<active_threads; TID++)
138    {
139      if (pthread_create (&pthread_ids[TID], NULL, driver_mxv,
140	  (void *) &thread_data_arguments[TID]) != 0)
141	{
142	  printf ("Error creating thread %d\n", TID);
143	  perror ("pthread_create"); exit (-1);
144	}
145      else
146	{
147	  if (verbose) printf ("Thread %d has been created\n", TID);
148	}
149    }
150/*
151* -----------------------------------------------------------------------------
152* Wait for all threads to finish.
153* -----------------------------------------------------------------------------
154*/
155  for (int TID=0; TID<active_threads; TID++)
156    {
157      pthread_join (pthread_ids[TID], NULL);
158    }
159
160  if (verbose)
161    {
162      printf ("Matrix vector multiplication has completed\n");
163      printf ("Verify correctness of result\n");
164    }
165
166/*
167* -----------------------------------------------------------------------------
168* Check the numerical results.
169* -----------------------------------------------------------------------------
170*/
171  if ((errors = check_results (number_of_rows, number_of_columns,
172				c, ref)) == 0)
173    {
174      if (verbose) printf ("Error check passed\n");
175    }
176  else
177    {
178      printf ("Error: %ld differences in the results detected\n", errors);
179    }
180
181/*
182* -----------------------------------------------------------------------------
183* Print a summary of the execution.
184* -----------------------------------------------------------------------------
185*/
186  print_all_results (number_of_rows, number_of_columns, number_of_threads,
187		     errors);
188
189/*
190* -----------------------------------------------------------------------------
191* Release the allocated memory and end execution.
192* -----------------------------------------------------------------------------
193*/
194  free (A);
195  free (b);
196  free (c);
197  free (ref);
198  free (pthread_ids);
199
200  return (0);
201}
202
/*
* -----------------------------------------------------------------------------
* Convert the argument <text> of option -<opt> to a 64-bit signed integer.
*
* The whole string must be a decimal number that fits the conversion; empty
* input, trailing characters and out-of-range values are rejected.  On success
* the result is stored in <value> and true is returned.  On failure a message
* is printed, <value> is left untouched and false is returned.
* -----------------------------------------------------------------------------
*/
static bool parse_int64_option (int opt, const char *text, int64_t *value)
{
  char      *end = NULL;
  long long parsed;

  errno  = 0;
  parsed = strtoll (text, &end, 10);

  if (end == text || *end != '\0' || errno == ERANGE)
    {
      printf ("Error: invalid value \"%s\" for option -%c\n", text, opt);
      return false;
    }

  *value = (int64_t) parsed;
  return true;
}

/*
* -----------------------------------------------------------------------------
* Parse user options and set variables accordingly.  In case of an error, print
* a message, but do not bail out yet.  In this way we can catch multiple input
* errors.
*
* All output parameters are first set to their defaults (2000 rows, 3000
* columns, repeat count 200, 1 thread, verbose off) and then overridden by the
* options -m, -n, -r, -t and -v.
*
* The -h option prints the usage overview and exits with status 0.  An unknown
* option prints the same overview, but exits with status -1.  If any value is
* malformed or not strictly positive, the number of input errors is printed
* and the program exits with status -1.
*
* Returns the number of input errors, which is 0 whenever the function returns.
* -----------------------------------------------------------------------------
*/
int get_user_options (int argc, char *argv[],
                      int64_t *number_of_rows,
                      int64_t *number_of_columns,
                      int64_t *repeat_count,
                      int64_t *number_of_threads,
                      bool    *verbose)
{
  int      opt;
  int      errors                    = 0;
  int64_t  default_number_of_threads = 1;
  int64_t  default_rows              = 2000;
  int64_t  default_columns           = 3000;
  int64_t  default_repeat_count      = 200;
  bool     default_verbose           = false;

  *number_of_rows    = default_rows;
  *number_of_columns = default_columns;
  *number_of_threads = default_number_of_threads;
  *repeat_count      = default_repeat_count;
  *verbose           = default_verbose;

  while ((opt = getopt (argc, argv, "m:n:r:t:vh")) != -1)
    {
      switch (opt)
        {
          case 'm':
            if (!parse_int64_option (opt, optarg, number_of_rows))
              errors++;
            break;
          case 'n':
            if (!parse_int64_option (opt, optarg, number_of_columns))
              errors++;
            break;
          case 'r':
            if (!parse_int64_option (opt, optarg, repeat_count))
              errors++;
            break;
          case 't':
            if (!parse_int64_option (opt, optarg, number_of_threads))
              errors++;
            break;
          case 'v':
            *verbose = true;
            break;
          case 'h':
          default:
            printf ("Usage: %s " \
                "[-m <number of rows>] " \
                "[-n <number of columns>] [-r <repeat count>] " \
                "[-t <number of threads>] [-v] [-h]\n", argv[0]);
            printf ("\t-m - number of rows, default = %" PRId64 "\n",
                default_rows);
            printf ("\t-n - number of columns, default = %" PRId64 "\n",
                default_columns);
            printf ("\t-r - the number of times the algorithm is " \
                "repeatedly executed, default = %" PRId64 "\n",
                default_repeat_count);
            printf ("\t-t - the number of threads used, default = %" PRId64
                "\n", default_number_of_threads);
            printf ("\t-v - enable verbose mode, %s by default\n",
                (default_verbose) ? "on" : "off");
            printf ("\t-h - print this usage overview and exit\n");

            /* Only an explicit request for help counts as success.  */
            exit ((opt == 'h') ? 0 : -1);
        }
    }

/*
* -----------------------------------------------------------------------------
* Check for errors and bail out in case of problems.  A value that failed to
* parse has already been counted above and still holds its default here, so it
* is not reported a second time.
* -----------------------------------------------------------------------------
*/
  if (*number_of_rows <= 0)
    {
      errors++;
      printf ("Error: The number of rows is %" PRId64 " but should be " \
              "strictly positive\n", *number_of_rows);
    }
  if (*number_of_columns <= 0)
    {
      errors++;
      printf ("Error: The number of columns is %" PRId64 " but should be " \
              "strictly positive\n", *number_of_columns);
    }
  if (*repeat_count <= 0)
    {
      errors++;
      printf ("Error: The repeat count is %" PRId64 " but should be " \
              "strictly positive\n", *repeat_count);
    }
  if (*number_of_threads <= 0)
    {
      errors++;
      printf ("Error: The number of threads is %" PRId64 " but should be " \
              "strictly positive\n", *number_of_threads);
    }
  if (errors != 0)
    {
      printf ("There are %d input error(s)\n", errors); exit (-1);
    }

  return (errors);
}
310
/*
* -----------------------------------------------------------------------------
* Print a summary of the execution status.
*
* Writes a single line to stdout with the outcome of the error check ("passed"
* if <errors> is 0, "failed" otherwise), followed by the number of rows,
* columns and threads.
* -----------------------------------------------------------------------------
*/
void print_all_results (int64_t number_of_rows,
                        int64_t number_of_columns,
                        int64_t number_of_threads,
                        int64_t errors)
{
  /* PRId64 keeps the format correct where int64_t is not a long.  */
  printf ("mxv: error check %s - rows = %" PRId64 " columns = %" PRId64
          " threads = %" PRId64 "\n",
          (errors == 0) ? "passed" : "failed",
          number_of_rows, number_of_columns, number_of_threads);
}
325
/*
* -----------------------------------------------------------------------------
* Check whether the computations produced the correct results.
*
* Compares the first <m> elements of <c> against <ref>.  The relative error is
* used when the reference value is larger in magnitude than 100 * DBL_MIN and
* the absolute error otherwise.  An element counts as a difference when this
* error exceeds 100 * DBL_EPSILON.
*
* If differences are found, all <m> elements are printed, with a '*' in front
* of those that differ.  <n> is only used in that message.
*
* Returns the number of differing elements.  Exits with status -1 if the
* temporary marker array cannot be allocated.
* -----------------------------------------------------------------------------
*/
int64_t check_results (int64_t m, int64_t n, double *c, double *ref)
{
  char         *marker;
  int64_t      errors = 0;
  double       relerr;
  const double TOL    = 100.0 * DBL_EPSILON;
  const double SMALL  = 100.0 * DBL_MIN;

  /* malloc (0) may return NULL without that being an error, so always
     request at least one byte.  */
  if ((marker = malloc ((m > 0) ? (size_t) m : 1)) == NULL)
    {
      perror ("array marker");
      exit (-1);
    }

  for (int64_t i=0; i<m; i++)
    {
      if (fabs (ref[i]) > SMALL)
        {
          relerr = fabs ((c[i]-ref[i])/ref[i]);
        }
      else
        {
          /* Avoid dividing by a (nearly) zero reference value.  */
          relerr = fabs ((c[i]-ref[i]));
        }
      if (relerr <= TOL)
        {
          marker[i] = ' ';
        }
      else
        {
          errors++;
          marker[i] = '*';
        }
    }

  if (errors > 0)
    {
      printf ("Found %" PRId64 " differences in results for m = %" PRId64
              " n = %" PRId64 ":\n", errors, m, n);
      for (int64_t i=0; i<m; i++)
        printf ("  %c c[%" PRId64 "] = %f ref[%" PRId64 "] = %f\n",
                marker[i], i, c[i], i, ref[i]);
    }

  /* The markers are only needed for the report above.  */
  free (marker);

  return (errors);
}
375