1/* BEGIN LICENSE BLOCK
2 * Version: CMPL 1.1
3 *
4 * The contents of this file are subject to the Cisco-style Mozilla Public
5 * License Version 1.1 (the "License"); you may not use this file except
6 * in compliance with the License.  You may obtain a copy of the License
7 * at www.eclipse-clp.org/license.
8 *
9 * Software distributed under the License is distributed on an "AS IS"
10 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
11 * the License for the specific language governing rights and limitations
12 * under the License.
13 *
14 * The Original Code is  The ECLiPSe Constraint Logic Programming System.
15 * The Initial Developer of the Original Code is  Cisco Systems, Inc.
16 * Portions created by the Initial Developer are
17 * Copyright (C) 1995-2006 Cisco Systems, Inc.  All Rights Reserved.
18 *
19 * Contributor(s):
20 *
21 * END LICENSE BLOCK */
22
23/**********************************************************************
24**      System: Parallel Distributed System
25**        File: trace.c
26**      Author: Shyam Mudambi
27** Description: Tracing routines
28***********************************************************************/
29
30#include "config.h"
31#include <sys/types.h>
32#include <sys/param.h>
33#include <sys/stat.h>
34#include <fcntl.h>
35#include <signal.h>
36#include <stdlib.h>
37#include <stdio.h>
38#ifdef HAVE_UNISTD_H
39#include <unistd.h>
40#endif
41#include <errno.h>
42
43#ifdef HAVE_SGIHRCLOCK
44#include <sys/mman.h>
45#include <sys/syssgi.h>
46#include <sys/immu.h>
47#endif
48
49#ifdef PATH_IN_LIMITS
50#  include 	<limits.h>
51#  define MAX_PATH_LEN	PATH_MAX
52#else
53#  include <sys/param.h>
54#  define MAX_PATH_LEN	MAXPATHLEN
55#endif
56
57#if !defined(HAVE_GETHOSTID)
58#include <sys/utsname.h>	/* for uname() */
59#endif
60
61#ifdef HAVE_SYS_SYSTEMINFO_H
62#include <sys/systeminfo.h>
63#endif
64
65#include "os_support.h"
66#include "sepia.h"
67#include <pds.h>   /* pds.h has to be included before types.h since both
68                      define machine word size dependent types */
69#include <nsrv.h>
70#include "types.h"
71#include "embed.h"
72#include "error.h"
73#include "mem.h"
74#include "dict.h"
75#include "memman.h"
76
77#include "trace.h"	/* sets WORKER_TRACING */
78#include "wm_msgs.h"
79#include "wm.h"
80
81#define ParallelWorker (ec_options.parallel_worker)
82
83struct worker_stat wstat_;
84struct worker_stat_ext wstat_ext;
85
86#ifdef WORKER_TRACING
87
88#include <sys/time.h>
89
90extern void	check_amsg();
91extern nsrv_name_t session_key;
92extern aport_id_t wm_low_aport_id;
93
94trace_header_t theader;
95trace_event_t events[MAX_EVENTS];
96volatile int tracing = 0;
97
98
99/* During tracing, if the buffer overflows, it is dumped onto the
100   temp_trace_file (temp_fd) by each worker. After the run, each worker
101   processes its temp_trace_file to convert it to alog format and this
102   is stored in trace_file. Lastly all the trace files are appended to
103   each other to produce the final log file.
104*/
105
106static char *tmpdir;
107static char temp_trace_file[MAX_PATH_LEN];
108static char trace_file[MAX_PATH_LEN];
109static trace_header_t copy_theader;
110static int temp_fd;
111
112init_trace_file_names()
113{
114  tmpdir = getenv("ECLIPSETMP");
115  if(!tmpdir)
116    tmpdir = "/tmp";
117
118  sprintf(temp_trace_file,"%s/%d.%d.temp.log",
119	  tmpdir, ParallelWorker,session_key);
120  sprintf(trace_file,"%s/%d.%d.gsx.log",
121	  tmpdir, ParallelWorker,session_key);
122}
123
124p_start_trace()
125{
126  tracing = -1;
127  start_tracing_message();
128  while (tracing <= 0) ;
129
130  LOG_EVENT(WORKING);
131
132  Succeed_
133}
134
135start_tracing_message()
136{
137  send_simple_wm_message(wm_low_aport_id, START_TRACING);
138}
139
140p_stop_trace()
141{
142  LOG_EVENT(IDLE);
143  stop_tracing_message();
144
145  while(tracing > 0) ;
146
147  Succeed_
148}
149
150stop_tracing_message()
151{
152  send_simple_wm_message(wm_low_aport_id, STOP_TRACING);
153}
154
155start_tracing()
156{
157
158  temp_fd = ec_open(temp_trace_file, O_WRONLY | O_CREAT, 0660);
159  if(temp_fd < 0)
160    printf("%d: Could not creat trace file %s\n", temp_trace_file);
161
162  theader.entries = 0;
163  theader.start_time = gethrtime();
164
165  if (tracing >=  0)
166    events[theader.entries % MAX_EVENTS].event = IDLE;
167  else
168    events[theader.entries % MAX_EVENTS].event = WORKING;
169
170  events[theader.entries % MAX_EVENTS].time = theader.start_time;
171  theader.entries++;
172  tracing = 1;
173}
174
175log_reset()
176{
177    int i;
178    wstat_.current_activity_start = gethrtime();
179    for (i=0; i<MAX_NUM_EVENTS; i++)
180	wstat_.cumulated_event_time[i] = 0;
181}
182
183log_init(event)
184int event;
185{
186    wstat_.trace_stack_ptr = 0;
187    wstat_.current_activity = event;
188    log_reset();
189}
190
191stop_tracing()
192{
193  tracing = 0;
194}
195
196trace_header_t * get_trace_ptr()
197{
198  return(&theader);
199}
200
201p_write_traces(v_widlist, t_widlist, v_filename, t_filename)
202value v_widlist, v_filename;
203type t_widlist, t_filename;
204{
205
206  pword *p;
207  int totalentries = 0;
208  int numprocs = 0;
209  hrtime_t trace_start = 0;
210  int utime, i;
211  int gsxid;          /* id for gsx - needs to be consecutive */
212  char *logname;
213  FILE *logfile;
214  char command[1024];
215  char worker_trace[MAX_PATH_LEN];
216
217  Check_List(t_widlist);
218  p = v_widlist.ptr;
219  Dereference_(p);
220  while(!IsNil(p->tag))
221    {
222/*      wm_get_worker_info(p->val.nint, TRACE_INFO,
223			 sizeof(trace_header_t), &copy_theader); */
224      get_trace_header(p->val.nint, &copy_theader);
225      numprocs++;
226      if(numprocs == 1)
227	trace_start = copy_theader.start_time;
228      else if(copy_theader.start_time < trace_start)
229	trace_start = copy_theader.start_time;
230      totalentries+= copy_theader.entries;
231
232      p = p + 1;
233      if (!IsNil(p->tag)) {
234	p = p->val.ptr;
235	Dereference_(p);
236      }
237    }
238
239  Get_Name(v_filename,t_filename, logname);
240
241/*  sprintf(command,"/bin/rm -f %s", logname);*/
242
243  logfile = fopen(logname,"w");
244
245  if(logfile == NULL)
246    {
247      (void) fprintf(stderr, "can't open event log file %s for writing\n", logname);
248      Fail_
249    }
250  else
251    {
252      (void) fprintf(logfile,"-3 0 0 %d 0 0 \n", numprocs);
253      (void) fprintf(logfile,"-2 0 0 %d 0 0 \n", totalentries);
254      (void) fprintf(logfile,"-6 0 0 0 0 0 \n");
255      fclose(logfile);
256
257      p = v_widlist.ptr;
258      gsxid = 0;
259      Dereference_(p);
260      while(!IsNil(p->tag)) {
261	copy_theader.start_time = trace_start;
262	copy_theader.entries = gsxid++;
263/*	wm_set_worker_info(p->val.nint, TRACE_INFO,
264			   sizeof(trace_header_t), &copy_theader);*/
265	set_trace_header(p->val.nint, &copy_theader);
266
267	sprintf(worker_trace,"%s/%d.%d.gsx.log",
268		tmpdir,p->val.nint,session_key);
269	sprintf(command,"cat %s >> %s",
270		worker_trace,logname);
271	if (system(command) < 0)
272	  (void) fprintf(stderr,"command %s failed\n",command);
273
274	if (ec_unlink(worker_trace) < 0)
275	  perror("Could not remove worker trace file");
276
277	p = p + 1;
278	if (!IsNil(p->tag)) {
279	  p = p->val.ptr;
280	  Dereference_(p);
281	}
282      }
283      Succeed_
284    }
285}
286
287write_trace_buffer(full)
288int full;
289{
290  int i,ub;
291
292  if (temp_fd >= 0) {
293    if (full)
294      {
295	ub = MAX_EVENTS;
296	LOG_EVENT_PUSH(TRACE_BUF_OVERFLOW)
297      }
298    else
299      ub = theader.entries % MAX_EVENTS;
300    for(i = 0; i < ub; i++)
301      if (write(temp_fd, &events[i],sizeof(trace_event_t)) < 0) {
302	Set_Errno;
303	return SYS_ERROR ;
304      }
305    if (full)
306      LOG_EVENT_POP
307    else
308       close(temp_fd);
309  }
310}
311
312
313
314print_trace(trace_header)
315trace_header_t * trace_header;
316{
317  hrtime_t trace_start;
318  word utime, hrtime_to_usec();
319  int i;
320  trace_event_t evt;
321  FILE *tf, *temp_log;
322  char command[1024];
323  int gsxid;  /* process id for gsx (has to be consecutive) */
324
325  copy_theader = *trace_header;
326
327  trace_start = copy_theader.start_time;
328  gsxid = copy_theader.entries;
329
330  write_trace_buffer(0);
331
332  tf = fopen(trace_file,"a");
333
334  if(tf == NULL)
335    (void) fprintf(stderr,"%d: can't open trace file %s\n",
336	    ParallelWorker, trace_file);
337  else
338    {
339      temp_fd = ec_open(temp_trace_file,O_RDONLY, 0660);
340      if (temp_fd < 0)
341	(void) fprintf(stderr,"%d: can't open trace file %s\n",
342		ParallelWorker, temp_trace_file);
343      else
344	{
345	  for(i = 0; i < theader.entries ; i++)  {
346	    read(temp_fd, &evt, sizeof(trace_event_t));
347	    utime = (word) hrtime_to_usec(evt.time - trace_start);
348	    if (utime < 0 )
349	      {
350		printf("%d:Error in logfile utime = %d", ParallelWorker, utime);
351		printf(" start_time = %" W_MOD "d, evt = %d, evt_time = %" W_MOD "d\n",
352		       trace_start, evt.event, evt.time);
353		printf("i = %d entries = %d local_trace_start = %" W_MOD "d \n",
354		       i, theader.entries, theader.start_time);
355	      }
356	    else
357	      (void) fprintf(tf,"%d %d 0 0 0  %d\n",
358		      evt.event, gsxid, utime);
359	  }
360	  close(temp_fd);
361	  if (ec_unlink(temp_trace_file) < 0)
362	    perror("could not remove temporary trace file");
363	}
364      fclose(tf);
365    }
366}
367
368#if defined(HAVE_SGIHRCLOCK)
369volatile hrtime_t *timer_addr;
370/* cycleval stores the resolution of the hardware clock in picoseconds */
371__psunsigned_t cycleval;
372
373/* initialise high-resolution clock */
374void init_hrclock()
375{
376  int fd, poffmask;
377__psunsigned_t phys_addr, raddr;
378
379  poffmask = getpagesize() - 1;
380  if((phys_addr = syssgi(SGI_QUERY_CYCLECNTR, &cycleval)) == -1
381     && errno == ENODEV)
382    printf("Error in Configuration! No high-resolution timer!");
383
384  raddr = phys_addr & ~poffmask;
385  fd = ec_open("/dev/mmem", O_RDONLY);
386  timer_addr = (volatile hrtime_t *)mmap(0, poffmask, PROT_READ,
387					 MAP_PRIVATE, fd, (__psint_t)raddr);
388
389  timer_addr = (hrtime_t *)((__psunsigned_t)timer_addr +
390			    (phys_addr & poffmask));
391
392}
393/* convert time returned by high-resolution  clock to microseconds */
394word hrtime_to_usec(hrtime)
395hrtime_t hrtime;
396{
397    unsigned long long temp; /* just in case unsinged long long > 64 bits */
398
399  temp = hrtime;
400  temp = temp * cycleval / 1000000;  /* cycleval is in picoseconds */
401  return((word) temp);
402}
403
404double hrtime_to_msec(hrtime)
405hrtime_t hrtime;
406{
407  double temp;
408  temp = (hrtime / 1000000000.0);
409  return(temp * cycleval);
410}
411
412#else /* if HAVE_GETHRTIME */
413
414void init_hrclock()
415{ }
416
417/* convert time returned by high-resolution  clock to microseconds */
418word hrtime_to_usec(hrtime)
419hrtime_t hrtime;
420{
421  return((word) (hrtime / 1000));
422}
423
424double hrtime_to_msec(hrtime)
425hrtime_t hrtime;
426{
427  return((hrtime / 1000000.0));
428}
429
430#endif /* HAVE_SGIHRCLOCK */
431
432#else  /* WORKER_TRACING */
433
434/*ARGSUSED*/
435p_write_traces(v_widlist, t_widlist, v_filename, t_filename)
436value v_widlist, v_filename;
437type t_widlist, t_filename;
438{
439  Succeed_
440}
441
442p_start_trace()
443{
444  Succeed_
445}
446
447p_stop_trace()
448{
449  Succeed_
450}
451
452#endif /* WORKER_TRACING */
453
454
455/*---------------------------------------------------------------------
456 * Access to local worker statistics
457 * It is collected in the structure wstat_
458 *---------------------------------------------------------------------*/
459
460wstat_init()
461{
462    wstat_.job_count = 0;
463    wstat_.copy_from_bytes = 0;
464    wstat_.copy_to_bytes = 0;
465    wstat_.copy_to_extra = 0;
466    wstat_.cut_count = 0;
467    wstat_.prune_count = 0;
468    wstat_.publish_count = 0;
469    wstat_.parallel_chpts = 0;
470    wstat_.parallel_alts = 0;
471    wstat_.published_chpts = 0;
472    wstat_.published_alts = 0;
473    wstat_.polls_expired = 0;
474    wstat_.polls_succeeded = 0;
475    wstat_.rpc_sleep = 0;
476    wstat_.copy_more_count = 0;
477    wstat_.copy_from_count = 0;
478    wstat_.copy_to_count = 0;
479#ifdef WORKER_TRACING
480    init_hrclock();
481#endif
482}
483
484
485/*
486 * worker_statistics(+Wid, -Stat)
487 * Retrieve the local statistics data of worker Wid
488 */
489
490#define WSTAT_ITEMS (13 + MAX_NUM_EVENTS)
491
492p_worker_stat(vwid, twid, vstat, tstat)
493value vwid, vstat;
494type twid, tstat;
495{
496    struct worker_stat_ext s;
497    pword *p;
498    int i;
499
500    Check_Integer(twid);
501    if (vwid.nint == ParallelWorker)
502        get_worker_stat(&s);
503    else if (valid_wid((int) vwid.nint))
504      request_wstat((int) vwid.nint, &s);
505    else
506    {
507	Bip_Error(RANGE_ERROR);
508    }
509
510    p = TG;
511    Push_Struct_Frame(in_dict("data", WSTAT_ITEMS));
512
513    Make_Integer(&p[1], s.job_count);
514    Make_Integer(&p[2], s.prune_count);
515    Make_Integer(&p[3], s.cut_count);
516    Make_Integer(&p[4], s.copy_from_count);
517    Make_Integer(&p[5], s.copy_from_bytes);
518    Make_Integer(&p[6], s.copy_to_count);
519    Make_Integer(&p[7], s.copy_to_bytes);
520    Make_Integer(&p[8], s.publish_count);
521    Make_Integer(&p[9], s.published_chpts);
522    Make_Integer(&p[10], s.published_alts);
523
524    Make_Float(&p[11], s.times[0]);
525    Make_Float(&p[12], s.times[1]);
526    Make_Float(&p[13], s.times[2]);
527
528    for (i=0; i<MAX_NUM_EVENTS; i++)
529    {
530	Make_Checked_Double(&p[14+i], s.cumulated_event_time[i]);
531    }
532    Return_Unify_Structure(vstat, tstat, p);
533}
534
535p_worker_stat_reset(vwid, twid)
536value vwid;
537type twid;
538{
539    Check_Integer(twid);
540    if (vwid.nint == ParallelWorker)
541    {
542	reset_worker_stat();
543    }
544    else if (valid_wid((int) vwid.nint))
545    {
546/*	wm_set_worker_info(vwid.nint, WSTAT_INFO, 0, (void_ptr) 0);*/
547      reset_wstat((int) vwid.nint);
548    }
549    else
550    {
551	Bip_Error(RANGE_ERROR);
552    }
553    Succeed_;
554}
555
556/* called on the worker that provides the statistics data */
557
558int get_worker_stat(wstat_ext1)
559struct worker_stat_ext * wstat_ext1;
560{
561    /* to force updating the cumulative counters */
562    LOG_EVENT_PUSH(TRACE_BUF_OVERFLOW)
563    LOG_EVENT_POP
564    (void) all_times(&wstat_.times[0], &wstat_.times[1], &wstat_.times[2]);
565    convert_to_ext(wstat_,wstat_ext1);
566}
567
568int convert_to_ext(w,wext)
569struct worker_stat w;
570struct worker_stat_ext *wext;
571{
572  int i;
573
574  wext->job_count = w.job_count;
575  wext->copy_from_bytes = w.copy_from_bytes;
576  wext->copy_to_bytes = w.copy_to_bytes;
577  wext->copy_to_extra = w.copy_to_extra;
578  wext->cut_count = w.cut_count;
579  wext->prune_count = w.prune_count;
580  wext->publish_count = w.publish_count;
581  wext->parallel_chpts = w.parallel_chpts;
582  wext->parallel_alts = w.parallel_alts;
583  wext->published_chpts = w.published_chpts;
584  wext->published_alts = w.published_alts;
585  wext->polls_expired = w.polls_expired;
586  wext->polls_succeeded = w.polls_succeeded;
587  wext->rpc_sleep = w.rpc_sleep;
588  wext->copy_more_count = w.copy_more_count;
589  wext->copy_from_count = w.copy_from_count;
590  wext->copy_to_count = w.copy_to_count;
591  wext->times[0] = w.times[0];
592  wext->times[1] = w.times[1];
593  wext->times[2] = w.times[2];
594#ifdef WORKER_TRACING
595  wext->current_activity_start = hrtime_to_msec(w.current_activity_start);
596  for (i=0; i<MAX_NUM_EVENTS; i++)
597    wext->cumulated_event_time[i] = hrtime_to_msec(w.cumulated_event_time[i]);
598#else
599  wext->current_activity_start = 0.0;
600  for (i=0; i<MAX_NUM_EVENTS; i++)
601    wext->cumulated_event_time[i] = 0.0;
602#endif
603}
604
605reset_worker_stat()
606{
607    wstat_init();
608#ifdef WORKER_TRACING
609    log_reset();
610#endif
611}
612
613wstat_types_init(mdt_wstat)
614amsg_type_t * mdt_wstat;
615
616{
617  amsg_typedef_t template[33];
618
619  /* mdt_wstat */
620  template[0] = MDT_BEGIN;
621  template[1] = MDT_STRUCT_OPEN;
622  template[2] = MDT_INT32;
623  template[3] = MDT_INT32;
624  template[4] = MDT_INT32;
625  template[5] = MDT_INT32;
626  template[6] = MDT_INT32;
627  template[7] = MDT_INT32;
628  template[8] = MDT_INT32;
629  template[9] = MDT_INT32;
630  template[10] = MDT_INT32;
631  template[11] = MDT_INT32;
632  template[12] = MDT_INT32;
633  template[13] = MDT_INT32;
634  template[14] = MDT_INT32;
635  template[15] = MDT_INT32;
636  template[16] = MDT_INT32;
637  template[17] = MDT_INT32;
638  template[18] = MDT_INT32;
639  template[19] = MDT_ARRAY_OF;
640  template[20] = 3;
641  template[21] = MDT_DP_FLOAT;
642  template[22] = MDT_DP_FLOAT;
643  template[23] = MDT_ARRAY_OF;
644  template[24] = MAX_NUM_EVENTS;
645  template[25] = MDT_DP_FLOAT;
646  template[26] = MDT_INT32;
647  template[27] = MDT_INT32;
648  template[28] = MDT_ARRAY_OF;
649  template[29] = MAX_NESTING;
650  template[30] = MDT_INT32;
651  template[31] = MDT_STRUCT_CLOSE;
652  template[32] = MDT_END;
653  check_amsg(amsg_type_define(ECLIPSE_WM_INTERFACE, 100,
654		       template, mdt_wstat), __LINE__, 4);
655}
656
657#ifdef WORKER_TRACING
658trace_types_init(mdt_trace)
659amsg_type_t * mdt_trace;
660{
661  amsg_type_t template[6];
662
663  template[0] = MDT_BEGIN;
664  template[1] = MDT_STRUCT_OPEN;
665  template[2] = MDT_INT32;
666  template[3] = MDT_DWORD;
667  template[4] = MDT_STRUCT_CLOSE;
668  template[5] = MDT_END;
669  check_amsg(amsg_type_define(ECLIPSE_WM_INTERFACE, 101,
670		       template, mdt_trace), __LINE__, 4);
671}
672#endif
673