1/* BEGIN LICENSE BLOCK 2 * Version: CMPL 1.1 3 * 4 * The contents of this file are subject to the Cisco-style Mozilla Public 5 * License Version 1.1 (the "License"); you may not use this file except 6 * in compliance with the License. You may obtain a copy of the License 7 * at www.eclipse-clp.org/license. 8 * 9 * Software distributed under the License is distributed on an "AS IS" 10 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 11 * the License for the specific language governing rights and limitations 12 * under the License. 13 * 14 * The Original Code is The ECLiPSe Constraint Logic Programming System. 15 * The Initial Developer of the Original Code is Cisco Systems, Inc. 16 * Portions created by the Initial Developer are 17 * Copyright (C) 1995-2006 Cisco Systems, Inc. All Rights Reserved. 18 * 19 * Contributor(s): 20 * 21 * END LICENSE BLOCK */ 22 23/********************************************************************** 24** System: Parallel Distributed System 25** File: trace.c 26** Author: Shyam Mudambi 27** Description: Tracing routines 28***********************************************************************/ 29 30#include "config.h" 31#include <sys/types.h> 32#include <sys/param.h> 33#include <sys/stat.h> 34#include <fcntl.h> 35#include <signal.h> 36#include <stdlib.h> 37#include <stdio.h> 38#ifdef HAVE_UNISTD_H 39#include <unistd.h> 40#endif 41#include <errno.h> 42 43#ifdef HAVE_SGIHRCLOCK 44#include <sys/mman.h> 45#include <sys/syssgi.h> 46#include <sys/immu.h> 47#endif 48 49#ifdef PATH_IN_LIMITS 50# include <limits.h> 51# define MAX_PATH_LEN PATH_MAX 52#else 53# include <sys/param.h> 54# define MAX_PATH_LEN MAXPATHLEN 55#endif 56 57#if !defined(HAVE_GETHOSTID) 58#include <sys/utsname.h> /* for uname() */ 59#endif 60 61#ifdef HAVE_SYS_SYSTEMINFO_H 62#include <sys/systeminfo.h> 63#endif 64 65#include "os_support.h" 66#include "sepia.h" 67#include <pds.h> /* pds.h has to be included before types.h since both 68 define machine word size dependent types */ 69#include <nsrv.h> 70#include "types.h" 71#include "embed.h" 72#include "error.h" 73#include "mem.h" 74#include "dict.h" 75#include "memman.h" 76 77#include "trace.h" /* sets WORKER_TRACING */ 78#include "wm_msgs.h" 79#include "wm.h" 80 81#define ParallelWorker (ec_options.parallel_worker) 82 83struct worker_stat wstat_; 84struct worker_stat_ext wstat_ext; 85 86#ifdef WORKER_TRACING 87 88#include <sys/time.h> 89 90extern void check_amsg(); 91extern nsrv_name_t session_key; 92extern aport_id_t wm_low_aport_id; 93 94trace_header_t theader; 95trace_event_t events[MAX_EVENTS]; 96volatile int tracing = 0; 97 98 99/* During tracing, if the buffer overflows, it is dumped onto the 100 temp_trace_file (temp_fd) by each worker. After the run, each worker 101 processes its temp_trace_file to convert it to alog format and this 102 is stored in trace_file. Lastly all the trace files are appended to 103 each other to produce the final log file. 104*/ 105 106static char *tmpdir; 107static char temp_trace_file[MAX_PATH_LEN]; 108static char trace_file[MAX_PATH_LEN]; 109static trace_header_t copy_theader; 110static int temp_fd; 111 112init_trace_file_names() 113{ 114 tmpdir = getenv("ECLIPSETMP"); 115 if(!tmpdir) 116 tmpdir = "/tmp"; 117 118 sprintf(temp_trace_file,"%s/%d.%d.temp.log", 119 tmpdir, ParallelWorker,session_key); 120 sprintf(trace_file,"%s/%d.%d.gsx.log", 121 tmpdir, ParallelWorker,session_key); 122} 123 124p_start_trace() 125{ 126 tracing = -1; 127 start_tracing_message(); 128 while (tracing <= 0) ; 129 130 LOG_EVENT(WORKING); 131 132 Succeed_ 133} 134 135start_tracing_message() 136{ 137 send_simple_wm_message(wm_low_aport_id, START_TRACING); 138} 139 140p_stop_trace() 141{ 142 LOG_EVENT(IDLE); 143 stop_tracing_message(); 144 145 while(tracing > 0) ; 146 147 Succeed_ 148} 149 150stop_tracing_message() 151{ 152 send_simple_wm_message(wm_low_aport_id, STOP_TRACING); 153} 154 155start_tracing() 156{ 157 158 temp_fd = ec_open(temp_trace_file, O_WRONLY | O_CREAT, 0660); 159 if(temp_fd < 0) 160 printf("%d: Could not creat trace file %s\n", temp_trace_file); 161 162 theader.entries = 0; 163 theader.start_time = gethrtime(); 164 165 if (tracing >= 0) 166 events[theader.entries % MAX_EVENTS].event = IDLE; 167 else 168 events[theader.entries % MAX_EVENTS].event = WORKING; 169 170 events[theader.entries % MAX_EVENTS].time = theader.start_time; 171 theader.entries++; 172 tracing = 1; 173} 174 175log_reset() 176{ 177 int i; 178 wstat_.current_activity_start = gethrtime(); 179 for (i=0; i<MAX_NUM_EVENTS; i++) 180 wstat_.cumulated_event_time[i] = 0; 181} 182 183log_init(event) 184int event; 185{ 186 wstat_.trace_stack_ptr = 0; 187 wstat_.current_activity = event; 188 log_reset(); 189} 190 191stop_tracing() 192{ 193 tracing = 0; 194} 195 196trace_header_t * get_trace_ptr() 197{ 198 return(&theader); 199} 200 201p_write_traces(v_widlist, t_widlist, v_filename, t_filename) 202value v_widlist, v_filename; 203type t_widlist, t_filename; 204{ 205 206 pword *p; 207 int totalentries = 0; 208 int numprocs = 0; 209 hrtime_t trace_start = 0; 210 int utime, i; 211 int gsxid; /* id for gsx - needs to be consecutive */ 212 char *logname; 213 FILE *logfile; 214 char command[1024]; 215 char worker_trace[MAX_PATH_LEN]; 216 217 Check_List(t_widlist); 218 p = v_widlist.ptr; 219 Dereference_(p); 220 while(!IsNil(p->tag)) 221 { 222/* wm_get_worker_info(p->val.nint, TRACE_INFO, 223 sizeof(trace_header_t), ©_theader); */ 224 get_trace_header(p->val.nint, ©_theader); 225 numprocs++; 226 if(numprocs == 1) 227 trace_start = copy_theader.start_time; 228 else if(copy_theader.start_time < trace_start) 229 trace_start = copy_theader.start_time; 230 totalentries+= copy_theader.entries; 231 232 p = p + 1; 233 if (!IsNil(p->tag)) { 234 p = p->val.ptr; 235 Dereference_(p); 236 } 237 } 238 239 Get_Name(v_filename,t_filename, logname); 240 241/* sprintf(command,"/bin/rm -f %s", logname);*/ 242 243 logfile = fopen(logname,"w"); 244 245 if(logfile == NULL) 246 { 247 (void) fprintf(stderr, "can't open event log file %s for writing\n", logname); 248 Fail_ 249 } 250 else 251 { 252 (void) fprintf(logfile,"-3 0 0 %d 0 0 \n", numprocs); 253 (void) fprintf(logfile,"-2 0 0 %d 0 0 \n", totalentries); 254 (void) fprintf(logfile,"-6 0 0 0 0 0 \n"); 255 fclose(logfile); 256 257 p = v_widlist.ptr; 258 gsxid = 0; 259 Dereference_(p); 260 while(!IsNil(p->tag)) { 261 copy_theader.start_time = trace_start; 262 copy_theader.entries = gsxid++; 263/* wm_set_worker_info(p->val.nint, TRACE_INFO, 264 sizeof(trace_header_t), ©_theader);*/ 265 set_trace_header(p->val.nint, ©_theader); 266 267 sprintf(worker_trace,"%s/%d.%d.gsx.log", 268 tmpdir,p->val.nint,session_key); 269 sprintf(command,"cat %s >> %s", 270 worker_trace,logname); 271 if (system(command) < 0) 272 (void) fprintf(stderr,"command %s failed\n",command); 273 274 if (ec_unlink(worker_trace) < 0) 275 perror("Could not remove worker trace file"); 276 277 p = p + 1; 278 if (!IsNil(p->tag)) { 279 p = p->val.ptr; 280 Dereference_(p); 281 } 282 } 283 Succeed_ 284 } 285} 286 287write_trace_buffer(full) 288int full; 289{ 290 int i,ub; 291 292 if (temp_fd >= 0) { 293 if (full) 294 { 295 ub = MAX_EVENTS; 296 LOG_EVENT_PUSH(TRACE_BUF_OVERFLOW) 297 } 298 else 299 ub = theader.entries % MAX_EVENTS; 300 for(i = 0; i < ub; i++) 301 if (write(temp_fd, &events[i],sizeof(trace_event_t)) < 0) { 302 Set_Errno; 303 return SYS_ERROR ; 304 } 305 if (full) 306 LOG_EVENT_POP 307 else 308 close(temp_fd); 309 } 310} 311 312 313 314print_trace(trace_header) 315trace_header_t * trace_header; 316{ 317 hrtime_t trace_start; 318 word utime, hrtime_to_usec(); 319 int i; 320 trace_event_t evt; 321 FILE *tf, *temp_log; 322 char command[1024]; 323 int gsxid; /* process id for gsx (has to be consecutive) */ 324 325 copy_theader = *trace_header; 326 327 trace_start = copy_theader.start_time; 328 gsxid = copy_theader.entries; 329 330 write_trace_buffer(0); 331 332 tf = fopen(trace_file,"a"); 333 334 if(tf == NULL) 335 (void) fprintf(stderr,"%d: can't open trace file %s\n", 336 ParallelWorker, trace_file); 337 else 338 { 339 temp_fd = ec_open(temp_trace_file,O_RDONLY, 0660); 340 if (temp_fd < 0) 341 (void) fprintf(stderr,"%d: can't open trace file %s\n", 342 ParallelWorker, temp_trace_file); 343 else 344 { 345 for(i = 0; i < theader.entries ; i++) { 346 read(temp_fd, &evt, sizeof(trace_event_t)); 347 utime = (word) hrtime_to_usec(evt.time - trace_start); 348 if (utime < 0 ) 349 { 350 printf("%d:Error in logfile utime = %d", ParallelWorker, utime); 351 printf(" start_time = %" W_MOD "d, evt = %d, evt_time = %" W_MOD "d\n", 352 trace_start, evt.event, evt.time); 353 printf("i = %d entries = %d local_trace_start = %" W_MOD "d \n", 354 i, theader.entries, theader.start_time); 355 } 356 else 357 (void) fprintf(tf,"%d %d 0 0 0 %d\n", 358 evt.event, gsxid, utime); 359 } 360 close(temp_fd); 361 if (ec_unlink(temp_trace_file) < 0) 362 perror("could not remove temporary trace file"); 363 } 364 fclose(tf); 365 } 366} 367 368#if defined(HAVE_SGIHRCLOCK) 369volatile hrtime_t *timer_addr; 370/* cycleval stores the resolution of the hardware clock in picoseconds */ 371__psunsigned_t cycleval; 372 373/* initialise high-resolution clock */ 374void init_hrclock() 375{ 376 int fd, poffmask; 377__psunsigned_t phys_addr, raddr; 378 379 poffmask = getpagesize() - 1; 380 if((phys_addr = syssgi(SGI_QUERY_CYCLECNTR, &cycleval)) == -1 381 && errno == ENODEV) 382 printf("Error in Configuration! No high-resolution timer!"); 383 384 raddr = phys_addr & ~poffmask; 385 fd = ec_open("/dev/mmem", O_RDONLY); 386 timer_addr = (volatile hrtime_t *)mmap(0, poffmask, PROT_READ, 387 MAP_PRIVATE, fd, (__psint_t)raddr); 388 389 timer_addr = (hrtime_t *)((__psunsigned_t)timer_addr + 390 (phys_addr & poffmask)); 391 392} 393/* convert time returned by high-resolution clock to microseconds */ 394word hrtime_to_usec(hrtime) 395hrtime_t hrtime; 396{ 397 unsigned long long temp; /* just in case unsinged long long > 64 bits */ 398 399 temp = hrtime; 400 temp = temp * cycleval / 1000000; /* cycleval is in picoseconds */ 401 return((word) temp); 402} 403 404double hrtime_to_msec(hrtime) 405hrtime_t hrtime; 406{ 407 double temp; 408 temp = (hrtime / 1000000000.0); 409 return(temp * cycleval); 410} 411 412#else /* if HAVE_GETHRTIME */ 413 414void init_hrclock() 415{ } 416 417/* convert time returned by high-resolution clock to microseconds */ 418word hrtime_to_usec(hrtime) 419hrtime_t hrtime; 420{ 421 return((word) (hrtime / 1000)); 422} 423 424double hrtime_to_msec(hrtime) 425hrtime_t hrtime; 426{ 427 return((hrtime / 1000000.0)); 428} 429 430#endif /* HAVE_SGIHRCLOCK */ 431 432#else /* WORKER_TRACING */ 433 434/*ARGSUSED*/ 435p_write_traces(v_widlist, t_widlist, v_filename, t_filename) 436value v_widlist, v_filename; 437type t_widlist, t_filename; 438{ 439 Succeed_ 440} 441 442p_start_trace() 443{ 444 Succeed_ 445} 446 447p_stop_trace() 448{ 449 Succeed_ 450} 451 452#endif /* WORKER_TRACING */ 453 454 455/*--------------------------------------------------------------------- 456 * Access to local worker statistics 457 * It is collected in the structure wstat_ 458 *---------------------------------------------------------------------*/ 459 460wstat_init() 461{ 462 wstat_.job_count = 0; 463 wstat_.copy_from_bytes = 0; 464 wstat_.copy_to_bytes = 0; 465 wstat_.copy_to_extra = 0; 466 wstat_.cut_count = 0; 467 wstat_.prune_count = 0; 468 wstat_.publish_count = 0; 469 wstat_.parallel_chpts = 0; 470 wstat_.parallel_alts = 0; 471 wstat_.published_chpts = 0; 472 wstat_.published_alts = 0; 473 wstat_.polls_expired = 0; 474 wstat_.polls_succeeded = 0; 475 wstat_.rpc_sleep = 0; 476 wstat_.copy_more_count = 0; 477 wstat_.copy_from_count = 0; 478 wstat_.copy_to_count = 0; 479#ifdef WORKER_TRACING 480 init_hrclock(); 481#endif 482} 483 484 485/* 486 * worker_statistics(+Wid, -Stat) 487 * Retrieve the local statistics data of worker Wid 488 */ 489 490#define WSTAT_ITEMS (13 + MAX_NUM_EVENTS) 491 492p_worker_stat(vwid, twid, vstat, tstat) 493value vwid, vstat; 494type twid, tstat; 495{ 496 struct worker_stat_ext s; 497 pword *p; 498 int i; 499 500 Check_Integer(twid); 501 if (vwid.nint == ParallelWorker) 502 get_worker_stat(&s); 503 else if (valid_wid((int) vwid.nint)) 504 request_wstat((int) vwid.nint, &s); 505 else 506 { 507 Bip_Error(RANGE_ERROR); 508 } 509 510 p = TG; 511 Push_Struct_Frame(in_dict("data", WSTAT_ITEMS)); 512 513 Make_Integer(&p[1], s.job_count); 514 Make_Integer(&p[2], s.prune_count); 515 Make_Integer(&p[3], s.cut_count); 516 Make_Integer(&p[4], s.copy_from_count); 517 Make_Integer(&p[5], s.copy_from_bytes); 518 Make_Integer(&p[6], s.copy_to_count); 519 Make_Integer(&p[7], s.copy_to_bytes); 520 Make_Integer(&p[8], s.publish_count); 521 Make_Integer(&p[9], s.published_chpts); 522 Make_Integer(&p[10], s.published_alts); 523 524 Make_Float(&p[11], s.times[0]); 525 Make_Float(&p[12], s.times[1]); 526 Make_Float(&p[13], s.times[2]); 527 528 for (i=0; i<MAX_NUM_EVENTS; i++) 529 { 530 Make_Checked_Double(&p[14+i], s.cumulated_event_time[i]); 531 } 532 Return_Unify_Structure(vstat, tstat, p); 533} 534 535p_worker_stat_reset(vwid, twid) 536value vwid; 537type twid; 538{ 539 Check_Integer(twid); 540 if (vwid.nint == ParallelWorker) 541 { 542 reset_worker_stat(); 543 } 544 else if (valid_wid((int) vwid.nint)) 545 { 546/* wm_set_worker_info(vwid.nint, WSTAT_INFO, 0, (void_ptr) 0);*/ 547 reset_wstat((int) vwid.nint); 548 } 549 else 550 { 551 Bip_Error(RANGE_ERROR); 552 } 553 Succeed_; 554} 555 556/* called on the worker that provides the statistics data */ 557 558int get_worker_stat(wstat_ext1) 559struct worker_stat_ext * wstat_ext1; 560{ 561 /* to force updating the cumulative counters */ 562 LOG_EVENT_PUSH(TRACE_BUF_OVERFLOW) 563 LOG_EVENT_POP 564 (void) all_times(&wstat_.times[0], &wstat_.times[1], &wstat_.times[2]); 565 convert_to_ext(wstat_,wstat_ext1); 566} 567 568int convert_to_ext(w,wext) 569struct worker_stat w; 570struct worker_stat_ext *wext; 571{ 572 int i; 573 574 wext->job_count = w.job_count; 575 wext->copy_from_bytes = w.copy_from_bytes; 576 wext->copy_to_bytes = w.copy_to_bytes; 577 wext->copy_to_extra = w.copy_to_extra; 578 wext->cut_count = w.cut_count; 579 wext->prune_count = w.prune_count; 580 wext->publish_count = w.publish_count; 581 wext->parallel_chpts = w.parallel_chpts; 582 wext->parallel_alts = w.parallel_alts; 583 wext->published_chpts = w.published_chpts; 584 wext->published_alts = w.published_alts; 585 wext->polls_expired = w.polls_expired; 586 wext->polls_succeeded = w.polls_succeeded; 587 wext->rpc_sleep = w.rpc_sleep; 588 wext->copy_more_count = w.copy_more_count; 589 wext->copy_from_count = w.copy_from_count; 590 wext->copy_to_count = w.copy_to_count; 591 wext->times[0] = w.times[0]; 592 wext->times[1] = w.times[1]; 593 wext->times[2] = w.times[2]; 594#ifdef WORKER_TRACING 595 wext->current_activity_start = hrtime_to_msec(w.current_activity_start); 596 for (i=0; i<MAX_NUM_EVENTS; i++) 597 wext->cumulated_event_time[i] = hrtime_to_msec(w.cumulated_event_time[i]); 598#else 599 wext->current_activity_start = 0.0; 600 for (i=0; i<MAX_NUM_EVENTS; i++) 601 wext->cumulated_event_time[i] = 0.0; 602#endif 603} 604 605reset_worker_stat() 606{ 607 wstat_init(); 608#ifdef WORKER_TRACING 609 log_reset(); 610#endif 611} 612 613wstat_types_init(mdt_wstat) 614amsg_type_t * mdt_wstat; 615 616{ 617 amsg_typedef_t template[33]; 618 619 /* mdt_wstat */ 620 template[0] = MDT_BEGIN; 621 template[1] = MDT_STRUCT_OPEN; 622 template[2] = MDT_INT32; 623 template[3] = MDT_INT32; 624 template[4] = MDT_INT32; 625 template[5] = MDT_INT32; 626 template[6] = MDT_INT32; 627 template[7] = MDT_INT32; 628 template[8] = MDT_INT32; 629 template[9] = MDT_INT32; 630 template[10] = MDT_INT32; 631 template[11] = MDT_INT32; 632 template[12] = MDT_INT32; 633 template[13] = MDT_INT32; 634 template[14] = MDT_INT32; 635 template[15] = MDT_INT32; 636 template[16] = MDT_INT32; 637 template[17] = MDT_INT32; 638 template[18] = MDT_INT32; 639 template[19] = MDT_ARRAY_OF; 640 template[20] = 3; 641 template[21] = MDT_DP_FLOAT; 642 template[22] = MDT_DP_FLOAT; 643 template[23] = MDT_ARRAY_OF; 644 template[24] = MAX_NUM_EVENTS; 645 template[25] = MDT_DP_FLOAT; 646 template[26] = MDT_INT32; 647 template[27] = MDT_INT32; 648 template[28] = MDT_ARRAY_OF; 649 template[29] = MAX_NESTING; 650 template[30] = MDT_INT32; 651 template[31] = MDT_STRUCT_CLOSE; 652 template[32] = MDT_END; 653 check_amsg(amsg_type_define(ECLIPSE_WM_INTERFACE, 100, 654 template, mdt_wstat), __LINE__, 4); 655} 656 657#ifdef WORKER_TRACING 658trace_types_init(mdt_trace) 659amsg_type_t * mdt_trace; 660{ 661 amsg_type_t template[6]; 662 663 template[0] = MDT_BEGIN; 664 template[1] = MDT_STRUCT_OPEN; 665 template[2] = MDT_INT32; 666 template[3] = MDT_DWORD; 667 template[4] = MDT_STRUCT_CLOSE; 668 template[5] = MDT_END; 669 check_amsg(amsg_type_define(ECLIPSE_WM_INTERFACE, 101, 670 template, mdt_trace), __LINE__, 4); 671} 672#endif 673