1/* 2 * Copyright (c) 2007 The Regents of the University of California. 3 * Copyright (c) 2007-2008 Voltaire, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 * 33 */ 34 35/* 36 * Abstract: 37 * Implementation of osm_perfmgr_t. 38 * This object implements an IBA performance manager. 39 * 40 * Author: 41 * Ira Weiny, LLNL 42 */ 43 44#if HAVE_CONFIG_H 45# include <config.h> 46#endif /* HAVE_CONFIG_H */ 47 48#ifdef ENABLE_OSM_PERF_MGR 49 50#include <stdlib.h> 51#include <stdint.h> 52#include <string.h> 53#include <poll.h> 54#include <errno.h> 55#include <sys/time.h> 56#include <netinet/in.h> 57#include <float.h> 58#include <arpa/inet.h> 59#include <iba/ib_types.h> 60#include <complib/cl_debug.h> 61#include <complib/cl_thread.h> 62#include <vendor/osm_vendor_api.h> 63#include <opensm/osm_perfmgr.h> 64#include <opensm/osm_log.h> 65#include <opensm/osm_node.h> 66#include <opensm/osm_opensm.h> 67 68#define OSM_PERFMGR_INITIAL_TID_VALUE 0xcafe 69 70#if ENABLE_OSM_PERF_MGR_PROFILE 71struct { 72 double fastest_us; 73 double slowest_us; 74 double avg_us; 75 uint64_t num; 76} perfmgr_mad_stats = { 77 fastest_us: DBL_MAX, 78 slowest_us: DBL_MIN, 79 avg_us: 0, 80 num: 0 81}; 82 83/* diff must be something which can fit in a susecond_t */ 84static inline void update_mad_stats(struct timeval *diff) 85{ 86 double new = (diff->tv_sec * 1000000) + diff->tv_usec; 87 if (new < perfmgr_mad_stats.fastest_us) 88 perfmgr_mad_stats.fastest_us = new; 89 if (new > perfmgr_mad_stats.slowest_us) 90 perfmgr_mad_stats.slowest_us = new; 91 92 perfmgr_mad_stats.avg_us = 93 ((perfmgr_mad_stats.avg_us * perfmgr_mad_stats.num) + new) 94 / (perfmgr_mad_stats.num + 1); 95 perfmgr_mad_stats.num++; 96} 97 98static inline void perfmgr_clear_mad_stats(void) 99{ 100 perfmgr_mad_stats.fastest_us = DBL_MAX; 101 perfmgr_mad_stats.slowest_us = DBL_MIN; 102 perfmgr_mad_stats.avg_us = 0; 103 perfmgr_mad_stats.num = 0; 104} 105 106/* after and diff can be the same struct */ 107static inline void diff_time(struct timeval *before, 108 struct timeval *after, struct timeval *diff) 109{ 110 struct timeval tmp = *after; 111 if (tmp.tv_usec < before->tv_usec) { 112 tmp.tv_sec--; 113 tmp.tv_usec += 1000000; 114 } 115 diff->tv_sec = tmp.tv_sec - before->tv_sec; 116 diff->tv_usec = tmp.tv_usec - before->tv_usec; 117} 118 119#endif 120 121extern int wait_for_pending_transactions(osm_stats_t * stats); 122 123/********************************************************************** 124 * Internal helper functions. 125 **********************************************************************/ 126static inline void __init_monitored_nodes(osm_perfmgr_t * pm) 127{ 128 cl_qmap_init(&pm->monitored_map); 129 pm->remove_list = NULL; 130 cl_event_construct(&pm->sig_query); 131 cl_event_init(&pm->sig_query, FALSE); 132} 133 134static inline void 135__mark_for_removal(osm_perfmgr_t * pm, __monitored_node_t * node) 136{ 137 if (pm->remove_list) { 138 node->next = pm->remove_list; 139 pm->remove_list = node; 140 } else { 141 node->next = NULL; 142 pm->remove_list = node; 143 } 144} 145 146static inline void __remove_marked_nodes(osm_perfmgr_t * pm) 147{ 148 while (pm->remove_list) { 149 __monitored_node_t *next = pm->remove_list->next; 150 151 cl_qmap_remove_item(&(pm->monitored_map), 152 (cl_map_item_t *) (pm->remove_list)); 153 154 if (pm->remove_list->name) 155 free(pm->remove_list->name); 156 free(pm->remove_list); 157 pm->remove_list = next; 158 } 159} 160 161static inline void __decrement_outstanding_queries(osm_perfmgr_t * pm) 162{ 163 cl_atomic_dec(&(pm->outstanding_queries)); 164 cl_event_signal(&(pm->sig_query)); 165} 166 167/********************************************************************** 168 * Receive the MAD from the vendor layer and post it for processing by 169 * the dispatcher. 170 **********************************************************************/ 171static void 172osm_perfmgr_mad_recv_callback(osm_madw_t * p_madw, void *bind_context, 173 osm_madw_t * p_req_madw) 174{ 175 osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context; 176 177 OSM_LOG_ENTER(pm->log); 178 179 osm_madw_copy_context(p_madw, p_req_madw); 180 osm_mad_pool_put(pm->mad_pool, p_req_madw); 181 182 __decrement_outstanding_queries(pm); 183 184 /* post this message for later processing. */ 185 if (cl_disp_post(pm->pc_disp_h, OSM_MSG_MAD_PORT_COUNTERS, 186 (void *)p_madw, NULL, NULL) != CL_SUCCESS) { 187 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C01: " 188 "PerfMgr Dispatcher post failed\n"); 189 osm_mad_pool_put(pm->mad_pool, p_madw); 190 } 191 OSM_LOG_EXIT(pm->log); 192} 193 194/********************************************************************** 195 * Process MAD send errors. 196 **********************************************************************/ 197static void 198osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw) 199{ 200 osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context; 201 osm_madw_context_t *context = &(p_madw->context); 202 uint64_t node_guid = context->perfmgr_context.node_guid; 203 uint8_t port = context->perfmgr_context.port; 204 cl_map_item_t *p_node; 205 __monitored_node_t *p_mon_node; 206 207 OSM_LOG_ENTER(pm->log); 208 209 /* go ahead and get the monitored node struct to have the printable 210 * name if needed in messages 211 */ 212 if ((p_node = cl_qmap_get(&(pm->monitored_map), node_guid)) == 213 cl_qmap_end(&(pm->monitored_map))) { 214 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C15: GUID 0x%016" 215 PRIx64 " not found in monitored map\n", 216 node_guid); 217 goto Exit; 218 } 219 p_mon_node = (__monitored_node_t *) p_node; 220 221 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C02: %s (0x%" PRIx64 222 ") port %u\n", p_mon_node->name, p_mon_node->guid, port); 223 224 if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) { 225 /* First, find the node in the monitored map */ 226 cl_plock_acquire(pm->lock); 227 /* Now, validate port number */ 228 if (port > p_mon_node->redir_tbl_size) { 229 cl_plock_release(pm->lock); 230 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: " 231 "Invalid port num %u for %s (GUID 0x%016" 232 PRIx64 ") num ports %u\n", port, p_mon_node->name, 233 p_mon_node->guid, p_mon_node->redir_tbl_size); 234 goto Exit; 235 } 236 /* Clear redirection info */ 237 p_mon_node->redir_port[port].redir_lid = 0; 238 p_mon_node->redir_port[port].redir_qp = 0; 239 cl_plock_release(pm->lock); 240 } 241 242Exit: 243 osm_mad_pool_put(pm->mad_pool, p_madw); 244 245 __decrement_outstanding_queries(pm); 246 247 OSM_LOG_EXIT(pm->log); 248} 249 250/********************************************************************** 251 * Bind the PerfMgr to the vendor layer for MAD sends/receives 252 **********************************************************************/ 253ib_api_status_t 254osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid) 255{ 256 osm_bind_info_t bind_info; 257 ib_api_status_t status = IB_SUCCESS; 258 259 OSM_LOG_ENTER(pm->log); 260 261 if (pm->bind_handle != OSM_BIND_INVALID_HANDLE) { 262 OSM_LOG(pm->log, OSM_LOG_ERROR, 263 "ERR 4C03: Multiple binds not allowed\n"); 264 status = IB_ERROR; 265 goto Exit; 266 } 267 268 bind_info.port_guid = port_guid; 269 bind_info.mad_class = IB_MCLASS_PERF; 270 bind_info.class_version = 1; 271 bind_info.is_responder = FALSE; 272 bind_info.is_report_processor = FALSE; 273 bind_info.is_trap_processor = FALSE; 274 bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE; 275 bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE; 276 277 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 278 "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); 279 280 pm->bind_handle = osm_vendor_bind(pm->vendor, 281 &bind_info, 282 pm->mad_pool, 283 osm_perfmgr_mad_recv_callback, 284 osm_perfmgr_mad_send_err_callback, 285 pm); 286 287 if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { 288 status = IB_ERROR; 289 OSM_LOG(pm->log, OSM_LOG_ERROR, 290 "ERR 4C04: Vendor specific bind failed (%s)\n", 291 ib_get_err_str(status)); 292 goto Exit; 293 } 294 295Exit: 296 OSM_LOG_EXIT(pm->log); 297 return (status); 298} 299 300/********************************************************************** 301 * Unbind the PerfMgr from the vendor layer for MAD sends/receives 302 **********************************************************************/ 303static void osm_perfmgr_mad_unbind(osm_perfmgr_t * const pm) 304{ 305 OSM_LOG_ENTER(pm->log); 306 if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { 307 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C05: No previous bind\n"); 308 goto Exit; 309 } 310 osm_vendor_unbind(pm->bind_handle); 311Exit: 312 OSM_LOG_EXIT(pm->log); 313} 314 315/********************************************************************** 316 * Given a monitored node and a port, return the qp 317 **********************************************************************/ 318static ib_net32_t get_qp(__monitored_node_t * mon_node, uint8_t port) 319{ 320 ib_net32_t qp = cl_ntoh32(1); 321 322 if (mon_node && mon_node->redir_tbl_size && 323 port < mon_node->redir_tbl_size && 324 mon_node->redir_port[port].redir_lid && 325 mon_node->redir_port[port].redir_qp) 326 qp = mon_node->redir_port[port].redir_qp; 327 328 return qp; 329} 330 331/********************************************************************** 332 * Given a node, a port, and an optional monitored node, 333 * return the appropriate lid to query that port 334 **********************************************************************/ 335static ib_net16_t 336get_lid(osm_node_t * p_node, uint8_t port, __monitored_node_t * mon_node) 337{ 338 if (mon_node && mon_node->redir_tbl_size && 339 port < mon_node->redir_tbl_size && 340 mon_node->redir_port[port].redir_lid) 341 return mon_node->redir_port[port].redir_lid; 342 343 switch (p_node->node_info.node_type) { 344 case IB_NODE_TYPE_CA: 345 case IB_NODE_TYPE_ROUTER: 346 return osm_node_get_base_lid(p_node, port); 347 case IB_NODE_TYPE_SWITCH: 348 return osm_node_get_base_lid(p_node, 0); 349 default: 350 return 0; 351 } 352} 353 354/********************************************************************** 355 * Form and send the Port Counters MAD for a single port. 356 **********************************************************************/ 357static ib_api_status_t 358osm_perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid, 359 ib_net32_t dest_qp, uint8_t port, uint8_t mad_method, 360 osm_madw_context_t * const p_context) 361{ 362 ib_api_status_t status = IB_SUCCESS; 363 ib_port_counters_t *port_counter = NULL; 364 ib_perfmgt_mad_t *pm_mad = NULL; 365 osm_madw_t *p_madw = NULL; 366 367 OSM_LOG_ENTER(perfmgr->log); 368 369 p_madw = 370 osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle, 371 MAD_BLOCK_SIZE, NULL); 372 if (p_madw == NULL) 373 return (IB_INSUFFICIENT_MEMORY); 374 375 pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw); 376 377 /* build the mad */ 378 pm_mad->header.base_ver = 1; 379 pm_mad->header.mgmt_class = IB_MCLASS_PERF; 380 pm_mad->header.class_ver = 1; 381 pm_mad->header.method = mad_method; 382 pm_mad->header.status = 0; 383 pm_mad->header.class_spec = 0; 384 pm_mad->header.trans_id = 385 cl_hton64((uint64_t) cl_atomic_inc(&(perfmgr->trans_id))); 386 pm_mad->header.attr_id = IB_MAD_ATTR_PORT_CNTRS; 387 pm_mad->header.resv = 0; 388 pm_mad->header.attr_mod = 0; 389 390 port_counter = (ib_port_counters_t *) & (pm_mad->data); 391 memset(port_counter, 0, sizeof(*port_counter)); 392 port_counter->port_select = port; 393 port_counter->counter_select = 0xFFFF; 394 395 p_madw->mad_addr.dest_lid = dest_lid; 396 p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp; 397 p_madw->mad_addr.addr_type.gsi.remote_qkey = 398 cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY); 399 /* FIXME what about other partitions */ 400 p_madw->mad_addr.addr_type.gsi.pkey_ix = 0; 401 p_madw->mad_addr.addr_type.gsi.service_level = 0; 402 p_madw->mad_addr.addr_type.gsi.global_route = FALSE; 403 p_madw->resp_expected = TRUE; 404 405 if (p_context) 406 p_madw->context = *p_context; 407 408 status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE); 409 410 if (status == IB_SUCCESS) { 411 /* pause this thread if we have too many outstanding requests */ 412 cl_atomic_inc(&(perfmgr->outstanding_queries)); 413 if (perfmgr->outstanding_queries > 414 perfmgr->max_outstanding_queries) { 415 perfmgr->sweep_state = PERFMGR_SWEEP_SUSPENDED; 416 cl_event_wait_on(&perfmgr->sig_query, EVENT_NO_TIMEOUT, 417 TRUE); 418 perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE; 419 } 420 } 421 422 OSM_LOG_EXIT(perfmgr->log); 423 return (status); 424} 425 426/********************************************************************** 427 * sweep the node_guid_tbl and collect the node guids to be tracked 428 **********************************************************************/ 429static void __collect_guids(cl_map_item_t * const p_map_item, void *context) 430{ 431 osm_node_t *node = (osm_node_t *) p_map_item; 432 uint64_t node_guid = cl_ntoh64(node->node_info.node_guid); 433 osm_perfmgr_t *pm = (osm_perfmgr_t *) context; 434 __monitored_node_t *mon_node = NULL; 435 uint32_t size; 436 437 OSM_LOG_ENTER(pm->log); 438 439 if (cl_qmap_get(&(pm->monitored_map), node_guid) 440 == cl_qmap_end(&(pm->monitored_map))) { 441 /* if not already in our map add it */ 442 size = node->node_info.num_ports; 443 mon_node = malloc(sizeof(*mon_node) + sizeof(redir_t) * size); 444 if (!mon_node) { 445 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C06: " 446 "malloc failed: not handling node %s" 447 "(GUID 0x%" PRIx64 ")\n", node->print_desc, node_guid); 448 goto Exit; 449 } 450 memset(mon_node, 0, sizeof(*mon_node) + sizeof(redir_t) * size); 451 mon_node->guid = node_guid; 452 mon_node->name = strdup(node->print_desc); 453 mon_node->redir_tbl_size = size + 1; 454 cl_qmap_insert(&(pm->monitored_map), node_guid, 455 (cl_map_item_t *) mon_node); 456 } 457 458Exit: 459 OSM_LOG_EXIT(pm->log); 460} 461 462/********************************************************************** 463 * query the Port Counters of all the nodes in the subnet. 464 **********************************************************************/ 465static void 466__osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) 467{ 468 ib_api_status_t status = IB_SUCCESS; 469 uint8_t port = 0, startport = 1; 470 osm_perfmgr_t *pm = (osm_perfmgr_t *) context; 471 osm_node_t *node = NULL; 472 __monitored_node_t *mon_node = (__monitored_node_t *) p_map_item; 473 osm_madw_context_t mad_context; 474 uint8_t num_ports = 0; 475 uint64_t node_guid = 0; 476 ib_net32_t remote_qp; 477 478 OSM_LOG_ENTER(pm->log); 479 480 cl_plock_acquire(pm->lock); 481 node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); 482 if (!node) { 483 OSM_LOG(pm->log, OSM_LOG_ERROR, 484 "ERR 4C07: Node \"%s\" (guid 0x%" PRIx64 485 ") no longer exists so removing from PerfMgr monitoring\n", 486 mon_node->name, mon_node->guid); 487 __mark_for_removal(pm, mon_node); 488 goto Exit; 489 } 490 491 num_ports = osm_node_get_num_physp(node); 492 node_guid = cl_ntoh64(node->node_info.node_guid); 493 494 /* make sure we have a database object ready to store this information */ 495 if (perfmgr_db_create_entry(pm->db, node_guid, num_ports, 496 node->print_desc) != 497 PERFMGR_EVENT_DB_SUCCESS) { 498 OSM_LOG(pm->log, OSM_LOG_ERROR, 499 "ERR 4C08: DB create entry failed for 0x%" 500 PRIx64 " (%s) : %s\n", node_guid, node->print_desc, 501 strerror(errno)); 502 goto Exit; 503 } 504 505 /* if switch, check for enhanced port 0 */ 506 if (osm_node_get_type(node) == IB_NODE_TYPE_SWITCH && 507 node->sw && 508 ib_switch_info_is_enhanced_port0(&node->sw->switch_info)) 509 startport = 0; 510 511 /* issue the query for each port */ 512 for (port = startport; port < num_ports; port++) { 513 ib_net16_t lid; 514 515 if (!osm_node_get_physp_ptr(node, port)) 516 continue; 517 518 lid = get_lid(node, port, mon_node); 519 if (lid == 0) { 520 OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64 521 " port %d (%s): port out of range, skipping\n", 522 cl_ntoh64(node->node_info.node_guid), port, 523 node->print_desc); 524 continue; 525 } 526 527 remote_qp = get_qp(mon_node, port); 528 529 mad_context.perfmgr_context.node_guid = node_guid; 530 mad_context.perfmgr_context.port = port; 531 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET; 532#if ENABLE_OSM_PERF_MGR_PROFILE 533 gettimeofday(&(mad_context.perfmgr_context.query_start), NULL); 534#endif 535 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" 536 PRIx64 " port %d (lid %u) (%s)\n", node_guid, port, 537 cl_ntoh16(lid), node->print_desc); 538 status = 539 osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port, 540 IB_MAD_METHOD_GET, &mad_context); 541 if (status != IB_SUCCESS) 542 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C09: " 543 "Failed to issue port counter query for node 0x%" 544 PRIx64 " port %d (%s)\n", 545 node->node_info.node_guid, port, 546 node->print_desc); 547 } 548Exit: 549 cl_plock_release(pm->lock); 550 OSM_LOG_EXIT(pm->log); 551} 552 553/********************************************************************** 554 * Discovery stuff. 555 * Basically this code should not be here, but merged with main OpenSM 556 **********************************************************************/ 557extern void osm_drop_mgr_process(IN osm_sm_t *sm); 558 559static int sweep_hop_1(osm_sm_t * sm) 560{ 561 ib_api_status_t status = IB_SUCCESS; 562 osm_bind_handle_t h_bind; 563 osm_madw_context_t context; 564 osm_node_t *p_node; 565 osm_port_t *p_port; 566 osm_physp_t *p_physp; 567 osm_dr_path_t *p_dr_path; 568 osm_dr_path_t hop_1_path; 569 ib_net64_t port_guid; 570 uint8_t port_num; 571 uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; 572 uint8_t num_ports; 573 osm_physp_t *p_ext_physp; 574 575 port_guid = sm->p_subn->sm_port_guid; 576 577 p_port = osm_get_port_by_guid(sm->p_subn, port_guid); 578 if (!p_port) { 579 OSM_LOG(sm->p_log, OSM_LOG_ERROR, 580 "ERR 4C81: No SM port object\n"); 581 return -1; 582 } 583 584 p_node = p_port->p_node; 585 port_num = ib_node_info_get_local_port_num(&p_node->node_info); 586 587 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 588 "Probing hop 1 on local port %u\n", port_num); 589 590 p_physp = osm_node_get_physp_ptr(p_node, port_num); 591 592 CL_ASSERT(p_physp); 593 594 p_dr_path = osm_physp_get_dr_path_ptr(p_physp); 595 h_bind = osm_dr_path_get_bind_handle(p_dr_path); 596 597 CL_ASSERT(h_bind != OSM_BIND_INVALID_HANDLE); 598 599 memset(path_array, 0, sizeof(path_array)); 600 /* the hop_1 operations depend on the type of our node. 601 * Currently - legal nodes that can host SM are SW and CA */ 602 switch (osm_node_get_type(p_node)) { 603 case IB_NODE_TYPE_CA: 604 case IB_NODE_TYPE_ROUTER: 605 memset(&context, 0, sizeof(context)); 606 context.ni_context.node_guid = osm_node_get_node_guid(p_node); 607 context.ni_context.port_num = port_num; 608 609 path_array[1] = port_num; 610 611 osm_dr_path_init(&hop_1_path, h_bind, 1, path_array); 612 status = osm_req_get(sm, &hop_1_path, 613 IB_MAD_ATTR_NODE_INFO, 0, 614 CL_DISP_MSGID_NONE, &context); 615 616 if (status != IB_SUCCESS) 617 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4C82: " 618 "Request for NodeInfo failed\n"); 619 break; 620 621 case IB_NODE_TYPE_SWITCH: 622 /* Need to go over all the ports of the switch, and send a node_info 623 * from them. This doesn't include the port 0 of the switch, which 624 * hosts the SM. 625 * Note: We'll send another switchInfo on port 0, since if no ports 626 * are connected, we still want to get some response, and have the 627 * subnet come up. 628 */ 629 num_ports = osm_node_get_num_physp(p_node); 630 for (port_num = 0; port_num < num_ports; port_num++) { 631 /* go through the port only if the port is not DOWN */ 632 p_ext_physp = osm_node_get_physp_ptr(p_node, port_num); 633 if (!p_ext_physp || ib_port_info_get_port_state 634 (&p_ext_physp->port_info) <= IB_LINK_DOWN) 635 continue; 636 637 memset(&context, 0, sizeof(context)); 638 context.ni_context.node_guid = 639 osm_node_get_node_guid(p_node); 640 context.ni_context.port_num = port_num; 641 642 path_array[1] = port_num; 643 644 osm_dr_path_init(&hop_1_path, h_bind, 1, path_array); 645 status = osm_req_get(sm, &hop_1_path, 646 IB_MAD_ATTR_NODE_INFO, 0, 647 CL_DISP_MSGID_NONE, &context); 648 649 if (status != IB_SUCCESS) 650 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4C82: " 651 "Request for NodeInfo failed\n"); 652 } 653 break; 654 655 default: 656 OSM_LOG(sm->p_log, OSM_LOG_ERROR, 657 "ERR 4C83: Unknown node type %d\n", 658 osm_node_get_type(p_node)); 659 } 660 661 return (status); 662} 663 664static unsigned is_sm_port_down(osm_sm_t * const sm) 665{ 666 ib_net64_t port_guid; 667 osm_port_t *p_port; 668 669 port_guid = sm->p_subn->sm_port_guid; 670 if (port_guid == 0) 671 return 1; 672 673 CL_PLOCK_ACQUIRE(sm->p_lock); 674 p_port = osm_get_port_by_guid(sm->p_subn, port_guid); 675 if (!p_port) { 676 CL_PLOCK_RELEASE(sm->p_lock); 677 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4C85: " 678 "SM port with GUID:%016" PRIx64 " is unknown\n", 679 cl_ntoh64(port_guid)); 680 return 1; 681 } 682 CL_PLOCK_RELEASE(sm->p_lock); 683 684 return osm_physp_get_port_state(p_port->p_physp) == IB_LINK_DOWN; 685} 686 687static int sweep_hop_0(osm_sm_t * const sm) 688{ 689 ib_api_status_t status; 690 osm_dr_path_t dr_path; 691 osm_bind_handle_t h_bind; 692 uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; 693 694 memset(path_array, 0, sizeof(path_array)); 695 696 h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); 697 if (h_bind == OSM_BIND_INVALID_HANDLE) { 698 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No bound ports.\n"); 699 return -1; 700 } 701 702 osm_dr_path_init(&dr_path, h_bind, 0, path_array); 703 status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0, 704 CL_DISP_MSGID_NONE, NULL); 705 706 if (status != IB_SUCCESS) 707 OSM_LOG(sm->p_log, OSM_LOG_ERROR, 708 "ERR 4C86: Request for NodeInfo failed\n"); 709 710 return (status); 711} 712 713static void reset_node_count(cl_map_item_t * const p_map_item, void *cxt) 714{ 715 osm_node_t *p_node = (osm_node_t *) p_map_item; 716 p_node->discovery_count = 0; 717} 718 719static void reset_port_count(cl_map_item_t * const p_map_item, void *cxt) 720{ 721 osm_port_t *p_port = (osm_port_t *) p_map_item; 722 p_port->discovery_count = 0; 723} 724 725static void reset_switch_count(cl_map_item_t * const p_map_item, void *cxt) 726{ 727 osm_switch_t *p_sw = (osm_switch_t *) p_map_item; 728 p_sw->discovery_count = 0; 729 p_sw->need_update = 0; 730} 731 732static int perfmgr_discovery(osm_opensm_t * osm) 733{ 734 int ret; 735 736 CL_PLOCK_ACQUIRE(&osm->lock); 737 cl_qmap_apply_func(&osm->subn.node_guid_tbl, reset_node_count, NULL); 738 cl_qmap_apply_func(&osm->subn.port_guid_tbl, reset_port_count, NULL); 739 cl_qmap_apply_func(&osm->subn.sw_guid_tbl, reset_switch_count, NULL); 740 CL_PLOCK_RELEASE(&osm->lock); 741 742 osm->subn.in_sweep_hop_0 = TRUE; 743 744 ret = sweep_hop_0(&osm->sm); 745 if (ret) 746 goto _exit; 747 748 if (wait_for_pending_transactions(&osm->stats)) 749 goto _exit; 750 751 if (is_sm_port_down(&osm->sm)) { 752 OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "SM port is down\n"); 753 goto _drop; 754 } 755 756 osm->subn.in_sweep_hop_0 = FALSE; 757 758 ret = sweep_hop_1(&osm->sm); 759 if (ret) 760 goto _exit; 761 762 if (wait_for_pending_transactions(&osm->stats)) 763 goto _exit; 764 765_drop: 766 osm_drop_mgr_process(&osm->sm); 767 768_exit: 769 return ret; 770} 771 772/********************************************************************** 773 * Main PerfMgr processor - query the performance counters. 774 **********************************************************************/ 775void osm_perfmgr_process(osm_perfmgr_t * pm) 776{ 777#if ENABLE_OSM_PERF_MGR_PROFILE 778 struct timeval before, after; 779#endif 780 781 if (pm->state != PERFMGR_STATE_ENABLED) 782 return; 783 784 if (pm->subn->sm_state == IB_SMINFO_STATE_STANDBY || 785 pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE) 786 perfmgr_discovery(pm->subn->p_osm); 787 788#if ENABLE_OSM_PERF_MGR_PROFILE 789 gettimeofday(&before, NULL); 790#endif 791 pm->sweep_state = PERFMGR_SWEEP_ACTIVE; 792 /* With the global lock held collect the node guids */ 793 /* FIXME we should be able to track SA notices 794 * and not have to sweep the node_guid_tbl each pass 795 */ 796 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Gathering PerfMgr stats\n"); 797 cl_plock_acquire(pm->lock); 798 cl_qmap_apply_func(&(pm->subn->node_guid_tbl), 799 __collect_guids, (void *)pm); 800 cl_plock_release(pm->lock); 801 802 /* then for each node query their counters */ 803 cl_qmap_apply_func(&(pm->monitored_map), 804 __osm_perfmgr_query_counters, (void *)pm); 805 806 /* Clean out any nodes found to be removed during the 807 * sweep 808 */ 809 __remove_marked_nodes(pm); 810 811#if ENABLE_OSM_PERF_MGR_PROFILE 812 /* spin on outstanding queries */ 813 while (pm->outstanding_queries > 0) 814 cl_event_wait_on(&pm->sig_sweep, 1000, TRUE); 815 816 gettimeofday(&after, NULL); 817 diff_time(&before, &after, &after); 818 osm_log(pm->log, OSM_LOG_INFO, 819 "PerfMgr total sweep time : %ld.%06ld s\n" 820 " fastest mad : %g us\n" 821 " slowest mad : %g us\n" 822 " average mad : %g us\n", 823 after.tv_sec, after.tv_usec, 824 perfmgr_mad_stats.fastest_us, 825 perfmgr_mad_stats.slowest_us, perfmgr_mad_stats.avg_us); 826 perfmgr_clear_mad_stats(); 827#endif 828 829 pm->sweep_state = PERFMGR_SWEEP_SLEEP; 830} 831 832/********************************************************************** 833 * PerfMgr timer - loop continuously and signal SM to run PerfMgr 834 * processor. 835 **********************************************************************/ 836static void perfmgr_sweep(void *arg) 837{ 838 osm_perfmgr_t *pm = arg; 839 840 if (pm->state == PERFMGR_STATE_ENABLED) 841 osm_sm_signal(pm->sm, OSM_SIGNAL_PERFMGR_SWEEP); 842 cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000); 843} 844 845/********************************************************************** 846 **********************************************************************/ 847void osm_perfmgr_shutdown(osm_perfmgr_t * const pm) 848{ 849 OSM_LOG_ENTER(pm->log); 850 cl_timer_stop(&pm->sweep_timer); 851 osm_perfmgr_mad_unbind(pm); 852 OSM_LOG_EXIT(pm->log); 853} 854 855/********************************************************************** 856 **********************************************************************/ 857void osm_perfmgr_destroy(osm_perfmgr_t * const pm) 858{ 859 OSM_LOG_ENTER(pm->log); 860 perfmgr_db_destroy(pm->db); 861 cl_timer_destroy(&pm->sweep_timer); 862 OSM_LOG_EXIT(pm->log); 863} 864 865/********************************************************************** 866 * Detect if someone else on the network could have cleared the counters 867 * without us knowing. This is easy to detect because the counters never wrap 868 * but are "sticky" 869 * 870 * The one time this will not work is if the port is getting errors fast enough 871 * to have the reading overtake the previous reading. In this case counters 872 * will be missed. 873 **********************************************************************/ 874static void 875osm_perfmgr_check_oob_clear(osm_perfmgr_t * pm, __monitored_node_t *mon_node, 876 uint8_t port, perfmgr_db_err_reading_t * cr, 877 perfmgr_db_data_cnt_reading_t * dc) 878{ 879 perfmgr_db_err_reading_t prev_err; 880 perfmgr_db_data_cnt_reading_t prev_dc; 881 882 if (perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_err) 883 != PERFMGR_EVENT_DB_SUCCESS) { 884 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous " 885 "error reading for %s (guid 0x%" PRIx64 ") port %u\n", 886 mon_node->name, mon_node->guid, port); 887 return; 888 } 889 890 if (cr->symbol_err_cnt < prev_err.symbol_err_cnt || 891 cr->link_err_recover < prev_err.link_err_recover || 892 cr->link_downed < prev_err.link_downed || 893 cr->rcv_err < prev_err.rcv_err || 894 cr->rcv_rem_phys_err < prev_err.rcv_rem_phys_err || 895 cr->rcv_switch_relay_err < prev_err.rcv_switch_relay_err || 896 cr->xmit_discards < prev_err.xmit_discards || 897 cr->xmit_constraint_err < prev_err.xmit_constraint_err || 898 cr->rcv_constraint_err < prev_err.rcv_constraint_err || 899 cr->link_integrity < prev_err.link_integrity || 900 cr->buffer_overrun < prev_err.buffer_overrun || 901 cr->vl15_dropped < prev_err.vl15_dropped) { 902 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C0A: " 903 "Detected an out of band error clear " 904 "on %s (0x%" PRIx64 ") port %u\n", 905 mon_node->name, mon_node->guid, port); 906 perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port); 907 } 908 909 /* FIXME handle extended counters */ 910 if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &prev_dc) 911 != PERFMGR_EVENT_DB_SUCCESS) { 912 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 913 "Failed to find previous data count " 914 "reading for %s (0x%" PRIx64 ") port %u\n", 915 mon_node->name, mon_node->guid, port); 916 return; 917 } 918 919 if (dc->xmit_data < prev_dc.xmit_data || 920 dc->rcv_data < prev_dc.rcv_data || 921 dc->xmit_pkts < prev_dc.xmit_pkts || 922 dc->rcv_pkts < prev_dc.rcv_pkts) { 923 OSM_LOG(pm->log, OSM_LOG_ERROR, 924 "PerfMgr: ERR 4C0B: Detected an out of band data counter " 925 "clear on node %s (0x%" PRIx64 ") port %u\n", 926 mon_node->name, mon_node->guid, port); 927 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); 928 } 929} 930 931/********************************************************************** 932 * Return 1 if the value is "close" to overflowing 933 **********************************************************************/ 934static int counter_overflow_4(uint8_t val) 935{ 936 return (val >= 10); 937} 938 939static int counter_overflow_8(uint8_t val) 940{ 941 return (val >= (UINT8_MAX - (UINT8_MAX / 4))); 942} 943 944static int counter_overflow_16(ib_net16_t val) 945{ 946 return (cl_ntoh16(val) >= (UINT16_MAX - (UINT16_MAX / 4))); 947} 948 949static int counter_overflow_32(ib_net32_t val) 950{ 951 return (cl_ntoh32(val) >= (UINT32_MAX - (UINT32_MAX / 4))); 952} 953 954/********************************************************************** 955 * Check if the port counters have overflowed and if so issue a clear 956 * MAD to the port. 957 **********************************************************************/ 958static void 959osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node, 960 uint8_t port, ib_port_counters_t * pc) 961{ 962 osm_madw_context_t mad_context; 963 ib_api_status_t status; 964 ib_net32_t remote_qp; 965 966 OSM_LOG_ENTER(pm->log); 967 968 if (counter_overflow_16(pc->symbol_err_cnt) || 969 counter_overflow_8(pc->link_err_recover) || 970 counter_overflow_8(pc->link_downed) || 971 counter_overflow_16(pc->rcv_err) || 972 counter_overflow_16(pc->rcv_rem_phys_err) || 973 counter_overflow_16(pc->rcv_switch_relay_err) || 974 counter_overflow_16(pc->xmit_discards) || 975 counter_overflow_8(pc->xmit_constraint_err) || 976 counter_overflow_8(pc->rcv_constraint_err) || 977 counter_overflow_4(PC_LINK_INT(pc->link_int_buffer_overrun)) || 978 counter_overflow_4(PC_BUF_OVERRUN(pc->link_int_buffer_overrun)) || 979 counter_overflow_16(pc->vl15_dropped) || 980 counter_overflow_32(pc->xmit_data) || 981 counter_overflow_32(pc->rcv_data) || 982 counter_overflow_32(pc->xmit_pkts) || 983 counter_overflow_32(pc->rcv_pkts)) { 984 osm_node_t *p_node = NULL; 985 ib_net16_t lid = 0; 986 987 osm_log(pm->log, OSM_LOG_VERBOSE, 988 "PerfMgr: Counter overflow: %s (0x%" PRIx64 989 ") port %d; clearing counters\n", 990 mon_node->name, mon_node->guid, port); 991 992 cl_plock_acquire(pm->lock); 993 p_node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); 994 lid = get_lid(p_node, port, mon_node); 995 cl_plock_release(pm->lock); 996 if (lid == 0) { 997 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C0C: " 998 "Failed to clear counters for %s (0x%" 999 PRIx64 ") port %d; failed to get lid\n", 1000 mon_node->name, mon_node->guid, port); 1001 goto Exit; 1002 } 1003 1004 remote_qp = get_qp(NULL, port); 1005 1006 mad_context.perfmgr_context.node_guid = mon_node->guid; 1007 mad_context.perfmgr_context.port = port; 1008 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET; 1009 /* clear port counters */ 1010 status = 1011 osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port, 1012 IB_MAD_METHOD_SET, &mad_context); 1013 if (status != IB_SUCCESS) 1014 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C11: " 1015 "Failed to send clear counters MAD for %s (0x%" 1016 PRIx64 ") port %d\n", 1017 mon_node->name, mon_node->guid, port); 1018 1019 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); 1020 } 1021 1022Exit: 1023 OSM_LOG_EXIT(pm->log); 1024} 1025 1026/********************************************************************** 1027 * Check values for logging of errors 1028 **********************************************************************/ 1029static void 1030osm_perfmgr_log_events(osm_perfmgr_t * pm, __monitored_node_t *mon_node, uint8_t port, 1031 perfmgr_db_err_reading_t * reading) 1032{ 1033 perfmgr_db_err_reading_t prev_read; 1034 time_t time_diff = 0; 1035 perfmgr_db_err_t err = 1036 perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_read); 1037 1038 if (err != PERFMGR_EVENT_DB_SUCCESS) { 1039 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous " 1040 "reading for %s (0x%" PRIx64 ") port %u\n", 1041 mon_node->name, mon_node->guid, port); 1042 return; 1043 } 1044 time_diff = (reading->time - prev_read.time); 1045 1046 /* FIXME these events should be defineable by the user in a config 1047 * file somewhere. */ 1048 if (reading->symbol_err_cnt > prev_read.symbol_err_cnt) 1049 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C0D: " 1050 "Found %" PRIu64 " Symbol errors in %lu sec on %s (0x%" 1051 PRIx64 ") port %u\n", 1052 (reading->symbol_err_cnt - prev_read.symbol_err_cnt), 1053 time_diff, mon_node->name, mon_node->guid, port); 1054 1055 if (reading->rcv_err > prev_read.rcv_err) 1056 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C0E: " 1057 "Found %" PRIu64 1058 " Receive errors in %lu sec on %s (0x%" PRIx64 1059 ") port %u\n", (reading->rcv_err - prev_read.rcv_err), 1060 time_diff, mon_node->name, mon_node->guid, port); 1061 1062 if (reading->xmit_discards > prev_read.xmit_discards) 1063 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C0F: " 1064 "Found %" PRIu64 " Xmit Discards in %lu sec on %s (0x%" 1065 PRIx64 ") port %u\n", 1066 (reading->xmit_discards - prev_read.xmit_discards), 1067 time_diff, mon_node->name, mon_node->guid, port); 1068} 1069 1070/********************************************************************** 1071 * The dispatcher uses a thread pool which will call this function when 1072 * we have a thread available to process our mad received from the wire. 1073 **********************************************************************/ 1074static void osm_pc_rcv_process(void *context, void *data) 1075{ 1076 osm_perfmgr_t *const pm = (osm_perfmgr_t *) context; 1077 osm_madw_t *p_madw = (osm_madw_t *) data; 1078 osm_madw_context_t *mad_context = &(p_madw->context); 1079 ib_port_counters_t *wire_read = 1080 (ib_port_counters_t *) & (osm_madw_get_perfmgt_mad_ptr(p_madw)-> 1081 data); 1082 ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw); 1083 uint64_t node_guid = mad_context->perfmgr_context.node_guid; 1084 uint8_t port = mad_context->perfmgr_context.port; 1085 perfmgr_db_err_reading_t err_reading; 1086 perfmgr_db_data_cnt_reading_t data_reading; 1087 cl_map_item_t *p_node; 1088 __monitored_node_t *p_mon_node; 1089 1090 OSM_LOG_ENTER(pm->log); 1091 1092 /* go ahead and get the monitored node struct to have the printable 1093 * name if needed in messages 1094 */ 1095 if ((p_node = cl_qmap_get(&(pm->monitored_map), node_guid)) == 1096 cl_qmap_end(&(pm->monitored_map))) { 1097 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C12: GUID 0x%016" 1098 PRIx64 " not found in monitored map\n", 1099 node_guid); 1100 goto Exit; 1101 } 1102 p_mon_node = (__monitored_node_t *) p_node; 1103 1104 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1105 "Processing received MAD status 0x%x context 0x%" 1106 PRIx64 " port %u\n", p_mad->status, node_guid, port); 1107 1108 /* Response could also be redirection (IBM eHCA PMA does this) */ 1109 if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { 1110 char gid_str[INET6_ADDRSTRLEN]; 1111 ib_class_port_info_t *cpi = 1112 (ib_class_port_info_t *) & 1113 (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); 1114 ib_api_status_t status; 1115 1116 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1117 "Redirection to LID %u GID %s QP 0x%x received\n", 1118 cl_ntoh16(cpi->redir_lid), 1119 inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str, 1120 sizeof gid_str), 1121 cl_ntoh32(cpi->redir_qp)); 1122 1123 /* LID or GID redirection ? */ 1124 /* For GID redirection, need to get PathRecord from SA */ 1125 if (cpi->redir_lid == 0) { 1126 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1127 "GID redirection not currently implemented!\n"); 1128 goto Exit; 1129 } 1130 1131 if (!pm->subn->opt.perfmgr_redir) { 1132 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: " 1133 "redirection requested but disabled\n"); 1134 goto Exit; 1135 } 1136 1137 /* LID redirection support (easier than GID redirection) */ 1138 cl_plock_acquire(pm->lock); 1139 /* Now, validate port number */ 1140 if (port > p_mon_node->redir_tbl_size) { 1141 cl_plock_release(pm->lock); 1142 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C13: " 1143 "Invalid port num %d for GUID 0x%016" 1144 PRIx64 " num ports %d\n", port, node_guid, 1145 p_mon_node->redir_tbl_size); 1146 goto Exit; 1147 } 1148 p_mon_node->redir_port[port].redir_lid = cpi->redir_lid; 1149 p_mon_node->redir_port[port].redir_qp = cpi->redir_qp; 1150 cl_plock_release(pm->lock); 1151 1152 /* Finally, reissue the query to the redirected location */ 1153 status = 1154 osm_perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp, 1155 port, 1156 mad_context->perfmgr_context. 1157 mad_method, mad_context); 1158 if (status != IB_SUCCESS) 1159 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C14: " 1160 "Failed to send redirected MAD with method 0x%x for node 0x%" 1161 PRIx64 " port %d\n", 1162 mad_context->perfmgr_context.mad_method, 1163 node_guid, port); 1164 goto Exit; 1165 } 1166 1167 CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS); 1168 1169 perfmgr_db_fill_err_read(wire_read, &err_reading); 1170 /* FIXME separate query for extended counters if they are supported 1171 * on the port. 1172 */ 1173 perfmgr_db_fill_data_cnt_read_pc(wire_read, &data_reading); 1174 1175 /* detect an out of band clear on the port */ 1176 if (mad_context->perfmgr_context.mad_method != IB_MAD_METHOD_SET) 1177 osm_perfmgr_check_oob_clear(pm, p_mon_node, port, 1178 &err_reading, &data_reading); 1179 1180 /* log any critical events from this reading */ 1181 osm_perfmgr_log_events(pm, p_mon_node, port, &err_reading); 1182 1183 if (mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) { 1184 perfmgr_db_add_err_reading(pm->db, node_guid, port, 1185 &err_reading); 1186 perfmgr_db_add_dc_reading(pm->db, node_guid, port, 1187 &data_reading); 1188 } else { 1189 perfmgr_db_clear_prev_err(pm->db, node_guid, port); 1190 perfmgr_db_clear_prev_dc(pm->db, node_guid, port); 1191 } 1192 1193 osm_perfmgr_check_overflow(pm, p_mon_node, port, wire_read); 1194 1195#if ENABLE_OSM_PERF_MGR_PROFILE 1196 do { 1197 struct timeval proc_time; 1198 gettimeofday(&proc_time, NULL); 1199 diff_time(&(p_madw->context.perfmgr_context.query_start), 1200 &proc_time, &proc_time); 1201 update_mad_stats(&proc_time); 1202 } while (0); 1203#endif 1204 1205Exit: 1206 osm_mad_pool_put(pm->mad_pool, p_madw); 1207 1208 OSM_LOG_EXIT(pm->log); 1209} 1210 1211/********************************************************************** 1212 * Initialize the PerfMgr object 1213 **********************************************************************/ 1214ib_api_status_t 1215osm_perfmgr_init(osm_perfmgr_t * const pm, osm_opensm_t *osm, 1216 const osm_subn_opt_t * const p_opt) 1217{ 1218 ib_api_status_t status = IB_SUCCESS; 1219 1220 OSM_LOG_ENTER(&osm->log); 1221 1222 OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n"); 1223 1224 memset(pm, 0, sizeof(*pm)); 1225 1226 cl_event_construct(&pm->sig_sweep); 1227 cl_event_init(&pm->sig_sweep, FALSE); 1228 pm->subn = &osm->subn; 1229 pm->sm = &osm->sm; 1230 pm->log = &osm->log; 1231 pm->mad_pool = &osm->mad_pool; 1232 pm->vendor = osm->p_vendor; 1233 pm->trans_id = OSM_PERFMGR_INITIAL_TID_VALUE; 1234 pm->lock = &osm->lock; 1235 pm->state = 1236 p_opt->perfmgr ? PERFMGR_STATE_ENABLED : PERFMGR_STATE_DISABLE; 1237 pm->sweep_time_s = p_opt->perfmgr_sweep_time_s; 1238 pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries; 1239 pm->osm = osm; 1240 1241 status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm); 1242 if (status != IB_SUCCESS) 1243 goto Exit; 1244 1245 pm->db = perfmgr_db_construct(pm); 1246 if (!pm->db) { 1247 pm->state = PERFMGR_STATE_NO_DB; 1248 goto Exit; 1249 } 1250 1251 pm->pc_disp_h = cl_disp_register(&osm->disp, OSM_MSG_MAD_PORT_COUNTERS, 1252 osm_pc_rcv_process, pm); 1253 if (pm->pc_disp_h == CL_DISP_INVALID_HANDLE) 1254 goto Exit; 1255 1256 __init_monitored_nodes(pm); 1257 1258 cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000); 1259 1260Exit: 1261 OSM_LOG_EXIT(pm->log); 1262 return (status); 1263} 1264 1265/********************************************************************** 1266 * Clear the counters from the db 1267 **********************************************************************/ 1268void osm_perfmgr_clear_counters(osm_perfmgr_t * pm) 1269{ 1270 /** 1271 * FIXME todo issue clear on the fabric? 1272 */ 1273 perfmgr_db_clear_counters(pm->db); 1274 osm_log(pm->log, OSM_LOG_INFO, "PerfMgr counters cleared\n"); 1275} 1276 1277/******************************************************************* 1278 * Have the DB dump its information to the file specified 1279 *******************************************************************/ 1280void osm_perfmgr_dump_counters(osm_perfmgr_t * pm, perfmgr_db_dump_t dump_type) 1281{ 1282 char path[256]; 1283 char *file_name; 1284 if (pm->subn->opt.event_db_dump_file) 1285 file_name = pm->subn->opt.event_db_dump_file; 1286 else { 1287 snprintf(path, sizeof(path), "%s/%s", 1288 pm->subn->opt.dump_files_dir, 1289 OSM_PERFMGR_DEFAULT_DUMP_FILE); 1290 file_name = path; 1291 } 1292 if (perfmgr_db_dump(pm->db, file_name, dump_type) != 0) 1293 OSM_LOG(pm->log, OSM_LOG_ERROR, "Failed to dump file %s : %s", 1294 file_name, strerror(errno)); 1295} 1296 1297/******************************************************************* 1298 * Have the DB print its information to the fp specified 1299 *******************************************************************/ 1300void 1301osm_perfmgr_print_counters(osm_perfmgr_t *pm, char *nodename, FILE *fp) 1302{ 1303 uint64_t guid = strtoull(nodename, NULL, 0); 1304 if (guid == 0 && errno == EINVAL) { 1305 perfmgr_db_print_by_name(pm->db, nodename, fp); 1306 } else { 1307 perfmgr_db_print_by_guid(pm->db, guid, fp); 1308 } 1309} 1310 1311#endif /* ENABLE_OSM_PERF_MGR */ 1312