1219820Sjeff/* 2219820Sjeff * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. 3219820Sjeff * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. 4219820Sjeff * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5219820Sjeff * 6219820Sjeff * This software is available to you under a choice of one of two 7219820Sjeff * licenses. You may choose to be licensed under the terms of the GNU 8219820Sjeff * General Public License (GPL) Version 2, available from the file 9219820Sjeff * COPYING in the main directory of this source tree, or the 10219820Sjeff * OpenIB.org BSD license below: 11219820Sjeff * 12219820Sjeff * Redistribution and use in source and binary forms, with or 13219820Sjeff * without modification, are permitted provided that the following 14219820Sjeff * conditions are met: 15219820Sjeff * 16219820Sjeff * - Redistributions of source code must retain the above 17219820Sjeff * copyright notice, this list of conditions and the following 18219820Sjeff * disclaimer. 19219820Sjeff * 20219820Sjeff * - Redistributions in binary form must reproduce the above 21219820Sjeff * copyright notice, this list of conditions and the following 22219820Sjeff * disclaimer in the documentation and/or other materials 23219820Sjeff * provided with the distribution. 24219820Sjeff * 25219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32219820Sjeff * SOFTWARE. 
33219820Sjeff * 34219820Sjeff */ 35219820Sjeff 36219820Sjeff/* 37219820Sjeff * Abstract: 38219820Sjeff * Implementation of osm_trap_rcv_t. 39219820Sjeff * This object represents the Trap Receiver object. 40219820Sjeff * This object is part of the opensm family of objects. 41219820Sjeff */ 42219820Sjeff 43219820Sjeff#if HAVE_CONFIG_H 44219820Sjeff# include <config.h> 45219820Sjeff#endif /* HAVE_CONFIG_H */ 46219820Sjeff 47219820Sjeff#include <string.h> 48219820Sjeff#include <iba/ib_types.h> 49219820Sjeff#include <complib/cl_qmap.h> 50219820Sjeff#include <complib/cl_debug.h> 51219820Sjeff#include <opensm/osm_madw.h> 52219820Sjeff#include <opensm/osm_log.h> 53219820Sjeff#include <opensm/osm_node.h> 54219820Sjeff#include <opensm/osm_helper.h> 55219820Sjeff#include <opensm/osm_subnet.h> 56219820Sjeff#include <opensm/osm_inform.h> 57219820Sjeff#include <opensm/osm_opensm.h> 58219820Sjeff 59219820Sjeffextern void osm_req_get_node_desc(IN osm_sm_t * sm, osm_physp_t *p_physp); 60219820Sjeff 61219820Sjeff/********************************************************************** 62219820Sjeff * 63219820Sjeff * TRAP HANDLING: 64219820Sjeff * 65219820Sjeff * Assuming traps can be caused by bad hardware we should provide 66219820Sjeff * a mechanism for filtering their propagation into the actual logic 67219820Sjeff * of OpenSM such that it is not overloaded by them. 68219820Sjeff * 69219820Sjeff * We will provide a trap filtering mechanism with "Aging" capability. 70219820Sjeff * This mechanism will track incoming traps, classify them by their 71219820Sjeff * source and content and provide back their age. 72219820Sjeff * 73219820Sjeff * A timer running in the background will toggle a timer counter 74219820Sjeff * that should be referenced by the aging algorithm. 75219820Sjeff * To provide efficient handling of aging, we also track all traps 76219820Sjeff * in a sorted list by their aging. 
77219820Sjeff * 78219820Sjeff * The generic Aging Tracker mechanism is implemented in the 79219820Sjeff * cl_aging_tracker object. 80219820Sjeff * 81219820Sjeff **********************************************************************/ 82219820Sjeff 83219820Sjefftypedef struct osm_trap_agingracker_context { 84219820Sjeff osm_log_t *p_log; 85219820Sjeff osm_physp_t *p_physp; 86219820Sjeff} osm_trap_aging_tracker_context_t; 87219820Sjeff 88219820Sjeff/********************************************************************** 89219820Sjeff **********************************************************************/ 90219820Sjeffstatic osm_physp_t *get_physp_by_lid_and_num(IN osm_sm_t * sm, 91219820Sjeff IN uint16_t lid, IN uint8_t num) 92219820Sjeff{ 93219820Sjeff cl_ptr_vector_t *p_vec = &(sm->p_subn->port_lid_tbl); 94219820Sjeff osm_port_t *p_port; 95219820Sjeff 96219820Sjeff if (lid > cl_ptr_vector_get_size(p_vec)) 97219820Sjeff return NULL; 98219820Sjeff 99219820Sjeff p_port = (osm_port_t *) cl_ptr_vector_get(p_vec, lid); 100219820Sjeff if (!p_port) 101219820Sjeff return NULL; 102219820Sjeff 103219820Sjeff if (osm_node_get_num_physp(p_port->p_node) < num) 104219820Sjeff return NULL; 105219820Sjeff 106219820Sjeff return osm_node_get_physp_ptr(p_port->p_node, num); 107219820Sjeff} 108219820Sjeff 109219820Sjeff/********************************************************************** 110219820Sjeff **********************************************************************/ 111219820Sjeffuint64_t 112219820Sjeffosm_trap_rcv_aging_tracker_callback(IN uint64_t key, 113219820Sjeff IN uint32_t num_regs, IN void *context) 114219820Sjeff{ 115219820Sjeff osm_sm_t *sm = context; 116219820Sjeff uint16_t lid; 117219820Sjeff uint8_t port_num; 118219820Sjeff osm_physp_t *p_physp; 119219820Sjeff 120219820Sjeff OSM_LOG_ENTER(sm->p_log); 121219820Sjeff 122219820Sjeff if (osm_exit_flag) 123219820Sjeff /* We got an exit flag - do nothing */ 124219820Sjeff return 0; 125219820Sjeff 126219820Sjeff lid = 
cl_ntoh16((uint16_t) ((key & 0x0000FFFF00000000ULL) >> 32)); 127219820Sjeff port_num = (uint8_t) ((key & 0x00FF000000000000ULL) >> 48); 128219820Sjeff 129219820Sjeff p_physp = get_physp_by_lid_and_num(sm, lid, port_num); 130219820Sjeff if (!p_physp) 131219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 132219820Sjeff "Cannot find port num:%u with lid:%u\n", 133219820Sjeff port_num, lid); 134219820Sjeff /* make sure the physp is still valid */ 135219820Sjeff /* If the health port was false - set it to true */ 136219820Sjeff else if (!osm_physp_is_healthy(p_physp)) { 137219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 138219820Sjeff "Clearing health bit of port num:%u with lid:%u\n", 139219820Sjeff port_num, lid); 140219820Sjeff 141219820Sjeff /* Clear its health bit */ 142219820Sjeff osm_physp_set_health(p_physp, TRUE); 143219820Sjeff } 144219820Sjeff 145219820Sjeff OSM_LOG_EXIT(sm->p_log); 146219820Sjeff 147219820Sjeff /* We want to remove the event from the tracker - so 148219820Sjeff need to return zero. 
*/ 149219820Sjeff return 0; 150219820Sjeff} 151219820Sjeff 152219820Sjeff/********************************************************************** 153219820Sjeff * CRC calculation for notice identification 154219820Sjeff **********************************************************************/ 155219820Sjeff 156219820Sjeff#define CRC32_POLYNOMIAL 0xEDB88320L 157219820Sjeff 158219820Sjeff/* calculate the crc for a given buffer */ 159219820Sjeffstatic uint32_t __osm_trap_calc_crc32(void *buffer, uint32_t count) 160219820Sjeff{ 161219820Sjeff uint32_t temp1, temp2; 162219820Sjeff uint32_t crc = -1L; 163219820Sjeff unsigned char *p = (unsigned char *)buffer; 164219820Sjeff /* pre - calculated table for faster crc calculation */ 165219820Sjeff static uint32_t crc_table[256]; 166219820Sjeff static boolean_t first = TRUE; 167219820Sjeff int i, j; 168219820Sjeff 169219820Sjeff /* if we need to initialize the lookup table */ 170219820Sjeff if (first) { 171219820Sjeff /* calc the CRC table */ 172219820Sjeff for (i = 0; i <= 255; i++) { 173219820Sjeff crc = i; 174219820Sjeff for (j = 8; j > 0; j--) 175219820Sjeff if (crc & 1) 176219820Sjeff crc = (crc >> 1) ^ CRC32_POLYNOMIAL; 177219820Sjeff else 178219820Sjeff crc >>= 1; 179219820Sjeff crc_table[i] = crc; 180219820Sjeff } 181219820Sjeff first = FALSE; 182219820Sjeff } 183219820Sjeff 184219820Sjeff crc = -1L; 185219820Sjeff /* do the calculation */ 186219820Sjeff while (count-- != 0) { 187219820Sjeff temp1 = (crc >> 8) & 0x00FFFFFFL; 188219820Sjeff temp2 = crc_table[((int)crc ^ *p++) & 0xFF]; 189219820Sjeff crc = temp1 ^ temp2; 190219820Sjeff } 191219820Sjeff return crc; 192219820Sjeff} 193219820Sjeff 194219820Sjeff/******************************************************************** 195219820Sjeff ********************************************************************/ 196219820Sjeff 197219820Sjeff/* The key is created in the following manner: 198219820Sjeff port_num lid crc 199219820Sjeff \______/ \___/ \___/ 200219820Sjeff 16b 
16b 32b 201219820Sjeff*/ 202219820Sjeffstatic void 203219820Sjeff__osm_trap_get_key(IN uint16_t lid, 204219820Sjeff IN uint8_t port_num, 205219820Sjeff IN ib_mad_notice_attr_t * p_ntci, OUT uint64_t * trap_key) 206219820Sjeff{ 207219820Sjeff uint32_t crc = 0; 208219820Sjeff 209219820Sjeff CL_ASSERT(trap_key); 210219820Sjeff 211219820Sjeff crc = __osm_trap_calc_crc32(p_ntci, sizeof(ib_mad_notice_attr_t)); 212219820Sjeff *trap_key = ((uint64_t) port_num << 48) | ((uint64_t) lid << 32) | crc; 213219820Sjeff} 214219820Sjeff 215219820Sjeff/********************************************************************** 216219820Sjeff **********************************************************************/ 217219820Sjeffstatic int __print_num_received(IN uint32_t num_received) 218219820Sjeff{ 219219820Sjeff uint32_t i; 220219820Sjeff 221219820Sjeff /* Series is 10, 20, 50, 100, 200, 500, ... */ 222219820Sjeff i = num_received; 223219820Sjeff while (i >= 10) { 224219820Sjeff if (i % 10) 225219820Sjeff break; 226219820Sjeff i = i / 10; 227219820Sjeff } 228219820Sjeff 229219820Sjeff if (i == 1 || i == 2 || i == 5) 230219820Sjeff return 1; 231219820Sjeff else 232219820Sjeff return 0; 233219820Sjeff} 234219820Sjeff 235219820Sjeffstatic int disable_port(osm_sm_t *sm, osm_physp_t *p) 236219820Sjeff{ 237219820Sjeff uint8_t payload[IB_SMP_DATA_SIZE]; 238219820Sjeff osm_madw_context_t context; 239219820Sjeff ib_port_info_t *pi = (ib_port_info_t *)payload; 240219820Sjeff int ret; 241219820Sjeff 242219820Sjeff /* select the nearest port to master opensm */ 243219820Sjeff if (p->p_remote_physp && 244219820Sjeff p->dr_path.hop_count > p->p_remote_physp->dr_path.hop_count) 245219820Sjeff p = p->p_remote_physp; 246219820Sjeff 247219820Sjeff /* If trap 131, might want to disable peer port if available */ 248219820Sjeff /* but peer port has been observed not to respond to SM requests */ 249219820Sjeff 250219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3810: " 251219820Sjeff "Disabling physical 
port 0x%016" PRIx64 " num:%u\n", 252219820Sjeff cl_ntoh64(osm_physp_get_port_guid(p)), p->port_num); 253219820Sjeff 254219820Sjeff memcpy(payload, &p->port_info, sizeof(ib_port_info_t)); 255219820Sjeff 256219820Sjeff /* Set port to disabled/down */ 257219820Sjeff ib_port_info_set_port_state(pi, IB_LINK_DOWN); 258219820Sjeff ib_port_info_set_port_phys_state(IB_PORT_PHYS_STATE_DISABLED, pi); 259219820Sjeff 260219820Sjeff /* Issue set of PortInfo */ 261219820Sjeff context.pi_context.node_guid = osm_node_get_node_guid(p->p_node); 262219820Sjeff context.pi_context.port_guid = osm_physp_get_port_guid(p); 263219820Sjeff context.pi_context.set_method = TRUE; 264219820Sjeff context.pi_context.light_sweep = FALSE; 265219820Sjeff context.pi_context.active_transition = FALSE; 266219820Sjeff 267219820Sjeff ret = osm_req_set(sm, osm_physp_get_dr_path_ptr(p), 268219820Sjeff payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, 269219820Sjeff cl_hton32(osm_physp_get_port_num(p)), 270219820Sjeff CL_DISP_MSGID_NONE, &context); 271219820Sjeff if (ret) 272219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: " 273219820Sjeff "Request to set PortInfo failed\n"); 274219820Sjeff 275219820Sjeff return ret; 276219820Sjeff} 277219820Sjeff 278219820Sjeff/********************************************************************** 279219820Sjeff **********************************************************************/ 280219820Sjeffstatic void 281219820Sjeff__osm_trap_rcv_process_request(IN osm_sm_t * sm, 282219820Sjeff IN const osm_madw_t * const p_madw) 283219820Sjeff{ 284219820Sjeff uint8_t payload[sizeof(ib_mad_notice_attr_t)]; 285219820Sjeff ib_smp_t *p_smp; 286219820Sjeff ib_mad_notice_attr_t *p_ntci = (ib_mad_notice_attr_t *) payload; 287219820Sjeff ib_api_status_t status; 288219820Sjeff osm_madw_t tmp_madw; /* we need a copy to last after repress */ 289219820Sjeff uint64_t trap_key; 290219820Sjeff uint32_t num_received; 291219820Sjeff osm_physp_t *p_physp; 292219820Sjeff cl_ptr_vector_t *p_tbl; 
293219820Sjeff osm_port_t *p_port; 294219820Sjeff ib_net16_t source_lid = 0; 295219820Sjeff boolean_t is_gsi = TRUE; 296219820Sjeff uint8_t port_num = 0; 297219820Sjeff boolean_t physp_change_trap = FALSE; 298219820Sjeff uint64_t event_wheel_timeout = OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT; 299219820Sjeff boolean_t run_heavy_sweep = FALSE; 300219820Sjeff 301219820Sjeff OSM_LOG_ENTER(sm->p_log); 302219820Sjeff 303219820Sjeff CL_ASSERT(p_madw); 304219820Sjeff 305219820Sjeff if (osm_exit_flag) 306219820Sjeff /* 307219820Sjeff We got an exit flag - do nothing 308219820Sjeff Otherwise we start a sweep on the trap 144 caused by 309219820Sjeff cleaning up SM Cap bit... 310219820Sjeff */ 311219820Sjeff goto Exit; 312219820Sjeff 313219820Sjeff /* update the is_gsi flag according to the mgmt_class field */ 314219820Sjeff if (p_madw->p_mad->mgmt_class == IB_MCLASS_SUBN_LID || 315219820Sjeff p_madw->p_mad->mgmt_class == IB_MCLASS_SUBN_DIR) 316219820Sjeff is_gsi = FALSE; 317219820Sjeff 318219820Sjeff /* No real need to grab the lock for this function. */ 319219820Sjeff memset(payload, 0, sizeof(payload)); 320219820Sjeff memset(&tmp_madw, 0, sizeof(tmp_madw)); 321219820Sjeff 322219820Sjeff p_smp = osm_madw_get_smp_ptr(p_madw); 323219820Sjeff 324219820Sjeff if (p_smp->method != IB_MAD_METHOD_TRAP) { 325219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3801: " 326219820Sjeff "Unsupported method 0x%X\n", p_smp->method); 327219820Sjeff goto Exit; 328219820Sjeff } 329219820Sjeff 330219820Sjeff /* 331219820Sjeff * The NOTICE Attribute is part of the SMP CLASS attributes 332219820Sjeff * As such the actual attribute data resides inside the SMP 333219820Sjeff * payload. 
334219820Sjeff */ 335219820Sjeff 336219820Sjeff memcpy(payload, &(p_smp->data), IB_SMP_DATA_SIZE); 337219820Sjeff memcpy(&tmp_madw, p_madw, sizeof(tmp_madw)); 338219820Sjeff 339219820Sjeff if (is_gsi == FALSE) { 340219820Sjeff /* We are in smi flow */ 341219820Sjeff /* 342219820Sjeff * When we received a TRAP with dlid = 0 - it means it 343219820Sjeff * came from our own node. So we need to fix it. 344219820Sjeff */ 345219820Sjeff 346219820Sjeff if (p_madw->mad_addr.addr_type.smi.source_lid == 0) { 347219820Sjeff /* Check if the sm_base_lid is 0. If yes - this means 348219820Sjeff that the local lid wasn't configured yet. Don't send 349219820Sjeff a response to the trap. */ 350219820Sjeff if (sm->p_subn->sm_base_lid == 0) { 351219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 352219820Sjeff "Received SLID=0 Trap with local LID=0. Ignoring MAD\n"); 353219820Sjeff goto Exit; 354219820Sjeff } 355219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 356219820Sjeff "Received SLID=0 Trap. Using local LID:%u instead\n", 357219820Sjeff cl_ntoh16(sm->p_subn->sm_base_lid)); 358219820Sjeff tmp_madw.mad_addr.addr_type.smi.source_lid = 359219820Sjeff sm->p_subn->sm_base_lid; 360219820Sjeff } 361219820Sjeff 362219820Sjeff source_lid = tmp_madw.mad_addr.addr_type.smi.source_lid; 363219820Sjeff 364219820Sjeff /* Print some info about the incoming Trap */ 365219820Sjeff if (ib_notice_is_generic(p_ntci)) { 366219820Sjeff if ((p_ntci->g_or_v.generic.trap_num == CL_HTON16(129)) 367219820Sjeff || (p_ntci->g_or_v.generic.trap_num == 368219820Sjeff CL_HTON16(130)) 369219820Sjeff || (p_ntci->g_or_v.generic.trap_num == 370219820Sjeff CL_HTON16(131))) 371219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, 372219820Sjeff "Received Generic Notice type:%u " 373219820Sjeff "num:%u (%s) Producer:%u (%s) " 374219820Sjeff "from LID:%u Port %d TID:0x%016" 375219820Sjeff PRIx64 "\n", ib_notice_get_type(p_ntci), 376219820Sjeff cl_ntoh16(p_ntci->g_or_v.generic. 
377219820Sjeff trap_num), 378219820Sjeff ib_get_trap_str(p_ntci->g_or_v.generic. 379219820Sjeff trap_num), 380219820Sjeff cl_ntoh32(ib_notice_get_prod_type 381219820Sjeff (p_ntci)), 382219820Sjeff ib_get_producer_type_str 383219820Sjeff (ib_notice_get_prod_type(p_ntci)), 384219820Sjeff cl_hton16(source_lid), 385219820Sjeff p_ntci->data_details.ntc_129_131. 386219820Sjeff port_num, cl_ntoh64(p_smp->trans_id)); 387219820Sjeff else 388219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, 389219820Sjeff "Received Generic Notice type:%u " 390219820Sjeff "num:%u (%s) Producer:%u (%s) " 391219820Sjeff "from LID:%u TID:0x%016" PRIx64 392219820Sjeff "\n", ib_notice_get_type(p_ntci), 393219820Sjeff cl_ntoh16(p_ntci->g_or_v.generic. 394219820Sjeff trap_num), 395219820Sjeff ib_get_trap_str(p_ntci->g_or_v.generic. 396219820Sjeff trap_num), 397219820Sjeff cl_ntoh32(ib_notice_get_prod_type 398219820Sjeff (p_ntci)), 399219820Sjeff ib_get_producer_type_str 400219820Sjeff (ib_notice_get_prod_type(p_ntci)), 401219820Sjeff cl_hton16(source_lid), 402219820Sjeff cl_ntoh64(p_smp->trans_id)); 403219820Sjeff } else 404219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, 405219820Sjeff "Received Vendor Notice type:%u vend:0x%06X " 406219820Sjeff "dev:%u from LID:%u TID:0x%016" PRIx64 "\n", 407219820Sjeff ib_notice_get_type(p_ntci), 408219820Sjeff cl_ntoh32(ib_notice_get_vend_id(p_ntci)), 409219820Sjeff cl_ntoh16(p_ntci->g_or_v.vend.dev_id), 410219820Sjeff cl_ntoh16(source_lid), 411219820Sjeff cl_ntoh64(p_smp->trans_id)); 412219820Sjeff } 413219820Sjeff 414219820Sjeff osm_dump_notice(sm->p_log, p_ntci, OSM_LOG_VERBOSE); 415219820Sjeff 416219820Sjeff p_physp = osm_get_physp_by_mad_addr(sm->p_log, 417219820Sjeff sm->p_subn, &tmp_madw.mad_addr); 418219820Sjeff if (p_physp) 419219820Sjeff p_smp->m_key = p_physp->port_info.m_key; 420219820Sjeff else 421219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3809: " 422219820Sjeff "Failed to find source physical port for trap\n"); 423219820Sjeff 424219820Sjeff status 
= osm_resp_send(sm, &tmp_madw, 0, payload); 425219820Sjeff if (status != IB_SUCCESS) { 426219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3802: " 427219820Sjeff "Error sending response (%s)\n", 428219820Sjeff ib_get_err_str(status)); 429219820Sjeff goto Exit; 430219820Sjeff } 431219820Sjeff 432219820Sjeff /* 433219820Sjeff * We would like to filter out recurring Traps so we track them by 434219820Sjeff * their source lid and content. If the same trap was already 435219820Sjeff * received within the aging time window more than 10 times, 436219820Sjeff * we simply ignore it. This is done only if we are in smi mode 437219820Sjeff */ 438219820Sjeff 439219820Sjeff if (is_gsi == FALSE) { 440219820Sjeff if (ib_notice_is_generic(p_ntci) && 441219820Sjeff ((p_ntci->g_or_v.generic.trap_num == CL_HTON16(129)) || 442219820Sjeff (p_ntci->g_or_v.generic.trap_num == CL_HTON16(130)) || 443219820Sjeff (p_ntci->g_or_v.generic.trap_num == CL_HTON16(131)))) { 444219820Sjeff /* If this is a trap 129, 130, or 131 - then this is a 445219820Sjeff * trap signaling a change on a physical port. 446219820Sjeff * Mark the physp_change_trap flag as TRUE. 447219820Sjeff */ 448219820Sjeff physp_change_trap = TRUE; 449219820Sjeff /* The source_lid should be based on the source_lid from the trap */ 450219820Sjeff source_lid = p_ntci->data_details.ntc_129_131.lid; 451219820Sjeff } 452219820Sjeff 453219820Sjeff /* If physp_change_trap is TRUE - the key will include the port number. 454219820Sjeff If not - the port_number in the key will be zero. 
*/ 455219820Sjeff if (physp_change_trap == TRUE) { 456219820Sjeff port_num = p_ntci->data_details.ntc_129_131.port_num; 457219820Sjeff __osm_trap_get_key(source_lid, port_num, p_ntci, 458219820Sjeff &trap_key); 459219820Sjeff } else 460219820Sjeff __osm_trap_get_key(source_lid, 0, p_ntci, &trap_key); 461219820Sjeff 462219820Sjeff /* try to find it in the aging tracker */ 463219820Sjeff num_received = 464219820Sjeff cl_event_wheel_num_regs(&sm->trap_aging_tracker, 465219820Sjeff trap_key); 466219820Sjeff 467219820Sjeff /* Now we know how many times it provided this trap */ 468219820Sjeff if (num_received > 10) { 469219820Sjeff if (__print_num_received(num_received)) 470219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: " 471219820Sjeff "Received trap %u times consecutively\n", 472219820Sjeff num_received); 473219820Sjeff /* 474219820Sjeff * If the trap provides info about a bad port 475219820Sjeff * we mark it as unhealthy. 476219820Sjeff */ 477219820Sjeff if (physp_change_trap == TRUE) { 478219820Sjeff /* get the port */ 479219820Sjeff p_physp = get_physp_by_lid_and_num(sm, 480219820Sjeff cl_ntoh16 481219820Sjeff (p_ntci-> 482219820Sjeff data_details. 483219820Sjeff ntc_129_131. 484219820Sjeff lid), 485219820Sjeff port_num); 486219820Sjeff 487219820Sjeff if (!p_physp) 488219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, 489219820Sjeff "ERR 3805: " 490219820Sjeff "Failed to find physical port by lid:%u num:%u\n", 491219820Sjeff cl_ntoh16(p_ntci->data_details. 492219820Sjeff ntc_129_131.lid), 493219820Sjeff p_ntci->data_details. 494219820Sjeff ntc_129_131.port_num); 495219820Sjeff else { 496219820Sjeff /* When babbling port policy option is enabled and 497219820Sjeff Threshold for disabling a "babbling" port is exceeded */ 498219820Sjeff if (sm->p_subn->opt. 
499219820Sjeff babbling_port_policy 500219820Sjeff && num_received >= 250 501219820Sjeff && disable_port(sm, p_physp) == 0) 502219820Sjeff goto Exit; 503219820Sjeff 504219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 505219820Sjeff "Marking unhealthy physical port by lid:%u num:%u\n", 506219820Sjeff cl_ntoh16(p_ntci->data_details. 507219820Sjeff ntc_129_131.lid), 508219820Sjeff p_ntci->data_details. 509219820Sjeff ntc_129_131.port_num); 510219820Sjeff /* check if the current state of the p_physp is healthy. If 511219820Sjeff it is - then this is a first change of state. Run a heavy sweep. 512219820Sjeff if it is not - no need to mark it again - just restart the timer. */ 513219820Sjeff if (osm_physp_is_healthy(p_physp)) { 514219820Sjeff osm_physp_set_health(p_physp, 515219820Sjeff FALSE); 516219820Sjeff /* Make sure we sweep again - force a heavy sweep. */ 517219820Sjeff /* The sweep should be done only after the re-registration, or 518219820Sjeff else we'll be losing track of the timer. */ 519219820Sjeff run_heavy_sweep = TRUE; 520219820Sjeff } 521219820Sjeff /* If we are marking the port as unhealthy - we want to 522219820Sjeff keep this for a longer period of time than the 523219820Sjeff OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the 524219820Sjeff OSM_DEFAULT_UNHEALTHY_TIMEOUT */ 525219820Sjeff event_wheel_timeout = 526219820Sjeff OSM_DEFAULT_UNHEALTHY_TIMEOUT; 527219820Sjeff } 528219820Sjeff } 529219820Sjeff } 530219820Sjeff 531219820Sjeff /* restart the aging anyway */ 532219820Sjeff /* If physp_change_trap is TRUE - then use a callback to unset the 533219820Sjeff healthy bit. If not - no need to use a callback. 
*/ 534219820Sjeff if (physp_change_trap == TRUE) 535219820Sjeff cl_event_wheel_reg(&sm->trap_aging_tracker, trap_key, cl_get_time_stamp() + event_wheel_timeout, osm_trap_rcv_aging_tracker_callback, /* no callback */ 536219820Sjeff sm /* no context */ ); 537219820Sjeff else 538219820Sjeff cl_event_wheel_reg(&sm->trap_aging_tracker, trap_key, cl_get_time_stamp() + event_wheel_timeout, NULL, /* no callback */ 539219820Sjeff NULL /* no context */ ); 540219820Sjeff 541219820Sjeff /* If was already registered do nothing more */ 542219820Sjeff if (num_received > 10 && run_heavy_sweep == FALSE) { 543219820Sjeff if (__print_num_received(num_received)) 544219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 545219820Sjeff "Continuously received this trap %u times. Ignoring\n", 546219820Sjeff num_received); 547219820Sjeff goto Exit; 548219820Sjeff } 549219820Sjeff } 550219820Sjeff 551219820Sjeff /* Check for node description update. IB Spec v1.2.1 pg 823 */ 552219820Sjeff if ((p_ntci->data_details.ntc_144.local_changes & TRAP_144_MASK_OTHER_LOCAL_CHANGES) && 553219820Sjeff (p_ntci->data_details.ntc_144.change_flgs & TRAP_144_MASK_NODE_DESCRIPTION_CHANGE) 554219820Sjeff ) { 555219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_INFO, "Trap 144 Node description update\n"); 556219820Sjeff 557219820Sjeff if (p_physp) { 558219820Sjeff CL_PLOCK_ACQUIRE(sm->p_lock); 559219820Sjeff osm_req_get_node_desc(sm, p_physp); 560219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 561219820Sjeff } else { 562219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, 563219820Sjeff "ERR 3812: No physical port found for " 564219820Sjeff "trap 144: \"node description update\"\n"); 565219820Sjeff } 566219820Sjeff } 567219820Sjeff 568219820Sjeff /* do a sweep if we received a trap */ 569219820Sjeff if (sm->p_subn->opt.sweep_on_trap) { 570219820Sjeff /* if this is trap number 128 or run_heavy_sweep is TRUE - update the 571219820Sjeff force_single_heavy_sweep flag of the subnet. 
572219820Sjeff Sweep also on traps 144/145 - these traps signal a change of a certain 573219820Sjeff port capability/system image guid. 574219820Sjeff TODO: In the future we can change this to just getting PortInfo on 575219820Sjeff this port instead of sweeping the entire subnet. */ 576219820Sjeff if (ib_notice_is_generic(p_ntci) && 577219820Sjeff ((cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 128) || 578219820Sjeff (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 144) || 579219820Sjeff (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 145) || 580219820Sjeff run_heavy_sweep)) { 581219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 582219820Sjeff "Forcing heavy sweep. Received trap:%u\n", 583219820Sjeff cl_ntoh16(p_ntci->g_or_v.generic.trap_num)); 584219820Sjeff 585219820Sjeff sm->p_subn->force_heavy_sweep = TRUE; 586219820Sjeff } 587219820Sjeff osm_sm_signal(sm, OSM_SIGNAL_SWEEP); 588219820Sjeff } 589219820Sjeff 590219820Sjeff /* If we reached here due to trap 129/130/131 - do not need to do 591219820Sjeff the notice report. Just goto exit. We know this is the case 592219820Sjeff if physp_change_trap is TRUE. */ 593219820Sjeff if (physp_change_trap == TRUE) 594219820Sjeff goto Exit; 595219820Sjeff 596219820Sjeff /* Add a call to osm_report_notice */ 597219820Sjeff /* We are going to report the notice - so need to fix the IssuerGID 598219820Sjeff accordingly. See IBA 1.2 p.739 or IBA 1.1 p.653 for details. */ 599219820Sjeff if (is_gsi) { 600219820Sjeff if (!tmp_madw.mad_addr.addr_type.gsi.global_route) { 601219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3806: " 602219820Sjeff "Received gsi trap with global_route FALSE. 
" 603219820Sjeff "Cannot update issuer_gid!\n"); 604219820Sjeff goto Exit; 605219820Sjeff } 606219820Sjeff memcpy(&(p_ntci->issuer_gid), 607219820Sjeff &(tmp_madw.mad_addr.addr_type.gsi.grh_info.src_gid), 608219820Sjeff sizeof(ib_gid_t)); 609219820Sjeff } else { 610219820Sjeff /* Need to use the IssuerLID */ 611219820Sjeff p_tbl = &sm->p_subn->port_lid_tbl; 612219820Sjeff 613219820Sjeff CL_ASSERT(cl_ptr_vector_get_size(p_tbl) < 0x10000); 614219820Sjeff 615219820Sjeff if ((uint16_t) cl_ptr_vector_get_size(p_tbl) <= 616219820Sjeff cl_ntoh16(source_lid)) { 617219820Sjeff /* the source lid is out of range */ 618219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 619219820Sjeff "source lid is out of range:%u\n", 620219820Sjeff cl_ntoh16(source_lid)); 621219820Sjeff 622219820Sjeff goto Exit; 623219820Sjeff } 624219820Sjeff p_port = cl_ptr_vector_get(p_tbl, cl_ntoh16(source_lid)); 625219820Sjeff if (p_port == 0) { 626219820Sjeff /* We have the lid - but no corresponding port */ 627219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 628219820Sjeff "Cannot find port corresponding to lid:%u\n", 629219820Sjeff cl_ntoh16(source_lid)); 630219820Sjeff 631219820Sjeff goto Exit; 632219820Sjeff } 633219820Sjeff 634219820Sjeff p_ntci->issuer_gid.unicast.prefix = 635219820Sjeff sm->p_subn->opt.subnet_prefix; 636219820Sjeff p_ntci->issuer_gid.unicast.interface_id = p_port->guid; 637219820Sjeff } 638219820Sjeff 639219820Sjeff /* we need a lock here as the InformInfo DB must be stable */ 640219820Sjeff CL_PLOCK_ACQUIRE(sm->p_lock); 641219820Sjeff status = osm_report_notice(sm->p_log, sm->p_subn, p_ntci); 642219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 643219820Sjeff if (status != IB_SUCCESS) { 644219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3803: " 645219820Sjeff "Error sending trap reports (%s)\n", 646219820Sjeff ib_get_err_str(status)); 647219820Sjeff goto Exit; 648219820Sjeff } 649219820Sjeff 650219820SjeffExit: 651219820Sjeff OSM_LOG_EXIT(sm->p_log); 652219820Sjeff} 653219820Sjeff 
654219820Sjeff#if 0 655219820Sjeff/********************************************************************** 656219820Sjeff CURRENTLY WE ARE NOT CREATING TRAPS - SO THIS CALL IS AN ERROR 657219820Sjeff**********************************************************************/ 658219820Sjeffstatic void 659219820Sjeff__osm_trap_rcv_process_sm(IN osm_sm_t * sm, 660219820Sjeff IN const osm_remote_sm_t * const p_sm) 661219820Sjeff{ 662219820Sjeff /* const ib_sm_info_t* p_smi; */ 663219820Sjeff 664219820Sjeff OSM_LOG_ENTER(sm->p_log); 665219820Sjeff 666219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3807: " 667219820Sjeff "This function is not supported yet\n"); 668219820Sjeff 669219820Sjeff OSM_LOG_EXIT(sm->p_log); 670219820Sjeff} 671219820Sjeff#endif 672219820Sjeff 673219820Sjeff/********************************************************************** 674219820Sjeff CURRENTLY WE ARE NOT CREATING TRAPS - SO THIS CALL IN AN ERROR 675219820Sjeff**********************************************************************/ 676219820Sjeffstatic void 677219820Sjeff__osm_trap_rcv_process_response(IN osm_sm_t * sm, 678219820Sjeff IN const osm_madw_t * const p_madw) 679219820Sjeff{ 680219820Sjeff 681219820Sjeff OSM_LOG_ENTER(sm->p_log); 682219820Sjeff 683219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3808: " 684219820Sjeff "This function is not supported yet\n"); 685219820Sjeff 686219820Sjeff OSM_LOG_EXIT(sm->p_log); 687219820Sjeff} 688219820Sjeff 689219820Sjeff/********************************************************************** 690219820Sjeff **********************************************************************/ 691219820Sjeffvoid osm_trap_rcv_process(IN void *context, IN void *data) 692219820Sjeff{ 693219820Sjeff osm_sm_t *sm = context; 694219820Sjeff osm_madw_t *p_madw = data; 695219820Sjeff ib_smp_t *p_smp; 696219820Sjeff 697219820Sjeff OSM_LOG_ENTER(sm->p_log); 698219820Sjeff 699219820Sjeff CL_ASSERT(p_madw); 700219820Sjeff 701219820Sjeff p_smp = osm_madw_get_smp_ptr(p_madw); 
702219820Sjeff 703219820Sjeff /* 704219820Sjeff Determine if this is a request for our own Trap 705219820Sjeff or if this is a response to our request for another 706219820Sjeff SM's Trap. 707219820Sjeff */ 708219820Sjeff if (ib_smp_is_response(p_smp)) 709219820Sjeff __osm_trap_rcv_process_response(sm, p_madw); 710219820Sjeff else 711219820Sjeff __osm_trap_rcv_process_request(sm, p_madw); 712219820Sjeff 713219820Sjeff OSM_LOG_EXIT(sm->p_log); 714219820Sjeff} 715