1219820Sjeff/*
2219820Sjeff * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3219820Sjeff * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
4219820Sjeff * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5219820Sjeff *
6219820Sjeff * This software is available to you under a choice of one of two
7219820Sjeff * licenses.  You may choose to be licensed under the terms of the GNU
8219820Sjeff * General Public License (GPL) Version 2, available from the file
9219820Sjeff * COPYING in the main directory of this source tree, or the
10219820Sjeff * OpenIB.org BSD license below:
11219820Sjeff *
12219820Sjeff *     Redistribution and use in source and binary forms, with or
13219820Sjeff *     without modification, are permitted provided that the following
14219820Sjeff *     conditions are met:
15219820Sjeff *
16219820Sjeff *      - Redistributions of source code must retain the above
17219820Sjeff *        copyright notice, this list of conditions and the following
18219820Sjeff *        disclaimer.
19219820Sjeff *
20219820Sjeff *      - Redistributions in binary form must reproduce the above
21219820Sjeff *        copyright notice, this list of conditions and the following
22219820Sjeff *        disclaimer in the documentation and/or other materials
23219820Sjeff *        provided with the distribution.
24219820Sjeff *
25219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32219820Sjeff * SOFTWARE.
33219820Sjeff *
34219820Sjeff */
35219820Sjeff
36219820Sjeff/*
37219820Sjeff * Abstract:
38219820Sjeff *    Implementation of osm_trap_rcv_t.
39219820Sjeff * This object represents the Trap Receiver object.
40219820Sjeff * This object is part of the opensm family of objects.
41219820Sjeff */
42219820Sjeff
43219820Sjeff#if HAVE_CONFIG_H
44219820Sjeff#  include <config.h>
45219820Sjeff#endif				/* HAVE_CONFIG_H */
46219820Sjeff
47219820Sjeff#include <string.h>
48219820Sjeff#include <iba/ib_types.h>
49219820Sjeff#include <complib/cl_qmap.h>
50219820Sjeff#include <complib/cl_debug.h>
51219820Sjeff#include <opensm/osm_madw.h>
52219820Sjeff#include <opensm/osm_log.h>
53219820Sjeff#include <opensm/osm_node.h>
54219820Sjeff#include <opensm/osm_helper.h>
55219820Sjeff#include <opensm/osm_subnet.h>
56219820Sjeff#include <opensm/osm_inform.h>
57219820Sjeff#include <opensm/osm_opensm.h>
58219820Sjeff
59219820Sjeffextern void osm_req_get_node_desc(IN osm_sm_t * sm, osm_physp_t *p_physp);
60219820Sjeff
61219820Sjeff/**********************************************************************
62219820Sjeff *
63219820Sjeff * TRAP HANDLING:
64219820Sjeff *
65219820Sjeff * Assuming traps can be caused by bad hardware we should provide
66219820Sjeff * a mechanism for filtering their propagation into the actual logic
67219820Sjeff * of OpenSM such that it is not overloaded by them.
68219820Sjeff *
69219820Sjeff * We will provide a trap filtering mechanism with "Aging" capability.
 * This mechanism will track incoming traps, classify them by their
71219820Sjeff * source and content and provide back their age.
72219820Sjeff *
73219820Sjeff * A timer running in the background will toggle a timer counter
74219820Sjeff * that should be referenced by the aging algorithm.
 * To handle aging efficiently, we also track all traps
 * in a list sorted by their age.
77219820Sjeff *
78219820Sjeff * The generic Aging Tracker mechanism is implemented in the
79219820Sjeff * cl_aging_tracker object.
80219820Sjeff *
81219820Sjeff **********************************************************************/
82219820Sjeff
/*
 * Context record pairing a log handle with a physical port.
 * NOTE(review): nothing in this file references this type (the aging
 * tracker callback receives an osm_sm_t as its context) - confirm whether
 * it is still needed.
 */
typedef struct osm_trap_agingracker_context {
	osm_log_t *p_log;	/* log object */
	osm_physp_t *p_physp;	/* physical port; presumably the tracked one */
} osm_trap_aging_tracker_context_t;
87219820Sjeff
88219820Sjeff/**********************************************************************
89219820Sjeff **********************************************************************/
90219820Sjeffstatic osm_physp_t *get_physp_by_lid_and_num(IN osm_sm_t * sm,
91219820Sjeff					     IN uint16_t lid, IN uint8_t num)
92219820Sjeff{
93219820Sjeff	cl_ptr_vector_t *p_vec = &(sm->p_subn->port_lid_tbl);
94219820Sjeff	osm_port_t *p_port;
95219820Sjeff
96219820Sjeff	if (lid > cl_ptr_vector_get_size(p_vec))
97219820Sjeff		return NULL;
98219820Sjeff
99219820Sjeff	p_port = (osm_port_t *) cl_ptr_vector_get(p_vec, lid);
100219820Sjeff	if (!p_port)
101219820Sjeff		return NULL;
102219820Sjeff
103219820Sjeff	if (osm_node_get_num_physp(p_port->p_node) < num)
104219820Sjeff		return NULL;
105219820Sjeff
106219820Sjeff	return osm_node_get_physp_ptr(p_port->p_node, num);
107219820Sjeff}
108219820Sjeff
109219820Sjeff/**********************************************************************
110219820Sjeff **********************************************************************/
111219820Sjeffuint64_t
112219820Sjeffosm_trap_rcv_aging_tracker_callback(IN uint64_t key,
113219820Sjeff				    IN uint32_t num_regs, IN void *context)
114219820Sjeff{
115219820Sjeff	osm_sm_t *sm = context;
116219820Sjeff	uint16_t lid;
117219820Sjeff	uint8_t port_num;
118219820Sjeff	osm_physp_t *p_physp;
119219820Sjeff
120219820Sjeff	OSM_LOG_ENTER(sm->p_log);
121219820Sjeff
122219820Sjeff	if (osm_exit_flag)
123219820Sjeff		/* We got an exit flag - do nothing */
124219820Sjeff		return 0;
125219820Sjeff
126219820Sjeff	lid = cl_ntoh16((uint16_t) ((key & 0x0000FFFF00000000ULL) >> 32));
127219820Sjeff	port_num = (uint8_t) ((key & 0x00FF000000000000ULL) >> 48);
128219820Sjeff
129219820Sjeff	p_physp = get_physp_by_lid_and_num(sm, lid, port_num);
130219820Sjeff	if (!p_physp)
131219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
132219820Sjeff			"Cannot find port num:%u with lid:%u\n",
133219820Sjeff			port_num, lid);
134219820Sjeff	/* make sure the physp is still valid */
135219820Sjeff	/* If the health port was false - set it to true */
136219820Sjeff	else if (!osm_physp_is_healthy(p_physp)) {
137219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
138219820Sjeff			"Clearing health bit of port num:%u with lid:%u\n",
139219820Sjeff			port_num, lid);
140219820Sjeff
141219820Sjeff		/* Clear its health bit */
142219820Sjeff		osm_physp_set_health(p_physp, TRUE);
143219820Sjeff	}
144219820Sjeff
145219820Sjeff	OSM_LOG_EXIT(sm->p_log);
146219820Sjeff
147219820Sjeff	/* We want to remove the event from the tracker - so
148219820Sjeff	   need to return zero. */
149219820Sjeff	return 0;
150219820Sjeff}
151219820Sjeff
152219820Sjeff/**********************************************************************
153219820Sjeff * CRC calculation for notice identification
154219820Sjeff **********************************************************************/
155219820Sjeff
#define CRC32_POLYNOMIAL   0xEDB88320L

/*
 * Compute a table-driven CRC-32 (polynomial CRC32_POLYNOMIAL, bits shifted
 * right) over `count` bytes starting at `buffer`.
 *
 * The running value starts at all ones and is returned as-is, without a
 * final inversion.  The 256-entry lookup table is built on the first call
 * and reused afterwards.
 */
static uint32_t __osm_trap_calc_crc32(void *buffer, uint32_t count)
{
	static uint32_t lookup[256];
	static int lookup_ready = 0;
	const unsigned char *bytes = (const unsigned char *)buffer;
	uint32_t crc;
	uint32_t idx;

	/* one-time construction of the per-byte lookup table */
	if (!lookup_ready) {
		unsigned int value, bit;

		for (value = 0; value < 256; value++) {
			uint32_t entry = value;

			for (bit = 0; bit < 8; bit++) {
				if (entry & 1)
					entry = (entry >> 1) ^ CRC32_POLYNOMIAL;
				else
					entry = entry >> 1;
			}
			lookup[value] = entry;
		}
		lookup_ready = 1;
	}

	/* fold every input byte into the running value */
	crc = 0xFFFFFFFF;
	for (idx = 0; idx < count; idx++)
		crc = (crc >> 8) ^ lookup[(crc ^ bytes[idx]) & 0xFF];

	return crc;
}
193219820Sjeff
194219820Sjeff/********************************************************************
195219820Sjeff ********************************************************************/
196219820Sjeff
197219820Sjeff/* The key is created in the following manner:
198219820Sjeff   port_num  lid   crc
199219820Sjeff   \______/ \___/ \___/
200219820Sjeff     16b     16b   32b
201219820Sjeff*/
202219820Sjeffstatic void
203219820Sjeff__osm_trap_get_key(IN uint16_t lid,
204219820Sjeff		   IN uint8_t port_num,
205219820Sjeff		   IN ib_mad_notice_attr_t * p_ntci, OUT uint64_t * trap_key)
206219820Sjeff{
207219820Sjeff	uint32_t crc = 0;
208219820Sjeff
209219820Sjeff	CL_ASSERT(trap_key);
210219820Sjeff
211219820Sjeff	crc = __osm_trap_calc_crc32(p_ntci, sizeof(ib_mad_notice_attr_t));
212219820Sjeff	*trap_key = ((uint64_t) port_num << 48) | ((uint64_t) lid << 32) | crc;
213219820Sjeff}
214219820Sjeff
215219820Sjeff/**********************************************************************
216219820Sjeff **********************************************************************/
/*
 * Decide whether a "received N times" message should be printed for this
 * count, so repeated traps are logged on a 1-2-5 series
 * (10, 20, 50, 100, 200, 500, ...).
 *
 * Trailing decimal zeros are stripped from num_received; returns 1 when
 * what remains is 1, 2 or 5, and 0 otherwise.
 */
static int __print_num_received(IN uint32_t num_received)
{
	uint32_t leading = num_received;

	/* drop trailing decimal zeros */
	while (leading >= 10 && leading % 10 == 0)
		leading /= 10;

	switch (leading) {
	case 1:
	case 2:
	case 5:
		return 1;
	default:
		return 0;
	}
}
234219820Sjeff
235219820Sjeffstatic int disable_port(osm_sm_t *sm, osm_physp_t *p)
236219820Sjeff{
237219820Sjeff	uint8_t payload[IB_SMP_DATA_SIZE];
238219820Sjeff	osm_madw_context_t context;
239219820Sjeff	ib_port_info_t *pi = (ib_port_info_t *)payload;
240219820Sjeff	int ret;
241219820Sjeff
242219820Sjeff	/* select the nearest port to master opensm */
243219820Sjeff	if (p->p_remote_physp &&
244219820Sjeff	    p->dr_path.hop_count > p->p_remote_physp->dr_path.hop_count)
245219820Sjeff		p = p->p_remote_physp;
246219820Sjeff
247219820Sjeff	/* If trap 131, might want to disable peer port if available */
248219820Sjeff	/* but peer port has been observed not to respond to SM requests */
249219820Sjeff
250219820Sjeff	OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3810: "
251219820Sjeff		"Disabling physical port 0x%016" PRIx64 " num:%u\n",
252219820Sjeff		cl_ntoh64(osm_physp_get_port_guid(p)), p->port_num);
253219820Sjeff
254219820Sjeff	memcpy(payload, &p->port_info, sizeof(ib_port_info_t));
255219820Sjeff
256219820Sjeff	/* Set port to disabled/down */
257219820Sjeff	ib_port_info_set_port_state(pi, IB_LINK_DOWN);
258219820Sjeff	ib_port_info_set_port_phys_state(IB_PORT_PHYS_STATE_DISABLED, pi);
259219820Sjeff
260219820Sjeff	/* Issue set of PortInfo */
261219820Sjeff	context.pi_context.node_guid = osm_node_get_node_guid(p->p_node);
262219820Sjeff	context.pi_context.port_guid = osm_physp_get_port_guid(p);
263219820Sjeff	context.pi_context.set_method = TRUE;
264219820Sjeff	context.pi_context.light_sweep = FALSE;
265219820Sjeff	context.pi_context.active_transition = FALSE;
266219820Sjeff
267219820Sjeff	ret = osm_req_set(sm, osm_physp_get_dr_path_ptr(p),
268219820Sjeff			  payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
269219820Sjeff			  cl_hton32(osm_physp_get_port_num(p)),
270219820Sjeff			  CL_DISP_MSGID_NONE, &context);
271219820Sjeff	if (ret)
272219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: "
273219820Sjeff			"Request to set PortInfo failed\n");
274219820Sjeff
275219820Sjeff	return ret;
276219820Sjeff}
277219820Sjeff
278219820Sjeff/**********************************************************************
279219820Sjeff **********************************************************************/
280219820Sjeffstatic void
281219820Sjeff__osm_trap_rcv_process_request(IN osm_sm_t * sm,
282219820Sjeff			       IN const osm_madw_t * const p_madw)
283219820Sjeff{
284219820Sjeff	uint8_t payload[sizeof(ib_mad_notice_attr_t)];
285219820Sjeff	ib_smp_t *p_smp;
286219820Sjeff	ib_mad_notice_attr_t *p_ntci = (ib_mad_notice_attr_t *) payload;
287219820Sjeff	ib_api_status_t status;
288219820Sjeff	osm_madw_t tmp_madw;	/* we need a copy to last after repress */
289219820Sjeff	uint64_t trap_key;
290219820Sjeff	uint32_t num_received;
291219820Sjeff	osm_physp_t *p_physp;
292219820Sjeff	cl_ptr_vector_t *p_tbl;
293219820Sjeff	osm_port_t *p_port;
294219820Sjeff	ib_net16_t source_lid = 0;
295219820Sjeff	boolean_t is_gsi = TRUE;
296219820Sjeff	uint8_t port_num = 0;
297219820Sjeff	boolean_t physp_change_trap = FALSE;
298219820Sjeff	uint64_t event_wheel_timeout = OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT;
299219820Sjeff	boolean_t run_heavy_sweep = FALSE;
300219820Sjeff
301219820Sjeff	OSM_LOG_ENTER(sm->p_log);
302219820Sjeff
303219820Sjeff	CL_ASSERT(p_madw);
304219820Sjeff
305219820Sjeff	if (osm_exit_flag)
306219820Sjeff		/*
307219820Sjeff		   We got an exit flag - do nothing
308219820Sjeff		   Otherwise we start a sweep on the trap 144 caused by
309219820Sjeff		   cleaning up SM Cap bit...
310219820Sjeff		 */
311219820Sjeff		goto Exit;
312219820Sjeff
313219820Sjeff	/* update the is_gsi flag according to the mgmt_class field */
314219820Sjeff	if (p_madw->p_mad->mgmt_class == IB_MCLASS_SUBN_LID ||
315219820Sjeff	    p_madw->p_mad->mgmt_class == IB_MCLASS_SUBN_DIR)
316219820Sjeff		is_gsi = FALSE;
317219820Sjeff
318219820Sjeff	/* No real need to grab the lock for this function. */
319219820Sjeff	memset(payload, 0, sizeof(payload));
320219820Sjeff	memset(&tmp_madw, 0, sizeof(tmp_madw));
321219820Sjeff
322219820Sjeff	p_smp = osm_madw_get_smp_ptr(p_madw);
323219820Sjeff
324219820Sjeff	if (p_smp->method != IB_MAD_METHOD_TRAP) {
325219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3801: "
326219820Sjeff			"Unsupported method 0x%X\n", p_smp->method);
327219820Sjeff		goto Exit;
328219820Sjeff	}
329219820Sjeff
330219820Sjeff	/*
331219820Sjeff	 * The NOTICE Attribute is part of the SMP CLASS attributes
332219820Sjeff	 * As such the actual attribute data resides inside the SMP
333219820Sjeff	 * payload.
334219820Sjeff	 */
335219820Sjeff
336219820Sjeff	memcpy(payload, &(p_smp->data), IB_SMP_DATA_SIZE);
337219820Sjeff	memcpy(&tmp_madw, p_madw, sizeof(tmp_madw));
338219820Sjeff
339219820Sjeff	if (is_gsi == FALSE) {
340219820Sjeff		/* We are in smi flow */
341219820Sjeff		/*
342219820Sjeff		 * When we received a TRAP with dlid = 0 - it means it
343219820Sjeff		 * came from our own node. So we need to fix it.
344219820Sjeff		 */
345219820Sjeff
346219820Sjeff		if (p_madw->mad_addr.addr_type.smi.source_lid == 0) {
347219820Sjeff			/* Check if the sm_base_lid is 0. If yes - this means
348219820Sjeff			   that the local lid wasn't configured yet. Don't send
349219820Sjeff			   a response to the trap. */
350219820Sjeff			if (sm->p_subn->sm_base_lid == 0) {
351219820Sjeff				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
352219820Sjeff					"Received SLID=0 Trap with local LID=0. Ignoring MAD\n");
353219820Sjeff				goto Exit;
354219820Sjeff			}
355219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
356219820Sjeff				"Received SLID=0 Trap. Using local LID:%u instead\n",
357219820Sjeff				cl_ntoh16(sm->p_subn->sm_base_lid));
358219820Sjeff			tmp_madw.mad_addr.addr_type.smi.source_lid =
359219820Sjeff			    sm->p_subn->sm_base_lid;
360219820Sjeff		}
361219820Sjeff
362219820Sjeff		source_lid = tmp_madw.mad_addr.addr_type.smi.source_lid;
363219820Sjeff
364219820Sjeff		/* Print some info about the incoming Trap */
365219820Sjeff		if (ib_notice_is_generic(p_ntci)) {
366219820Sjeff			if ((p_ntci->g_or_v.generic.trap_num == CL_HTON16(129))
367219820Sjeff			    || (p_ntci->g_or_v.generic.trap_num ==
368219820Sjeff				CL_HTON16(130))
369219820Sjeff			    || (p_ntci->g_or_v.generic.trap_num ==
370219820Sjeff				CL_HTON16(131)))
371219820Sjeff				OSM_LOG(sm->p_log, OSM_LOG_ERROR,
372219820Sjeff					"Received Generic Notice type:%u "
373219820Sjeff					"num:%u (%s) Producer:%u (%s) "
374219820Sjeff					"from LID:%u Port %d TID:0x%016"
375219820Sjeff					PRIx64 "\n", ib_notice_get_type(p_ntci),
376219820Sjeff					cl_ntoh16(p_ntci->g_or_v.generic.
377219820Sjeff						  trap_num),
378219820Sjeff					ib_get_trap_str(p_ntci->g_or_v.generic.
379219820Sjeff							trap_num),
380219820Sjeff					cl_ntoh32(ib_notice_get_prod_type
381219820Sjeff						  (p_ntci)),
382219820Sjeff					ib_get_producer_type_str
383219820Sjeff					(ib_notice_get_prod_type(p_ntci)),
384219820Sjeff					cl_hton16(source_lid),
385219820Sjeff					p_ntci->data_details.ntc_129_131.
386219820Sjeff					port_num, cl_ntoh64(p_smp->trans_id));
387219820Sjeff			else
388219820Sjeff				OSM_LOG(sm->p_log, OSM_LOG_ERROR,
389219820Sjeff					"Received Generic Notice type:%u "
390219820Sjeff					"num:%u (%s) Producer:%u (%s) "
391219820Sjeff					"from LID:%u TID:0x%016" PRIx64
392219820Sjeff					"\n", ib_notice_get_type(p_ntci),
393219820Sjeff					cl_ntoh16(p_ntci->g_or_v.generic.
394219820Sjeff						  trap_num),
395219820Sjeff					ib_get_trap_str(p_ntci->g_or_v.generic.
396219820Sjeff							trap_num),
397219820Sjeff					cl_ntoh32(ib_notice_get_prod_type
398219820Sjeff						  (p_ntci)),
399219820Sjeff					ib_get_producer_type_str
400219820Sjeff					(ib_notice_get_prod_type(p_ntci)),
401219820Sjeff					cl_hton16(source_lid),
402219820Sjeff					cl_ntoh64(p_smp->trans_id));
403219820Sjeff		} else
404219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_ERROR,
405219820Sjeff				"Received Vendor Notice type:%u vend:0x%06X "
406219820Sjeff				"dev:%u from LID:%u TID:0x%016" PRIx64 "\n",
407219820Sjeff				ib_notice_get_type(p_ntci),
408219820Sjeff				cl_ntoh32(ib_notice_get_vend_id(p_ntci)),
409219820Sjeff				cl_ntoh16(p_ntci->g_or_v.vend.dev_id),
410219820Sjeff				cl_ntoh16(source_lid),
411219820Sjeff				cl_ntoh64(p_smp->trans_id));
412219820Sjeff	}
413219820Sjeff
414219820Sjeff	osm_dump_notice(sm->p_log, p_ntci, OSM_LOG_VERBOSE);
415219820Sjeff
416219820Sjeff	p_physp = osm_get_physp_by_mad_addr(sm->p_log,
417219820Sjeff					    sm->p_subn, &tmp_madw.mad_addr);
418219820Sjeff	if (p_physp)
419219820Sjeff		p_smp->m_key = p_physp->port_info.m_key;
420219820Sjeff	else
421219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3809: "
422219820Sjeff			"Failed to find source physical port for trap\n");
423219820Sjeff
424219820Sjeff	status = osm_resp_send(sm, &tmp_madw, 0, payload);
425219820Sjeff	if (status != IB_SUCCESS) {
426219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3802: "
427219820Sjeff			"Error sending response (%s)\n",
428219820Sjeff			ib_get_err_str(status));
429219820Sjeff		goto Exit;
430219820Sjeff	}
431219820Sjeff
432219820Sjeff	/*
433219820Sjeff	 * We would like to filter out recurring Traps so we track them by
434219820Sjeff	 * their source lid and content. If the same trap was already
435219820Sjeff	 * received within the aging time window more than 10 times,
436219820Sjeff	 * we simply ignore it. This is done only if we are in smi mode
437219820Sjeff	 */
438219820Sjeff
439219820Sjeff	if (is_gsi == FALSE) {
440219820Sjeff		if (ib_notice_is_generic(p_ntci) &&
441219820Sjeff		    ((p_ntci->g_or_v.generic.trap_num == CL_HTON16(129)) ||
442219820Sjeff		     (p_ntci->g_or_v.generic.trap_num == CL_HTON16(130)) ||
443219820Sjeff		     (p_ntci->g_or_v.generic.trap_num == CL_HTON16(131)))) {
444219820Sjeff			/* If this is a trap 129, 130, or 131 - then this is a
445219820Sjeff			 * trap signaling a change on a physical port.
446219820Sjeff			 * Mark the physp_change_trap flag as TRUE.
447219820Sjeff			 */
448219820Sjeff			physp_change_trap = TRUE;
449219820Sjeff			/* The source_lid should be based on the source_lid from the trap */
450219820Sjeff			source_lid = p_ntci->data_details.ntc_129_131.lid;
451219820Sjeff		}
452219820Sjeff
453219820Sjeff		/* If physp_change_trap is TRUE - the key will include the port number.
454219820Sjeff		   If not - the port_number in the key will be zero. */
455219820Sjeff		if (physp_change_trap == TRUE) {
456219820Sjeff			port_num = p_ntci->data_details.ntc_129_131.port_num;
457219820Sjeff			__osm_trap_get_key(source_lid, port_num, p_ntci,
458219820Sjeff					   &trap_key);
459219820Sjeff		} else
460219820Sjeff			__osm_trap_get_key(source_lid, 0, p_ntci, &trap_key);
461219820Sjeff
462219820Sjeff		/* try to find it in the aging tracker */
463219820Sjeff		num_received =
464219820Sjeff		    cl_event_wheel_num_regs(&sm->trap_aging_tracker,
465219820Sjeff					    trap_key);
466219820Sjeff
467219820Sjeff		/* Now we know how many times it provided this trap */
468219820Sjeff		if (num_received > 10) {
469219820Sjeff			if (__print_num_received(num_received))
470219820Sjeff				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: "
471219820Sjeff					"Received trap %u times consecutively\n",
472219820Sjeff					num_received);
473219820Sjeff			/*
474219820Sjeff			 * If the trap provides info about a bad port
475219820Sjeff			 * we mark it as unhealthy.
476219820Sjeff			 */
477219820Sjeff			if (physp_change_trap == TRUE) {
478219820Sjeff				/* get the port */
479219820Sjeff				p_physp = get_physp_by_lid_and_num(sm,
480219820Sjeff								   cl_ntoh16
481219820Sjeff								   (p_ntci->
482219820Sjeff								    data_details.
483219820Sjeff								    ntc_129_131.
484219820Sjeff								    lid),
485219820Sjeff								   port_num);
486219820Sjeff
487219820Sjeff				if (!p_physp)
488219820Sjeff					OSM_LOG(sm->p_log, OSM_LOG_ERROR,
489219820Sjeff						"ERR 3805: "
490219820Sjeff						"Failed to find physical port by lid:%u num:%u\n",
491219820Sjeff						cl_ntoh16(p_ntci->data_details.
492219820Sjeff							  ntc_129_131.lid),
493219820Sjeff						p_ntci->data_details.
494219820Sjeff						ntc_129_131.port_num);
495219820Sjeff				else {
496219820Sjeff					/* When babbling port policy option is enabled and
497219820Sjeff					   Threshold for disabling a "babbling" port is exceeded */
498219820Sjeff					if (sm->p_subn->opt.
499219820Sjeff					    babbling_port_policy
500219820Sjeff					    && num_received >= 250
501219820Sjeff					    && disable_port(sm, p_physp) == 0)
502219820Sjeff						goto Exit;
503219820Sjeff
504219820Sjeff					OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
505219820Sjeff						"Marking unhealthy physical port by lid:%u num:%u\n",
506219820Sjeff						cl_ntoh16(p_ntci->data_details.
507219820Sjeff							  ntc_129_131.lid),
508219820Sjeff						p_ntci->data_details.
509219820Sjeff						ntc_129_131.port_num);
510219820Sjeff					/* check if the current state of the p_physp is healthy. If
511219820Sjeff					   it is - then this is a first change of state. Run a heavy sweep.
512219820Sjeff					   if it is not - no need to mark it again - just restart the timer. */
513219820Sjeff					if (osm_physp_is_healthy(p_physp)) {
514219820Sjeff						osm_physp_set_health(p_physp,
515219820Sjeff								     FALSE);
516219820Sjeff						/* Make sure we sweep again - force a heavy sweep. */
517219820Sjeff						/* The sweep should be done only after the re-registration, or
518219820Sjeff						   else we'll be losing track of the timer. */
519219820Sjeff						run_heavy_sweep = TRUE;
520219820Sjeff					}
521219820Sjeff					/* If we are marking the port as unhealthy - we want to
522219820Sjeff					   keep this for a longer period of time than the
523219820Sjeff					   OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the
524219820Sjeff					   OSM_DEFAULT_UNHEALTHY_TIMEOUT */
525219820Sjeff					event_wheel_timeout =
526219820Sjeff					    OSM_DEFAULT_UNHEALTHY_TIMEOUT;
527219820Sjeff				}
528219820Sjeff			}
529219820Sjeff		}
530219820Sjeff
531219820Sjeff		/* restart the aging anyway */
532219820Sjeff		/* If physp_change_trap is TRUE - then use a callback to unset the
533219820Sjeff		   healthy bit. If not - no need to use a callback. */
534219820Sjeff		if (physp_change_trap == TRUE)
535219820Sjeff			cl_event_wheel_reg(&sm->trap_aging_tracker, trap_key, cl_get_time_stamp() + event_wheel_timeout, osm_trap_rcv_aging_tracker_callback,	/* no callback */
536219820Sjeff					   sm	/* no context */ );
537219820Sjeff		else
538219820Sjeff			cl_event_wheel_reg(&sm->trap_aging_tracker, trap_key, cl_get_time_stamp() + event_wheel_timeout, NULL,	/* no callback */
539219820Sjeff					   NULL	/* no context */ );
540219820Sjeff
541219820Sjeff		/* If was already registered do nothing more */
542219820Sjeff		if (num_received > 10 && run_heavy_sweep == FALSE) {
543219820Sjeff			if (__print_num_received(num_received))
544219820Sjeff				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
545219820Sjeff					"Continuously received this trap %u times. Ignoring\n",
546219820Sjeff					num_received);
547219820Sjeff			goto Exit;
548219820Sjeff		}
549219820Sjeff	}
550219820Sjeff
551219820Sjeff	/* Check for node description update. IB Spec v1.2.1 pg 823 */
552219820Sjeff	if ((p_ntci->data_details.ntc_144.local_changes & TRAP_144_MASK_OTHER_LOCAL_CHANGES) &&
553219820Sjeff		(p_ntci->data_details.ntc_144.change_flgs & TRAP_144_MASK_NODE_DESCRIPTION_CHANGE)
554219820Sjeff		) {
555219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_INFO, "Trap 144 Node description update\n");
556219820Sjeff
557219820Sjeff		if (p_physp) {
558219820Sjeff			CL_PLOCK_ACQUIRE(sm->p_lock);
559219820Sjeff			osm_req_get_node_desc(sm, p_physp);
560219820Sjeff			CL_PLOCK_RELEASE(sm->p_lock);
561219820Sjeff		} else {
562219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_ERROR,
563219820Sjeff				"ERR 3812: No physical port found for "
564219820Sjeff				"trap 144: \"node description update\"\n");
565219820Sjeff		}
566219820Sjeff	}
567219820Sjeff
568219820Sjeff	/* do a sweep if we received a trap */
569219820Sjeff	if (sm->p_subn->opt.sweep_on_trap) {
570219820Sjeff		/* if this is trap number 128 or run_heavy_sweep is TRUE - update the
571219820Sjeff		   force_single_heavy_sweep flag of the subnet.
572219820Sjeff		   Sweep also on traps 144/145 - these traps signal a change of a certain
573219820Sjeff		   port capability/system image guid.
574219820Sjeff		   TODO: In the future we can change this to just getting PortInfo on
575219820Sjeff		   this port instead of sweeping the entire subnet. */
576219820Sjeff		if (ib_notice_is_generic(p_ntci) &&
577219820Sjeff		    ((cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 128) ||
578219820Sjeff		     (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 144) ||
579219820Sjeff		     (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 145) ||
580219820Sjeff		     run_heavy_sweep)) {
581219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
582219820Sjeff				"Forcing heavy sweep. Received trap:%u\n",
583219820Sjeff				cl_ntoh16(p_ntci->g_or_v.generic.trap_num));
584219820Sjeff
585219820Sjeff			sm->p_subn->force_heavy_sweep = TRUE;
586219820Sjeff		}
587219820Sjeff		osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
588219820Sjeff	}
589219820Sjeff
590219820Sjeff	/* If we reached here due to trap 129/130/131 - do not need to do
591219820Sjeff	   the notice report. Just goto exit. We know this is the case
592219820Sjeff	   if physp_change_trap is TRUE. */
593219820Sjeff	if (physp_change_trap == TRUE)
594219820Sjeff		goto Exit;
595219820Sjeff
596219820Sjeff	/* Add a call to osm_report_notice */
597219820Sjeff	/* We are going to report the notice - so need to fix the IssuerGID
598219820Sjeff	   accordingly. See IBA 1.2 p.739 or IBA 1.1 p.653 for details. */
599219820Sjeff	if (is_gsi) {
600219820Sjeff		if (!tmp_madw.mad_addr.addr_type.gsi.global_route) {
601219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3806: "
602219820Sjeff				"Received gsi trap with global_route FALSE. "
603219820Sjeff				"Cannot update issuer_gid!\n");
604219820Sjeff			goto Exit;
605219820Sjeff		}
606219820Sjeff		memcpy(&(p_ntci->issuer_gid),
607219820Sjeff		       &(tmp_madw.mad_addr.addr_type.gsi.grh_info.src_gid),
608219820Sjeff		       sizeof(ib_gid_t));
609219820Sjeff	} else {
610219820Sjeff		/* Need to use the IssuerLID */
611219820Sjeff		p_tbl = &sm->p_subn->port_lid_tbl;
612219820Sjeff
613219820Sjeff		CL_ASSERT(cl_ptr_vector_get_size(p_tbl) < 0x10000);
614219820Sjeff
615219820Sjeff		if ((uint16_t) cl_ptr_vector_get_size(p_tbl) <=
616219820Sjeff		    cl_ntoh16(source_lid)) {
617219820Sjeff			/*  the source lid is out of range */
618219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
619219820Sjeff				"source lid is out of range:%u\n",
620219820Sjeff				cl_ntoh16(source_lid));
621219820Sjeff
622219820Sjeff			goto Exit;
623219820Sjeff		}
624219820Sjeff		p_port = cl_ptr_vector_get(p_tbl, cl_ntoh16(source_lid));
625219820Sjeff		if (p_port == 0) {
626219820Sjeff			/* We have the lid - but no corresponding port */
627219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
628219820Sjeff				"Cannot find port corresponding to lid:%u\n",
629219820Sjeff				cl_ntoh16(source_lid));
630219820Sjeff
631219820Sjeff			goto Exit;
632219820Sjeff		}
633219820Sjeff
634219820Sjeff		p_ntci->issuer_gid.unicast.prefix =
635219820Sjeff		    sm->p_subn->opt.subnet_prefix;
636219820Sjeff		p_ntci->issuer_gid.unicast.interface_id = p_port->guid;
637219820Sjeff	}
638219820Sjeff
639219820Sjeff	/* we need a lock here as the InformInfo DB must be stable */
640219820Sjeff	CL_PLOCK_ACQUIRE(sm->p_lock);
641219820Sjeff	status = osm_report_notice(sm->p_log, sm->p_subn, p_ntci);
642219820Sjeff	CL_PLOCK_RELEASE(sm->p_lock);
643219820Sjeff	if (status != IB_SUCCESS) {
644219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3803: "
645219820Sjeff			"Error sending trap reports (%s)\n",
646219820Sjeff			ib_get_err_str(status));
647219820Sjeff		goto Exit;
648219820Sjeff	}
649219820Sjeff
650219820SjeffExit:
651219820Sjeff	OSM_LOG_EXIT(sm->p_log);
652219820Sjeff}
653219820Sjeff
#if 0
/**********************************************************************
 CURRENTLY WE ARE NOT CREATING TRAPS - SO THIS CALL IS AN ERROR

 This function is compiled out by the surrounding #if 0. Its body only
 logs ERR 3807; the p_sm argument is not used.
**********************************************************************/
static void
__osm_trap_rcv_process_sm(IN osm_sm_t * sm,
			  IN const osm_remote_sm_t * const p_sm)
{
	/* const ib_sm_info_t*        p_smi; */

	OSM_LOG_ENTER(sm->p_log);

	OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3807: "
		"This function is not supported yet\n");

	OSM_LOG_EXIT(sm->p_log);
}
#endif
672219820Sjeff
/**********************************************************************
 CURRENTLY WE ARE NOT CREATING TRAPS - SO THIS CALL IS AN ERROR

 Called by osm_trap_rcv_process() when the received SMP is a response.
 The body only logs ERR 3808; p_madw is not examined.
**********************************************************************/
static void
__osm_trap_rcv_process_response(IN osm_sm_t * sm,
				IN const osm_madw_t * const p_madw)
{

	OSM_LOG_ENTER(sm->p_log);

	/* Responses to traps are unexpected - report and do nothing else */
	OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3808: "
		"This function is not supported yet\n");

	OSM_LOG_EXIT(sm->p_log);
}
688219820Sjeff
689219820Sjeff/**********************************************************************
690219820Sjeff **********************************************************************/
691219820Sjeffvoid osm_trap_rcv_process(IN void *context, IN void *data)
692219820Sjeff{
693219820Sjeff	osm_sm_t *sm = context;
694219820Sjeff	osm_madw_t *p_madw = data;
695219820Sjeff	ib_smp_t *p_smp;
696219820Sjeff
697219820Sjeff	OSM_LOG_ENTER(sm->p_log);
698219820Sjeff
699219820Sjeff	CL_ASSERT(p_madw);
700219820Sjeff
701219820Sjeff	p_smp = osm_madw_get_smp_ptr(p_madw);
702219820Sjeff
703219820Sjeff	/*
704219820Sjeff	   Determine if this is a request for our own Trap
705219820Sjeff	   or if this is a response to our request for another
706219820Sjeff	   SM's Trap.
707219820Sjeff	 */
708219820Sjeff	if (ib_smp_is_response(p_smp))
709219820Sjeff		__osm_trap_rcv_process_response(sm, p_madw);
710219820Sjeff	else
711219820Sjeff		__osm_trap_rcv_process_request(sm, p_madw);
712219820Sjeff
713219820Sjeff	OSM_LOG_EXIT(sm->p_log);
714219820Sjeff}
715