1/*
2 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2009 HNR Consulting. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 *
35 */
36
37/*
38 * Abstract:
39 *    Implementation of osm_ni_rcv_t.
40 * This object represents the NodeInfo Receiver object.
41 * This object is part of the opensm family of objects.
42 */
43
44#if HAVE_CONFIG_H
45#  include <config.h>
46#endif				/* HAVE_CONFIG_H */
47
48#include <stdlib.h>
49#include <string.h>
50#include <iba/ib_types.h>
51#include <complib/cl_qmap.h>
52#include <complib/cl_passivelock.h>
53#include <complib/cl_debug.h>
54#include <opensm/osm_file_ids.h>
55#define FILE_ID OSM_FILE_NODE_INFO_RCV_C
56#include <opensm/osm_madw.h>
57#include <opensm/osm_log.h>
58#include <opensm/osm_node.h>
59#include <opensm/osm_subnet.h>
60#include <opensm/osm_router.h>
61#include <opensm/osm_mad_pool.h>
62#include <opensm/osm_helper.h>
63#include <opensm/osm_msgdef.h>
64#include <opensm/osm_opensm.h>
65#include <opensm/osm_ucast_mgr.h>
66#include <opensm/osm_db_pack.h>
67
68static void report_duplicated_guid(IN osm_sm_t * sm, osm_physp_t * p_physp,
69				   osm_node_t * p_neighbor_node,
70				   const uint8_t port_num)
71{
72	osm_physp_t *p_old, *p_new;
73	osm_dr_path_t path;
74
75	p_old = p_physp->p_remote_physp;
76	p_new = osm_node_get_physp_ptr(p_neighbor_node, port_num);
77
78	OSM_LOG(sm->p_log, OSM_LOG_SYS | OSM_LOG_ERROR, "ERR 0D01: "
79		"Found duplicated node GUID.\n"
80		"Node 0x%" PRIx64 " port %u is reachable from remote node "
81		"0x%" PRIx64 " port %u and remote node 0x%" PRIx64 " port %u.\n"
82		"Paths are:\n",
83		cl_ntoh64(p_physp->p_node->node_info.node_guid),
84		p_physp->port_num,
85		p_old ? cl_ntoh64(p_old->p_node->node_info.node_guid) : 0,
86		p_old ? p_old->port_num : 0,
87		p_new ? cl_ntoh64(p_new->p_node->node_info.node_guid) : 0,
88		p_new ? p_new->port_num : 0);
89
90	osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
91			    FILE_ID, OSM_LOG_ERROR);
92
93	path = *osm_physp_get_dr_path_ptr(p_new);
94	if (osm_dr_path_extend(&path, port_num))
95		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D05: "
96			"DR path with hop count %d couldn't be extended\n",
97			path.hop_count);
98	osm_dump_dr_path_v2(sm->p_log, &path, FILE_ID, OSM_LOG_ERROR);
99}
100
101static void requery_dup_node_info(IN osm_sm_t * sm, osm_physp_t * p_physp,
102				  unsigned count)
103{
104	osm_madw_context_t context;
105	osm_dr_path_t path;
106	cl_status_t status;
107
108	if (!p_physp->p_remote_physp) {
109		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0D: "
110			"DR path couldn't be extended due to NULL remote physp\n");
111		return;
112	}
113
114	path = *osm_physp_get_dr_path_ptr(p_physp->p_remote_physp);
115	if (osm_dr_path_extend(&path, p_physp->p_remote_physp->port_num)) {
116		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D08: "
117			"DR path with hop count %d couldn't be extended\n",
118			path.hop_count);
119		return;
120	}
121
122	context.ni_context.node_guid =
123	    p_physp->p_remote_physp->p_node->node_info.port_guid;
124	context.ni_context.port_num = p_physp->p_remote_physp->port_num;
125	context.ni_context.dup_node_guid = p_physp->p_node->node_info.node_guid;
126	context.ni_context.dup_port_num = p_physp->port_num;
127	context.ni_context.dup_count = count;
128
129	status = osm_req_get(sm, &path, IB_MAD_ATTR_NODE_INFO, 0,
130			     TRUE, 0, CL_DISP_MSGID_NONE, &context);
131
132	if (status != IB_SUCCESS)
133		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: "
134			"Failure initiating NodeInfo request (%s)\n",
135			ib_get_err_str(status));
136}
137
138/**********************************************************************
139 The plock must be held before calling this function.
140**********************************************************************/
141static void ni_rcv_set_links(IN osm_sm_t * sm, osm_node_t * p_node,
142			     const uint8_t port_num,
143			     const osm_ni_context_t * p_ni_context)
144{
145	osm_node_t *p_neighbor_node;
146	osm_physp_t *p_physp, *p_remote_physp;
147
148	OSM_LOG_ENTER(sm->p_log);
149
150	/*
151	   A special case exists in which the node we're trying to
152	   link is our own node.  In this case, the guid value in
153	   the ni_context will be zero.
154	 */
155	if (p_ni_context->node_guid == 0) {
156		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
157			"Nothing to link for our own node 0x%" PRIx64 "\n",
158			cl_ntoh64(osm_node_get_node_guid(p_node)));
159		goto _exit;
160	}
161
162	p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
163					       p_ni_context->node_guid);
164	if (PF(!p_neighbor_node)) {
165		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
166			"Unexpected removal of neighbor node 0x%" PRIx64 "\n",
167			cl_ntoh64(p_ni_context->node_guid));
168		goto _exit;
169	}
170
171	/* When setting the link, ports on both
172	   sides of the link should be initialized */
173	CL_ASSERT(osm_node_link_has_valid_ports(p_node, port_num,
174						p_neighbor_node,
175						p_ni_context->port_num));
176
177	if (osm_node_link_exists(p_node, port_num,
178				 p_neighbor_node, p_ni_context->port_num)) {
179		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
180		goto _exit;
181	}
182
183	p_physp = osm_node_get_physp_ptr(p_node, port_num);
184	if (!p_physp) {
185		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0E: "
186			"Failed to find physp for port %d of Node GUID 0x%"
187			PRIx64 "\n", port_num,
188			cl_ntoh64(osm_node_get_node_guid(p_node)));
189		goto _exit;
190	}
191
192	/*
193	 * If the link went UP, after we already discovered it, we shouldn't
194	 * set the link between the ports and resweep.
195	 */
196	if (osm_physp_get_port_state(p_physp) == IB_LINK_DOWN &&
197	    p_node->physp_discovered[port_num]) {
198		/* Link down on another side. Don't create a link*/
199		p_node->physp_discovered[port_num] = 0;
200		sm->p_subn->force_heavy_sweep = TRUE;
201		goto _exit;
202	}
203
204	if (osm_node_has_any_link(p_node, port_num) &&
205	    sm->p_subn->force_heavy_sweep == FALSE &&
206	    (!p_ni_context->dup_count ||
207	     (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
208	      p_ni_context->dup_port_num == port_num))) {
209		/*
210		   Uh oh...
211		   This could be reconnected ports, but also duplicated GUID
212		   (2 nodes have the same guid) or a 12x link with lane reversal
213		   that is not configured correctly.
214		   We will try to recover by querying NodeInfo again.
215		   In order to catch even fast port moving to new location(s)
216		   and back we will count up to 5.
217		   Some crazy reconnections (newly created switch loop right
218		   before targeted CA) will not be catched this way. So in worst
219		   case - report GUID duplication and request new discovery.
220		   When switch node is targeted NodeInfo querying will be done
221		   in opposite order, this is much stronger check, unfortunately
222		   it is impossible with CAs.
223		 */
224		p_physp = osm_node_get_physp_ptr(p_node, port_num);
225		if (!p_physp) {
226			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0F: "
227				"Failed to find physp for port %d of Node GUID 0x%"
228				PRIx64 "\n", port_num,
229				cl_ntoh64(osm_node_get_node_guid(p_node)));
230			goto _exit;
231		}
232
233		if (p_ni_context->dup_count > 5) {
234			report_duplicated_guid(sm, p_physp, p_neighbor_node,
235					       p_ni_context->port_num);
236			sm->p_subn->force_heavy_sweep = TRUE;
237		} else if (p_node->sw)
238			requery_dup_node_info(sm, p_physp->p_remote_physp,
239					      p_ni_context->dup_count + 1);
240		else
241			requery_dup_node_info(sm, p_physp,
242					      p_ni_context->dup_count + 1);
243	}
244
245	/*
246	   When there are only two nodes with exact same guids (connected back
247	   to back) - the previous check for duplicated guid will not catch
248	   them. But the link will be from the port to itself...
249	   Enhanced Port 0 is an exception to this
250	 */
251	if (osm_node_get_node_guid(p_node) == p_ni_context->node_guid &&
252	    port_num == p_ni_context->port_num &&
253	    port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
254		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
255			"Duplicate GUID found by link from a port to itself:"
256			"node 0x%" PRIx64 ", port number %u\n",
257			cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);
258		p_physp = osm_node_get_physp_ptr(p_node, port_num);
259		if (!p_physp) {
260			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1D: "
261				"Failed to find physp for port %d of Node GUID 0x%"
262				PRIx64 "\n", port_num,
263				cl_ntoh64(osm_node_get_node_guid(p_node)));
264			goto _exit;
265		}
266
267		osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
268				    FILE_ID, OSM_LOG_VERBOSE);
269
270		if (sm->p_subn->opt.exit_on_fatal == TRUE) {
271			osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID,
272				   "Errors on subnet. Duplicate GUID found "
273				   "by link from a port to itself. "
274				   "See verbose opensm.log for more details\n");
275			exit(1);
276		}
277	}
278
279	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
280		"Creating new link between:\n\t\t\t\tnode 0x%" PRIx64
281		", port number %u and\n\t\t\t\tnode 0x%" PRIx64
282		", port number %u\n",
283		cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
284		cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);
285
286	if (sm->ucast_mgr.cache_valid)
287		osm_ucast_cache_check_new_link(&sm->ucast_mgr, p_node, port_num,
288					       p_neighbor_node,
289					       p_ni_context->port_num);
290
291	p_physp = osm_node_get_physp_ptr(p_node, port_num);
292	p_remote_physp = osm_node_get_physp_ptr(p_neighbor_node,
293						p_ni_context->port_num);
294	if (!p_physp || !p_remote_physp)
295		goto _exit;
296
297	osm_node_link(p_node, port_num, p_neighbor_node, p_ni_context->port_num);
298
299	osm_db_neighbor_set(sm->p_subn->p_neighbor,
300			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
301			    port_num,
302			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
303			    p_ni_context->port_num);
304	osm_db_neighbor_set(sm->p_subn->p_neighbor,
305			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
306			    p_ni_context->port_num,
307			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
308			    port_num);
309
310_exit:
311	OSM_LOG_EXIT(sm->p_log);
312}
313
314static void ni_rcv_get_port_info(IN osm_sm_t * sm, IN osm_node_t * node,
315				 IN const osm_madw_t * madw)
316{
317	osm_madw_context_t context;
318	osm_physp_t *physp;
319	ib_node_info_t *ni;
320	unsigned port;
321	ib_api_status_t status;
322	int mlnx_epi_supported = 0;
323
324	ni = ib_smp_get_payload_ptr(osm_madw_get_smp_ptr(madw));
325
326	port = ib_node_info_get_local_port_num(ni);
327
328	if (sm->p_subn->opt.fdr10)
329		mlnx_epi_supported = is_mlnx_ext_port_info_supported(
330						ib_node_info_get_vendor_id(ni),
331						ni->device_id);
332
333	physp = osm_node_get_physp_ptr(node, port);
334	if (!physp) {
335		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1E: "
336			"Failed to find physp for port %d of Node GUID 0x%"
337			PRIx64 "\n", port,
338			cl_ntoh64(osm_node_get_node_guid(node)));
339		return;
340	}
341
342	context.pi_context.node_guid = osm_node_get_node_guid(node);
343	context.pi_context.port_guid = osm_physp_get_port_guid(physp);
344	context.pi_context.set_method = FALSE;
345	context.pi_context.light_sweep = FALSE;
346	context.pi_context.active_transition = FALSE;
347	context.pi_context.client_rereg = FALSE;
348
349	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp),
350			     IB_MAD_ATTR_PORT_INFO, cl_hton32(port),
351			     TRUE, 0, CL_DISP_MSGID_NONE, &context);
352	if (status != IB_SUCCESS)
353		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD02: "
354			"Failure initiating PortInfo request (%s)\n",
355			ib_get_err_str(status));
356	if (mlnx_epi_supported) {
357		status = osm_req_get(sm,
358				     osm_physp_get_dr_path_ptr(physp),
359				     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
360				     cl_hton32(port),
361				     TRUE, 0, CL_DISP_MSGID_NONE, &context);
362		if (status != IB_SUCCESS)
363			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0B: "
364				"Failure initiating MLNX ExtPortInfo request (%s)\n",
365				ib_get_err_str(status));
366	}
367}
368
369/**********************************************************************
370 The plock must be held before calling this function.
371**********************************************************************/
372void osm_req_get_node_desc(IN osm_sm_t * sm, osm_physp_t * p_physp)
373{
374	ib_api_status_t status = IB_SUCCESS;
375	osm_madw_context_t context;
376
377	OSM_LOG_ENTER(sm->p_log);
378
379	context.nd_context.node_guid =
380	    osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
381
382	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
383			     IB_MAD_ATTR_NODE_DESC, 0, TRUE, 0,
384			     CL_DISP_MSGID_NONE, &context);
385	if (status != IB_SUCCESS)
386		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: "
387			"Failure initiating NodeDescription request (%s)\n",
388			ib_get_err_str(status));
389
390	OSM_LOG_EXIT(sm->p_log);
391}
392
393/**********************************************************************
394 The plock must be held before calling this function.
395**********************************************************************/
396static void ni_rcv_get_node_desc(IN osm_sm_t * sm, IN osm_node_t * p_node,
397				 IN const osm_madw_t * p_madw)
398{
399	ib_node_info_t *p_ni;
400	ib_smp_t *p_smp;
401	uint8_t port_num;
402	osm_physp_t *p_physp = NULL;
403
404	OSM_LOG_ENTER(sm->p_log);
405
406	p_smp = osm_madw_get_smp_ptr(p_madw);
407	p_ni = ib_smp_get_payload_ptr(p_smp);
408	port_num = ib_node_info_get_local_port_num(p_ni);
409
410	/*
411	   Request PortInfo & NodeDescription attributes for the port
412	   that responded to the NodeInfo attribute.
413	   Because this is a channel adapter or router, we are
414	   not allowed to request PortInfo for the other ports.
415	   Set the context union properly, so the recipient
416	   knows which node & port are relevant.
417	 */
418	p_physp = osm_node_get_physp_ptr(p_node, port_num);
419	if (!p_physp) {
420		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1F: "
421			"Failed to find physp for port %d of Node GUID 0x%"
422			PRIx64 "\n", port_num,
423			cl_ntoh64(osm_node_get_node_guid(p_node)));
424		return;
425	}
426
427	osm_req_get_node_desc(sm, p_physp);
428
429	OSM_LOG_EXIT(sm->p_log);
430}
431
432/**********************************************************************
433 The plock must be held before calling this function.
434**********************************************************************/
435static void ni_rcv_process_new_ca_or_router(IN osm_sm_t * sm,
436					    IN osm_node_t * p_node,
437					    IN const osm_madw_t * p_madw)
438{
439	OSM_LOG_ENTER(sm->p_log);
440
441	ni_rcv_get_port_info(sm, p_node, p_madw);
442
443	/*
444	   A node guid of 0 is the corner case that indicates
445	   we discovered our own node.  Initialize the subnet
446	   object with the SM's own port guid.
447	 */
448	if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
449		sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
450
451	OSM_LOG_EXIT(sm->p_log);
452}
453
454/**********************************************************************
455 The plock must be held before calling this function.
456**********************************************************************/
457static void ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm,
458						 IN osm_node_t * p_node,
459						 IN const osm_madw_t * p_madw)
460{
461	ib_node_info_t *p_ni;
462	ib_smp_t *p_smp;
463	osm_port_t *p_port;
464	osm_port_t *p_port_check;
465	uint8_t port_num;
466	osm_dr_path_t *p_dr_path;
467	osm_alias_guid_t *p_alias_guid, *p_alias_guid_check;
468
469	OSM_LOG_ENTER(sm->p_log);
470
471	p_smp = osm_madw_get_smp_ptr(p_madw);
472	p_ni = ib_smp_get_payload_ptr(p_smp);
473	port_num = ib_node_info_get_local_port_num(p_ni);
474
475	/*
476	   Determine if we have encountered this node through a
477	   previously undiscovered port.  If so, build the new
478	   port object.
479	 */
480	p_port = osm_get_port_by_guid(sm->p_subn, p_ni->port_guid);
481	if (!p_port) {
482		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
483			"Creating new port object with GUID 0x%" PRIx64 "\n",
484			cl_ntoh64(p_ni->port_guid));
485
486		osm_node_init_physp(p_node, port_num, p_madw);
487
488		p_port = osm_port_new(p_ni, p_node);
489		if (PF(p_port == NULL)) {
490			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D04: "
491				"Unable to create new port object\n");
492			goto Exit;
493		}
494
495		/*
496		   Add the new port object to the database.
497		 */
498		p_port_check =
499		    (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
500						  p_ni->port_guid,
501						  &p_port->map_item);
502		if (PF(p_port_check != p_port)) {
503			/*
504			   We should never be here!
505			   Somehow, this port GUID already exists in the table.
506			 */
507			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D12: "
508				"Port 0x%" PRIx64 " already in the database!\n",
509				cl_ntoh64(p_ni->port_guid));
510
511			osm_port_delete(&p_port);
512			goto Exit;
513		}
514
515		p_alias_guid = osm_alias_guid_new(p_ni->port_guid,
516						  p_port);
517		if (PF(!p_alias_guid)) {
518			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D11: "
519				"alias guid memory allocation failed"
520				" for port GUID 0x%" PRIx64 "\n",
521				cl_ntoh64(p_ni->port_guid));
522			goto alias_done;
523		}
524
525		/* insert into alias guid table */
526		p_alias_guid_check =
527			(osm_alias_guid_t *) cl_qmap_insert(&sm->p_subn->alias_port_guid_tbl,
528							    p_alias_guid->alias_guid,
529							    &p_alias_guid->map_item);
530		if (p_alias_guid_check != p_alias_guid) {
531			/* alias GUID is a duplicate */
532			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D13: "
533				"Duplicate alias port GUID 0x%" PRIx64 "\n",
534				cl_ntoh64(p_ni->port_guid));
535			osm_alias_guid_delete(&p_alias_guid);
536			osm_port_delete(&p_port);
537			goto Exit;
538		}
539
540alias_done:
541		/* If we are a master, then this means the port is new on the subnet.
542		   Mark it as new - need to send trap 64 for these ports.
543		   The condition that we are master is true, since if we are in discovering
544		   state (meaning we woke up from standby or we are just initializing),
545		   then these ports may be new to us, but are not new on the subnet.
546		   If we are master, then the subnet as we know it is the updated one,
547		   and any new ports we encounter should cause trap 64. C14-72.1.1 */
548		if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
549			p_port->is_new = 1;
550
551	} else {
552		osm_physp_t *p_physp = osm_node_get_physp_ptr(p_node, port_num);
553
554		if (PF(p_physp == NULL)) {
555			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1C: "
556				"No physical port found for node GUID 0x%"
557				PRIx64 " port %u. Might be duplicate port GUID\n",
558				cl_ntoh64(p_node->node_info.node_guid),
559				port_num);
560			goto Exit;
561		}
562
563		/*
564		   Update the DR Path to the port,
565		   in case the old one is no longer available.
566		 */
567		p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
568
569		osm_dr_path_init(p_dr_path, p_smp->hop_count,
570				 p_smp->initial_path);
571	}
572
573	ni_rcv_get_port_info(sm, p_node, p_madw);
574
575Exit:
576	OSM_LOG_EXIT(sm->p_log);
577}
578
579static void ni_rcv_process_switch(IN osm_sm_t * sm, IN osm_node_t * p_node,
580				  IN const osm_madw_t * p_madw)
581{
582	ib_api_status_t status = IB_SUCCESS;
583	osm_physp_t *p_physp;
584	osm_madw_context_t context;
585	osm_dr_path_t *path;
586	ib_smp_t *p_smp;
587
588	OSM_LOG_ENTER(sm->p_log);
589
590	p_smp = osm_madw_get_smp_ptr(p_madw);
591
592	p_physp = osm_node_get_physp_ptr(p_node, 0);
593	/* update DR path of already initialized switch port 0 */
594	path = osm_physp_get_dr_path_ptr(p_physp);
595	osm_dr_path_init(path, p_smp->hop_count, p_smp->initial_path);
596
597	context.si_context.node_guid = osm_node_get_node_guid(p_node);
598	context.si_context.set_method = FALSE;
599	context.si_context.light_sweep = FALSE;
600	context.si_context.lft_top_change = FALSE;
601
602	/* Request a SwitchInfo attribute */
603	status = osm_req_get(sm, path, IB_MAD_ATTR_SWITCH_INFO, 0, TRUE, 0,
604			     CL_DISP_MSGID_NONE, &context);
605	if (status != IB_SUCCESS)
606		/* continue despite error */
607		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D06: "
608			"Failure initiating SwitchInfo request (%s)\n",
609			ib_get_err_str(status));
610
611	OSM_LOG_EXIT(sm->p_log);
612}
613
614/**********************************************************************
615 The plock must be held before calling this function.
616**********************************************************************/
617static void ni_rcv_process_existing_switch(IN osm_sm_t * sm,
618					   IN osm_node_t * p_node,
619					   IN const osm_madw_t * p_madw)
620{
621	OSM_LOG_ENTER(sm->p_log);
622
623	/*
624	   If this switch has already been probed during this sweep,
625	   then don't bother reprobing it.
626	 */
627	if (p_node->discovery_count == 1)
628		ni_rcv_process_switch(sm, p_node, p_madw);
629
630	OSM_LOG_EXIT(sm->p_log);
631}
632
633/**********************************************************************
634 The plock must be held before calling this function.
635**********************************************************************/
636static void ni_rcv_process_new_switch(IN osm_sm_t * sm, IN osm_node_t * p_node,
637				      IN const osm_madw_t * p_madw)
638{
639	OSM_LOG_ENTER(sm->p_log);
640
641	ni_rcv_process_switch(sm, p_node, p_madw);
642
643	/*
644	   A node guid of 0 is the corner case that indicates
645	   we discovered our own node.  Initialize the subnet
646	   object with the SM's own port guid.
647	 */
648	if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
649		sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
650
651	OSM_LOG_EXIT(sm->p_log);
652}
653
654/**********************************************************************
655 The plock must NOT be held before calling this function.
656**********************************************************************/
657static void ni_rcv_process_new(IN osm_sm_t * sm, IN const osm_madw_t * p_madw)
658{
659	osm_node_t *p_node;
660	osm_node_t *p_node_check;
661	osm_port_t *p_port;
662	osm_port_t *p_port_check;
663	osm_router_t *p_rtr = NULL;
664	osm_router_t *p_rtr_check;
665	cl_qmap_t *p_rtr_guid_tbl;
666	ib_node_info_t *p_ni;
667	ib_smp_t *p_smp;
668	osm_ni_context_t *p_ni_context;
669	osm_alias_guid_t *p_alias_guid, *p_alias_guid_check;
670	uint8_t port_num;
671
672	OSM_LOG_ENTER(sm->p_log);
673
674	p_smp = osm_madw_get_smp_ptr(p_madw);
675	p_ni = ib_smp_get_payload_ptr(p_smp);
676	p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
677	port_num = ib_node_info_get_local_port_num(p_ni);
678
679	osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_VERBOSE);
680
681	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
682		"Discovered new %s node,"
683		"\n\t\t\t\tGUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n",
684		ib_get_node_type_str(p_ni->node_type),
685		cl_ntoh64(p_ni->node_guid), cl_ntoh64(p_smp->trans_id));
686
687	if (PF(port_num > p_ni->num_ports)) {
688		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0A: "
689			"New %s node GUID 0x%" PRIx64 "is non-compliant and "
690			"is being ignored since the "
691			"local port num %u > num ports %u\n",
692			ib_get_node_type_str(p_ni->node_type),
693			cl_ntoh64(p_ni->node_guid), port_num,
694			p_ni->num_ports);
695		goto Exit;
696	}
697
698	p_node = osm_node_new(p_madw);
699	if (PF(p_node == NULL)) {
700		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D07: "
701			"Unable to create new node object\n");
702		goto Exit;
703	}
704
705	/*
706	   Create a new port object to represent this node's physical
707	   ports in the port table.
708	 */
709	p_port = osm_port_new(p_ni, p_node);
710	if (PF(p_port == NULL)) {
711		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D14: "
712			"Unable to create new port object\n");
713		osm_node_delete(&p_node);
714		goto Exit;
715	}
716
717	/*
718	   Add the new port object to the database.
719	 */
720	p_port_check =
721	    (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
722					  p_ni->port_guid, &p_port->map_item);
723	if (PF(p_port_check != p_port)) {
724		/*
725		   We should never be here!
726		   Somehow, this port GUID already exists in the table.
727		 */
728		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D15: "
729			"Duplicate Port GUID 0x%" PRIx64
730			"! Found by the two directed routes:\n",
731			cl_ntoh64(p_ni->port_guid));
732		osm_dump_dr_path_v2(sm->p_log,
733				    osm_physp_get_dr_path_ptr(p_port->p_physp),
734				    FILE_ID, OSM_LOG_ERROR);
735		osm_dump_dr_path_v2(sm->p_log,
736				    osm_physp_get_dr_path_ptr(p_port_check->
737							   p_physp),
738				    FILE_ID, OSM_LOG_ERROR);
739		osm_port_delete(&p_port);
740		osm_node_delete(&p_node);
741		goto Exit;
742	}
743
744	p_alias_guid = osm_alias_guid_new(p_ni->port_guid,
745					  p_port);
746	if (PF(!p_alias_guid)) {
747		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D18: "
748			"alias guid memory allocation failed"
749			" for port GUID 0x%" PRIx64 "\n",
750			cl_ntoh64(p_ni->port_guid));
751		goto alias_done2;
752	}
753
754	/* insert into alias guid table */
755	p_alias_guid_check =
756		(osm_alias_guid_t *) cl_qmap_insert(&sm->p_subn->alias_port_guid_tbl,
757						    p_alias_guid->alias_guid,
758						    &p_alias_guid->map_item);
759	if (p_alias_guid_check != p_alias_guid) {
760		/* alias GUID is a duplicate */
761		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D19: "
762			"Duplicate alias port GUID 0x%" PRIx64 "\n",
763			cl_ntoh64(p_ni->port_guid));
764		osm_alias_guid_delete(&p_alias_guid);
765	}
766
767alias_done2:
768	/* If we are a master, then this means the port is new on the subnet.
769	   Mark it as new - need to send trap 64 on these ports.
770	   The condition that we are master is true, since if we are in discovering
771	   state (meaning we woke up from standby or we are just initializing),
772	   then these ports may be new to us, but are not new on the subnet.
773	   If we are master, then the subnet as we know it is the updated one,
774	   and any new ports we encounter should cause trap 64. C14-72.1.1 */
775	if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
776		p_port->is_new = 1;
777
778	/* If there were RouterInfo or other router attribute,
779	   this would be elsewhere */
780	if (p_ni->node_type == IB_NODE_TYPE_ROUTER) {
781		if (PF((p_rtr = osm_router_new(p_port)) == NULL))
782			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1A: "
783				"Unable to create new router object\n");
784		else {
785			p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
786			p_rtr_check =
787			    (osm_router_t *) cl_qmap_insert(p_rtr_guid_tbl,
788							    p_ni->port_guid,
789							    &p_rtr->map_item);
790			if (PF(p_rtr_check != p_rtr))
791				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1B: "
792					"Unable to add port GUID:0x%016" PRIx64
793					" to router table\n",
794					cl_ntoh64(p_ni->port_guid));
795		}
796	}
797
798	p_node_check =
799	    (osm_node_t *) cl_qmap_insert(&sm->p_subn->node_guid_tbl,
800					  p_ni->node_guid, &p_node->map_item);
801	if (PF(p_node_check != p_node)) {
802		/*
803		   This node must have been inserted by another thread.
804		   This is unexpected, but is not an error.
805		   We can simply clean-up, since the other thread will
806		   see this processing through to completion.
807		 */
808		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
809			"Discovery race detected at node 0x%" PRIx64 "\n",
810			cl_ntoh64(p_ni->node_guid));
811		osm_node_delete(&p_node);
812		p_node = p_node_check;
813		ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
814		goto Exit;
815	} else
816		ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
817
818	p_node->discovery_count++;
819	ni_rcv_get_node_desc(sm, p_node, p_madw);
820
821	switch (p_ni->node_type) {
822	case IB_NODE_TYPE_CA:
823	case IB_NODE_TYPE_ROUTER:
824		ni_rcv_process_new_ca_or_router(sm, p_node, p_madw);
825		break;
826	case IB_NODE_TYPE_SWITCH:
827		ni_rcv_process_new_switch(sm, p_node, p_madw);
828		break;
829	default:
830		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
831			"Unknown node type %u with GUID 0x%" PRIx64 "\n",
832			p_ni->node_type, cl_ntoh64(p_ni->node_guid));
833		break;
834	}
835
836Exit:
837	OSM_LOG_EXIT(sm->p_log);
838}
839
840/**********************************************************************
841 The plock must be held before calling this function.
842**********************************************************************/
843static void ni_rcv_process_existing(IN osm_sm_t * sm, IN osm_node_t * p_node,
844				    IN const osm_madw_t * p_madw)
845{
846	ib_node_info_t *p_ni;
847	ib_smp_t *p_smp;
848	osm_ni_context_t *p_ni_context;
849	uint8_t port_num;
850
851	OSM_LOG_ENTER(sm->p_log);
852
853	p_smp = osm_madw_get_smp_ptr(p_madw);
854	p_ni = ib_smp_get_payload_ptr(p_smp);
855	p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
856	port_num = ib_node_info_get_local_port_num(p_ni);
857
858	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
859		"Rediscovered %s node 0x%" PRIx64 " TID 0x%" PRIx64
860		", discovered %u times already\n",
861		ib_get_node_type_str(p_ni->node_type),
862		cl_ntoh64(p_ni->node_guid),
863		cl_ntoh64(p_smp->trans_id), p_node->discovery_count);
864
865	if (PF(port_num > p_ni->num_ports)) {
866		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0C: "
867			"Existing %s node GUID 0x%" PRIx64 "is non-compliant "
868			"and is being ignored since the "
869			"local port num %u > num ports %u\n",
870			ib_get_node_type_str(p_ni->node_type),
871			cl_ntoh64(p_ni->node_guid), port_num,
872			p_ni->num_ports);
873		goto Exit;
874	}
875
876	/*
877	   If we haven't already encountered this existing node
878	   on this particular sweep, then process further.
879	 */
880	p_node->discovery_count++;
881
882	switch (p_ni->node_type) {
883	case IB_NODE_TYPE_CA:
884	case IB_NODE_TYPE_ROUTER:
885		ni_rcv_process_existing_ca_or_router(sm, p_node, p_madw);
886		break;
887
888	case IB_NODE_TYPE_SWITCH:
889		ni_rcv_process_existing_switch(sm, p_node, p_madw);
890		break;
891
892	default:
893		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D09: "
894			"Unknown node type %u with GUID 0x%" PRIx64 "\n",
895			p_ni->node_type, cl_ntoh64(p_ni->node_guid));
896		break;
897	}
898
899	if ( p_ni->sys_guid != p_node->node_info.sys_guid) {
900		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Updated SysImageGUID: 0x%"
901			PRIx64 " for node 0x%" PRIx64 "\n",
902			cl_ntoh64(p_ni->sys_guid),
903			cl_ntoh64(p_ni->node_guid));
904	}
905	ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
906	p_node->node_info = *p_ni;
907
908Exit:
909	OSM_LOG_EXIT(sm->p_log);
910}
911
912void osm_ni_rcv_process(IN void *context, IN void *data)
913{
914	osm_sm_t *sm = context;
915	osm_madw_t *p_madw = data;
916	ib_node_info_t *p_ni;
917	ib_smp_t *p_smp;
918	osm_node_t *p_node;
919
920	CL_ASSERT(sm);
921
922	OSM_LOG_ENTER(sm->p_log);
923
924	CL_ASSERT(p_madw);
925
926	p_smp = osm_madw_get_smp_ptr(p_madw);
927	p_ni = ib_smp_get_payload_ptr(p_smp);
928
929	CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO);
930
931	if (PF(p_ni->node_guid == 0)) {
932		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
933			"Got Zero Node GUID! Found on the directed route:\n");
934		osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
935		goto Exit;
936	}
937
938	if (PF(p_ni->port_guid == 0)) {
939		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: "
940			"Got Zero Port GUID! Found on the directed route:\n");
941		osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
942		goto Exit;
943	}
944
945	if (ib_smp_get_status(p_smp)) {
946		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
947			"MAD status 0x%x received\n",
948			cl_ntoh16(ib_smp_get_status(p_smp)));
949		goto Exit;
950	}
951
952	/*
953	   Determine if this node has already been discovered,
954	   and process accordingly.
955	   During processing of this node, hold the shared lock.
956	 */
957
958	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
959	p_node = osm_get_node_by_guid(sm->p_subn, p_ni->node_guid);
960
961	osm_dump_node_info_v2(sm->p_log, p_ni, FILE_ID, OSM_LOG_DEBUG);
962
963	if (!p_node)
964		ni_rcv_process_new(sm, p_madw);
965	else
966		ni_rcv_process_existing(sm, p_node, p_madw);
967
968	CL_PLOCK_RELEASE(sm->p_lock);
969
970Exit:
971	OSM_LOG_EXIT(sm->p_log);
972}
973