1/*
2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses.  You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 *     Redistribution and use in source and binary forms, with or
13 *     without modification, are permitted provided that the following
14 *     conditions are met:
15 *
16 *      - Redistributions of source code must retain the above
17 *        copyright notice, this list of conditions and the following
18 *        disclaimer.
19 *
20 *      - Redistributions in binary form must reproduce the above
21 *        copyright notice, this list of conditions and the following
22 *        disclaimer in the documentation and/or other materials
23 *        provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 *
34 */
35
36/*
37 * Abstract:
38 *    Implementation of osm_ni_rcv_t.
39 * This object represents the NodeInfo Receiver object.
40 * This object is part of the opensm family of objects.
41 */
42
43#if HAVE_CONFIG_H
44#  include <config.h>
45#endif				/* HAVE_CONFIG_H */
46
47#include <stdlib.h>
48#include <string.h>
49#include <iba/ib_types.h>
50#include <complib/cl_qmap.h>
51#include <complib/cl_passivelock.h>
52#include <complib/cl_debug.h>
53#include <opensm/osm_madw.h>
54#include <opensm/osm_log.h>
55#include <opensm/osm_node.h>
56#include <opensm/osm_subnet.h>
57#include <opensm/osm_router.h>
58#include <opensm/osm_mad_pool.h>
59#include <opensm/osm_helper.h>
60#include <opensm/osm_msgdef.h>
61#include <opensm/osm_opensm.h>
62#include <opensm/osm_ucast_mgr.h>
63
64static void
65report_duplicated_guid(IN osm_sm_t * sm,
66		       osm_physp_t * p_physp,
67		       osm_node_t * p_neighbor_node, const uint8_t port_num)
68{
69	osm_physp_t *p_old, *p_new;
70	osm_dr_path_t path;
71
72	p_old = p_physp->p_remote_physp;
73	p_new = osm_node_get_physp_ptr(p_neighbor_node, port_num);
74
75	OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D01: "
76		"Found duplicated node.\n"
77		"Node 0x%" PRIx64 " port %u is reachable from remote node "
78		"0x%" PRIx64 " port %u and remote node 0x%" PRIx64 " port %u.\n"
79		"Paths are:\n",
80		cl_ntoh64(p_physp->p_node->node_info.node_guid),
81		p_physp->port_num,
82		cl_ntoh64(p_old->p_node->node_info.node_guid), p_old->port_num,
83		cl_ntoh64(p_new->p_node->node_info.node_guid), p_new->port_num);
84
85	osm_dump_dr_path(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
86			 OSM_LOG_ERROR);
87
88	path = *osm_physp_get_dr_path_ptr(p_new);
89	osm_dr_path_extend(&path, port_num);
90	osm_dump_dr_path(sm->p_log, &path, OSM_LOG_ERROR);
91
92	osm_log(sm->p_log, OSM_LOG_SYS,
93		"FATAL: duplicated guids or 12x lane reversal\n");
94}
95
96static void requery_dup_node_info(IN osm_sm_t * sm,
97				  osm_physp_t * p_physp, unsigned count)
98{
99	osm_madw_context_t context;
100	osm_dr_path_t path;
101	cl_status_t status;
102
103	path = *osm_physp_get_dr_path_ptr(p_physp->p_remote_physp);
104	osm_dr_path_extend(&path, p_physp->p_remote_physp->port_num);
105
106	context.ni_context.node_guid =
107	    p_physp->p_remote_physp->p_node->node_info.port_guid;
108	context.ni_context.port_num = p_physp->p_remote_physp->port_num;
109	context.ni_context.dup_node_guid = p_physp->p_node->node_info.node_guid;
110	context.ni_context.dup_port_num = p_physp->port_num;
111	context.ni_context.dup_count = count;
112
113	status = osm_req_get(sm, &path, IB_MAD_ATTR_NODE_INFO,
114			     0, CL_DISP_MSGID_NONE, &context);
115
116	if (status != IB_SUCCESS)
117		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: "
118			"Failure initiating NodeInfo request (%s)\n",
119			ib_get_err_str(status));
120}
121
122/**********************************************************************
123 The plock must be held before calling this function.
124**********************************************************************/
125static void
126__osm_ni_rcv_set_links(IN osm_sm_t * sm,
127		       osm_node_t * p_node,
128		       const uint8_t port_num,
129		       const osm_ni_context_t * const p_ni_context)
130{
131	osm_node_t *p_neighbor_node;
132	osm_physp_t *p_physp;
133
134	OSM_LOG_ENTER(sm->p_log);
135
136	/*
137	   A special case exists in which the node we're trying to
138	   link is our own node.  In this case, the guid value in
139	   the ni_context will be zero.
140	 */
141	if (p_ni_context->node_guid == 0) {
142		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
143			"Nothing to link for our own node 0x%" PRIx64 "\n",
144			cl_ntoh64(osm_node_get_node_guid(p_node)));
145		goto _exit;
146	}
147
148	p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
149					       p_ni_context->node_guid);
150	if (!p_neighbor_node) {
151		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
152			"Unexpected removal of neighbor node "
153			"0x%" PRIx64 "\n", cl_ntoh64(p_ni_context->node_guid));
154		goto _exit;
155	}
156
157	/*
158	   We have seen this neighbor node before, but we might
159	   not have seen this port on the neighbor node before.
160	   We should not set links to an uninitialized port on the
161	   neighbor, so check validity up front.  If it's not
162	   valid, do nothing, since we'll see this link again
163	   when we probe the neighbor.
164	 */
165	if (!osm_node_link_has_valid_ports(p_node, port_num,
166					   p_neighbor_node,
167					   p_ni_context->port_num))
168		goto _exit;
169
170	if (osm_node_link_exists(p_node, port_num,
171				 p_neighbor_node, p_ni_context->port_num)) {
172		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
173		goto _exit;
174	}
175
176	if (osm_node_has_any_link(p_node, port_num) &&
177	    sm->p_subn->force_heavy_sweep == FALSE &&
178	    (!p_ni_context->dup_count ||
179	     (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
180	      p_ni_context->dup_port_num == port_num))) {
181		/*
182		   Uh oh...
183		   This could be reconnected ports, but also duplicated GUID
184		   (2 nodes have the same guid) or a 12x link with lane reversal
185		   that is not configured correctly.
186		   We will try to recover by querying NodeInfo again.
187		   In order to catch even fast port moving to new location(s) and
188		   back we will count up to 5.
189		   Some crazy reconnections (newly created switch loop right before
190		   targeted CA) will not be catched this way. So in worst case -
191		   report GUID duplication and request new discovery.
192		   When switch node is targeted NodeInfo querying will be done in
193		   opposite order, this is much stronger check, unfortunately it is
194		   impossible with CAs.
195		 */
196		p_physp = osm_node_get_physp_ptr(p_node, port_num);
197		if (p_ni_context->dup_count > 5) {
198			report_duplicated_guid(sm, p_physp,
199					       p_neighbor_node,
200					       p_ni_context->port_num);
201			sm->p_subn->force_heavy_sweep = TRUE;
202		} else if (p_node->sw)
203			requery_dup_node_info(sm, p_physp->p_remote_physp,
204					      p_ni_context->dup_count + 1);
205		else
206			requery_dup_node_info(sm, p_physp,
207					      p_ni_context->dup_count + 1);
208	}
209
210	/*
211	   When there are only two nodes with exact same guids (connected back
212	   to back) - the previous check for duplicated guid will not catch
213	   them. But the link will be from the port to itself...
214	   Enhanced Port 0 is an exception to this
215	 */
216	if ((osm_node_get_node_guid(p_node) == p_ni_context->node_guid) &&
217	    (port_num == p_ni_context->port_num) &&
218	    port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
219		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
220			"Duplicate GUID found by link from a port to itself:"
221			"node 0x%" PRIx64 ", port number %u\n",
222			cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);
223		p_physp = osm_node_get_physp_ptr(p_node, port_num);
224		osm_dump_dr_path(sm->p_log,
225				 osm_physp_get_dr_path_ptr(p_physp),
226				 OSM_LOG_VERBOSE);
227
228		if (sm->p_subn->opt.exit_on_fatal == TRUE) {
229			osm_log(sm->p_log, OSM_LOG_SYS,
230				"Errors on subnet. Duplicate GUID found "
231				"by link from a port to itself. "
232				"See verbose opensm.log for more details\n");
233			exit(1);
234		}
235	}
236
237	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
238		"Creating new link between:\n\t\t\t\tnode 0x%" PRIx64
239		", port number %u and\n\t\t\t\tnode 0x%" PRIx64
240		", port number %u\n",
241		cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
242		cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);
243
244	if (sm->ucast_mgr.cache_valid)
245		osm_ucast_cache_check_new_link(&sm->ucast_mgr,
246					       p_node, port_num,
247					       p_neighbor_node,
248					       p_ni_context->port_num);
249
250	osm_node_link(p_node, port_num, p_neighbor_node,
251		      p_ni_context->port_num);
252
253_exit:
254	OSM_LOG_EXIT(sm->p_log);
255}
256
257/**********************************************************************
258 The plock must be held before calling this function.
259**********************************************************************/
260static void
261__osm_ni_rcv_process_new_node(IN osm_sm_t * sm,
262			      IN osm_node_t * const p_node,
263			      IN const osm_madw_t * const p_madw)
264{
265	ib_api_status_t status = IB_SUCCESS;
266	osm_madw_context_t context;
267	osm_physp_t *p_physp;
268	ib_node_info_t *p_ni;
269	ib_smp_t *p_smp;
270	uint8_t port_num;
271
272	OSM_LOG_ENTER(sm->p_log);
273
274	p_smp = osm_madw_get_smp_ptr(p_madw);
275	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
276	port_num = ib_node_info_get_local_port_num(p_ni);
277
278	/*
279	   Request PortInfo & NodeDescription attributes for the port
280	   that responded to the NodeInfo attribute.
281	   Because this is a channel adapter or router, we are
282	   not allowed to request PortInfo for the other ports.
283	   Set the context union properly, so the recipient
284	   knows which node & port are relevant.
285	 */
286	p_physp = osm_node_get_physp_ptr(p_node, port_num);
287
288	context.pi_context.node_guid = p_ni->node_guid;
289	context.pi_context.port_guid = p_ni->port_guid;
290	context.pi_context.set_method = FALSE;
291	context.pi_context.light_sweep = FALSE;
292	context.pi_context.active_transition = FALSE;
293
294	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
295			     IB_MAD_ATTR_PORT_INFO,
296			     cl_hton32(port_num), CL_DISP_MSGID_NONE, &context);
297	if (status != IB_SUCCESS)
298		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: "
299			"Failure initiating PortInfo request (%s)\n",
300			ib_get_err_str(status));
301
302	OSM_LOG_EXIT(sm->p_log);
303}
304
305/**********************************************************************
306 The plock must be held before calling this function.
307**********************************************************************/
308void
309osm_req_get_node_desc(IN osm_sm_t * sm,
310			osm_physp_t *p_physp)
311{
312	ib_api_status_t status = IB_SUCCESS;
313	osm_madw_context_t context;
314
315	OSM_LOG_ENTER(sm->p_log);
316
317	context.nd_context.node_guid =
318		osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
319
320	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
321			     IB_MAD_ATTR_NODE_DESC,
322			     0, CL_DISP_MSGID_NONE, &context);
323	if (status != IB_SUCCESS)
324		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: "
325			"Failure initiating NodeDescription request (%s)\n",
326			ib_get_err_str(status));
327
328	OSM_LOG_EXIT(sm->p_log);
329}
330
331/**********************************************************************
332 The plock must be held before calling this function.
333**********************************************************************/
334static void
335__osm_ni_rcv_get_node_desc(IN osm_sm_t * sm,
336			   IN osm_node_t * const p_node,
337			   IN const osm_madw_t * const p_madw)
338{
339	ib_node_info_t *p_ni;
340	ib_smp_t *p_smp;
341	uint8_t port_num;
342	osm_physp_t *p_physp = NULL;
343
344	OSM_LOG_ENTER(sm->p_log);
345
346	p_smp = osm_madw_get_smp_ptr(p_madw);
347	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
348	port_num = ib_node_info_get_local_port_num(p_ni);
349
350	/*
351	   Request PortInfo & NodeDescription attributes for the port
352	   that responded to the NodeInfo attribute.
353	   Because this is a channel adapter or router, we are
354	   not allowed to request PortInfo for the other ports.
355	   Set the context union properly, so the recipient
356	   knows which node & port are relevant.
357	 */
358	p_physp = osm_node_get_physp_ptr(p_node, port_num);
359
360	osm_req_get_node_desc(sm, p_physp);
361
362	OSM_LOG_EXIT(sm->p_log);
363}
364
365/**********************************************************************
366 The plock must be held before calling this function.
367**********************************************************************/
368static void
369__osm_ni_rcv_process_new_ca_or_router(IN osm_sm_t * sm,
370				      IN osm_node_t * const p_node,
371				      IN const osm_madw_t * const p_madw)
372{
373	OSM_LOG_ENTER(sm->p_log);
374
375	__osm_ni_rcv_process_new_node(sm, p_node, p_madw);
376
377	/*
378	   A node guid of 0 is the corner case that indicates
379	   we discovered our own node.  Initialize the subnet
380	   object with the SM's own port guid.
381	 */
382	if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
383		sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
384
385	OSM_LOG_EXIT(sm->p_log);
386}
387
388/**********************************************************************
389 The plock must be held before calling this function.
390**********************************************************************/
391static void
392__osm_ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm,
393					   IN osm_node_t * const p_node,
394					   IN const osm_madw_t * const p_madw)
395{
396	ib_node_info_t *p_ni;
397	ib_smp_t *p_smp;
398	osm_port_t *p_port;
399	osm_port_t *p_port_check;
400	osm_madw_context_t context;
401	uint8_t port_num;
402	osm_physp_t *p_physp;
403	ib_api_status_t status;
404	osm_dr_path_t *p_dr_path;
405	osm_bind_handle_t h_bind;
406
407	OSM_LOG_ENTER(sm->p_log);
408
409	p_smp = osm_madw_get_smp_ptr(p_madw);
410	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
411	port_num = ib_node_info_get_local_port_num(p_ni);
412	h_bind = osm_madw_get_bind_handle(p_madw);
413
414	/*
415	   Determine if we have encountered this node through a
416	   previously undiscovered port.  If so, build the new
417	   port object.
418	 */
419	p_port = osm_get_port_by_guid(sm->p_subn, p_ni->port_guid);
420	if (!p_port) {
421		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
422			"Creating new port object with GUID 0x%" PRIx64 "\n",
423			cl_ntoh64(p_ni->port_guid));
424
425		osm_node_init_physp(p_node, p_madw);
426
427		p_port = osm_port_new(p_ni, p_node);
428		if (p_port == NULL) {
429			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D04: "
430				"Unable to create new port object\n");
431			goto Exit;
432		}
433
434		/*
435		   Add the new port object to the database.
436		 */
437		p_port_check =
438		    (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
439						  p_ni->port_guid,
440						  &p_port->map_item);
441		if (p_port_check != p_port) {
442			/*
443			   We should never be here!
444			   Somehow, this port GUID already exists in the table.
445			 */
446			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D12: "
447				"Port 0x%" PRIx64 " already in the database!\n",
448				cl_ntoh64(p_ni->port_guid));
449
450			osm_port_delete(&p_port);
451			goto Exit;
452		}
453
454		/* If we are a master, then this means the port is new on the subnet.
455		   Mark it as new - need to send trap 64 on these ports.
456		   The condition that we are master is true, since if we are in discovering
457		   state (meaning we woke up from standby or we are just initializing),
458		   then these ports may be new to us, but are not new on the subnet.
459		   If we are master, then the subnet as we know it is the updated one,
460		   and any new ports we encounter should cause trap 64. C14-72.1.1 */
461		if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
462			p_port->is_new = 1;
463
464		p_physp = osm_node_get_physp_ptr(p_node, port_num);
465	} else {
466		p_physp = osm_node_get_physp_ptr(p_node, port_num);
467		/*
468		   Update the DR Path to the port,
469		   in case the old one is no longer available.
470		 */
471		p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
472
473		osm_dr_path_init(p_dr_path, h_bind, p_smp->hop_count,
474				 p_smp->initial_path);
475	}
476
477	context.pi_context.node_guid = p_ni->node_guid;
478	context.pi_context.port_guid = p_ni->port_guid;
479	context.pi_context.set_method = FALSE;
480	context.pi_context.light_sweep = FALSE;
481
482	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
483			     IB_MAD_ATTR_PORT_INFO,
484			     cl_hton32(port_num), CL_DISP_MSGID_NONE, &context);
485
486	if (status != IB_SUCCESS)
487		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D13: "
488			"Failure initiating PortInfo request (%s)\n",
489			ib_get_err_str(status));
490
491Exit:
492	OSM_LOG_EXIT(sm->p_log);
493}
494
495/**********************************************************************
496 **********************************************************************/
497static void
498__osm_ni_rcv_process_switch(IN osm_sm_t * sm,
499			    IN osm_node_t * const p_node,
500			    IN const osm_madw_t * const p_madw)
501{
502	ib_api_status_t status = IB_SUCCESS;
503	osm_madw_context_t context;
504	osm_dr_path_t *path;
505	ib_smp_t *p_smp;
506
507	OSM_LOG_ENTER(sm->p_log);
508
509	p_smp = osm_madw_get_smp_ptr(p_madw);
510
511	/* update DR path of already initialized switch port 0 */
512	path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0));
513	osm_dr_path_init(path, osm_madw_get_bind_handle(p_madw),
514			 p_smp->hop_count, p_smp->initial_path);
515
516	context.si_context.node_guid = osm_node_get_node_guid(p_node);
517	context.si_context.set_method = FALSE;
518	context.si_context.light_sweep = FALSE;
519
520	/* Request a SwitchInfo attribute */
521	status = osm_req_get(sm, path, IB_MAD_ATTR_SWITCH_INFO,
522			     0, CL_DISP_MSGID_NONE, &context);
523	if (status != IB_SUCCESS)
524		/* continue despite error */
525		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D06: "
526			"Failure initiating SwitchInfo request (%s)\n",
527			ib_get_err_str(status));
528
529	OSM_LOG_EXIT(sm->p_log);
530}
531
532/**********************************************************************
533 The plock must be held before calling this function.
534**********************************************************************/
535static void
536__osm_ni_rcv_process_existing_switch(IN osm_sm_t * sm,
537				     IN osm_node_t * const p_node,
538				     IN const osm_madw_t * const p_madw)
539{
540	OSM_LOG_ENTER(sm->p_log);
541
542	/*
543	   If this switch has already been probed during this sweep,
544	   then don't bother reprobing it.
545	   There is one exception - if the node has been visited, but
546	   for some reason we don't have the switch object (this can happen
547	   if the SwitchInfo mad didn't reach the SM) then we want
548	   to retry to probe the switch.
549	 */
550	if (p_node->discovery_count == 1)
551		__osm_ni_rcv_process_switch(sm, p_node, p_madw);
552	else if (!p_node->sw || p_node->sw->discovery_count == 0) {
553		/* we don't have the SwitchInfo - retry to get it */
554		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
555			"Retry to get SwitchInfo on node GUID:0x%"
556			PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node)));
557		__osm_ni_rcv_process_switch(sm, p_node, p_madw);
558	}
559
560	OSM_LOG_EXIT(sm->p_log);
561}
562
563/**********************************************************************
564 The plock must be held before calling this function.
565**********************************************************************/
566static void
567__osm_ni_rcv_process_new_switch(IN osm_sm_t * sm,
568				IN osm_node_t * const p_node,
569				IN const osm_madw_t * const p_madw)
570{
571	OSM_LOG_ENTER(sm->p_log);
572
573	__osm_ni_rcv_process_switch(sm, p_node, p_madw);
574
575	/*
576	   A node guid of 0 is the corner case that indicates
577	   we discovered our own node.  Initialize the subnet
578	   object with the SM's own port guid.
579	 */
580	if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
581		sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
582
583	OSM_LOG_EXIT(sm->p_log);
584}
585
586/**********************************************************************
587 The plock must NOT be held before calling this function.
588**********************************************************************/
589static void
590__osm_ni_rcv_process_new(IN osm_sm_t * sm,
591			 IN const osm_madw_t * const p_madw)
592{
593	osm_node_t *p_node;
594	osm_node_t *p_node_check;
595	osm_port_t *p_port;
596	osm_port_t *p_port_check;
597	osm_router_t *p_rtr = NULL;
598	osm_router_t *p_rtr_check;
599	cl_qmap_t *p_rtr_guid_tbl;
600	ib_node_info_t *p_ni;
601	ib_smp_t *p_smp;
602	osm_ni_context_t *p_ni_context;
603	uint8_t port_num;
604
605	OSM_LOG_ENTER(sm->p_log);
606
607	p_smp = osm_madw_get_smp_ptr(p_madw);
608	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
609	p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
610	port_num = ib_node_info_get_local_port_num(p_ni);
611
612	osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_VERBOSE);
613
614	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
615		"Discovered new %s node,"
616		"\n\t\t\t\tGUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n",
617		ib_get_node_type_str(p_ni->node_type),
618		cl_ntoh64(p_ni->node_guid), cl_ntoh64(p_smp->trans_id));
619
620	p_node = osm_node_new(p_madw);
621	if (p_node == NULL) {
622		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D07: "
623			"Unable to create new node object\n");
624		goto Exit;
625	}
626
627	/*
628	   Create a new port object to represent this node's physical
629	   ports in the port table.
630	 */
631	p_port = osm_port_new(p_ni, p_node);
632	if (p_port == NULL) {
633		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D14: "
634			"Unable to create new port object\n");
635		osm_node_delete(&p_node);
636		goto Exit;
637	}
638
639	/*
640	   Add the new port object to the database.
641	 */
642	p_port_check =
643	    (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
644					  p_ni->port_guid, &p_port->map_item);
645	if (p_port_check != p_port) {
646		/*
647		   We should never be here!
648		   Somehow, this port GUID already exists in the table.
649		 */
650		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D15: "
651			"Duplicate Port GUID 0x%" PRIx64
652			"! Found by the two directed routes:\n",
653			cl_ntoh64(p_ni->port_guid));
654		osm_dump_dr_path(sm->p_log,
655				 osm_physp_get_dr_path_ptr(p_port->p_physp),
656				 OSM_LOG_ERROR);
657		osm_dump_dr_path(sm->p_log,
658				 osm_physp_get_dr_path_ptr(p_port_check->
659							   p_physp),
660				 OSM_LOG_ERROR);
661		osm_port_delete(&p_port);
662		osm_node_delete(&p_node);
663		goto Exit;
664	}
665
666	/* If we are a master, then this means the port is new on the subnet.
667	   Mark it as new - need to send trap 64 on these ports.
668	   The condition that we are master is true, since if we are in discovering
669	   state (meaning we woke up from standby or we are just initializing),
670	   then these ports may be new to us, but are not new on the subnet.
671	   If we are master, then the subnet as we know it is the updated one,
672	   and any new ports we encounter should cause trap 64. C14-72.1.1 */
673	if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
674		p_port->is_new = 1;
675
676	/* If there were RouterInfo or other router attribute,
677	   this would be elsewhere */
678	if (p_ni->node_type == IB_NODE_TYPE_ROUTER) {
679		if ((p_rtr = osm_router_new(p_port)) == NULL)
680			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1A: "
681				"Unable to create new router object\n");
682		else {
683			p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
684			p_rtr_check =
685			    (osm_router_t *) cl_qmap_insert(p_rtr_guid_tbl,
686							    p_ni->port_guid,
687							    &p_rtr->map_item);
688			if (p_rtr_check != p_rtr)
689				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1B: "
690					"Unable to add port GUID:0x%016" PRIx64
691					" to router table\n",
692					cl_ntoh64(p_ni->port_guid));
693		}
694	}
695
696	p_node_check =
697	    (osm_node_t *) cl_qmap_insert(&sm->p_subn->node_guid_tbl,
698					  p_ni->node_guid, &p_node->map_item);
699	if (p_node_check != p_node) {
700		/*
701		   This node must have been inserted by another thread.
702		   This is unexpected, but is not an error.
703		   We can simply clean-up, since the other thread will
704		   see this processing through to completion.
705		 */
706		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
707			"Discovery race detected at node 0x%" PRIx64 "\n",
708			cl_ntoh64(p_ni->node_guid));
709		osm_node_delete(&p_node);
710		p_node = p_node_check;
711		__osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
712		goto Exit;
713	} else
714		__osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
715
716	p_node->discovery_count++;
717	__osm_ni_rcv_get_node_desc(sm, p_node, p_madw);
718
719	switch (p_ni->node_type) {
720	case IB_NODE_TYPE_CA:
721	case IB_NODE_TYPE_ROUTER:
722		__osm_ni_rcv_process_new_ca_or_router(sm, p_node, p_madw);
723		break;
724	case IB_NODE_TYPE_SWITCH:
725		__osm_ni_rcv_process_new_switch(sm, p_node, p_madw);
726		break;
727	default:
728		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
729			"Unknown node type %u with GUID 0x%" PRIx64 "\n",
730			p_ni->node_type, cl_ntoh64(p_ni->node_guid));
731		break;
732	}
733
734Exit:
735	OSM_LOG_EXIT(sm->p_log);
736}
737
738/**********************************************************************
739 The plock must be held before calling this function.
740**********************************************************************/
741static void
742__osm_ni_rcv_process_existing(IN osm_sm_t * sm,
743			      IN osm_node_t * const p_node,
744			      IN const osm_madw_t * const p_madw)
745{
746	ib_node_info_t *p_ni;
747	ib_smp_t *p_smp;
748	osm_ni_context_t *p_ni_context;
749	uint8_t port_num;
750
751	OSM_LOG_ENTER(sm->p_log);
752
753	p_smp = osm_madw_get_smp_ptr(p_madw);
754	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
755	p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
756	port_num = ib_node_info_get_local_port_num(p_ni);
757
758	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
759		"Rediscovered %s node 0x%" PRIx64 " TID 0x%" PRIx64
760		", discovered %u times already\n",
761		ib_get_node_type_str(p_ni->node_type),
762		cl_ntoh64(p_ni->node_guid),
763		cl_ntoh64(p_smp->trans_id), p_node->discovery_count);
764
765	/*
766	   If we haven't already encountered this existing node
767	   on this particular sweep, then process further.
768	 */
769	p_node->discovery_count++;
770
771	switch (p_ni->node_type) {
772	case IB_NODE_TYPE_CA:
773	case IB_NODE_TYPE_ROUTER:
774		__osm_ni_rcv_process_existing_ca_or_router(sm, p_node,
775							   p_madw);
776		break;
777
778	case IB_NODE_TYPE_SWITCH:
779		__osm_ni_rcv_process_existing_switch(sm, p_node, p_madw);
780		break;
781
782	default:
783		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D09: "
784			"Unknown node type %u with GUID 0x%" PRIx64 "\n",
785			p_ni->node_type, cl_ntoh64(p_ni->node_guid));
786		break;
787	}
788
789	__osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
790
791	OSM_LOG_EXIT(sm->p_log);
792}
793
794/**********************************************************************
795 **********************************************************************/
796void osm_ni_rcv_process(IN void *context, IN void *data)
797{
798	osm_sm_t *sm = context;
799	osm_madw_t *p_madw = data;
800	ib_node_info_t *p_ni;
801	ib_smp_t *p_smp;
802	osm_node_t *p_node;
803
804	CL_ASSERT(sm);
805
806	OSM_LOG_ENTER(sm->p_log);
807
808	CL_ASSERT(p_madw);
809
810	p_smp = osm_madw_get_smp_ptr(p_madw);
811	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
812
813	CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO);
814
815	if (p_ni->node_guid == 0) {
816		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
817			"Got Zero Node GUID! Found on the directed route:\n");
818		osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR);
819		goto Exit;
820	}
821
822	if (p_ni->port_guid == 0) {
823		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: "
824			"Got Zero Port GUID! Found on the directed route:\n");
825		osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR);
826		goto Exit;
827	}
828
829	/*
830	   Determine if this node has already been discovered,
831	   and process accordingly.
832	   During processing of this node, hold the shared lock.
833	 */
834
835	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
836	p_node = osm_get_node_by_guid(sm->p_subn, p_ni->node_guid);
837
838	osm_dump_node_info(sm->p_log, p_ni, OSM_LOG_DEBUG);
839
840	if (!p_node)
841		__osm_ni_rcv_process_new(sm, p_madw);
842	else
843		__osm_ni_rcv_process_existing(sm, p_node, p_madw);
844
845	CL_PLOCK_RELEASE(sm->p_lock);
846
847Exit:
848	OSM_LOG_EXIT(sm->p_log);
849}
850