1/*
2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 *
35 */
36
37/*
38 * Abstract:
39 *    Implementation of osm_drop_mgr_t.
40 * This object represents the Drop Manager object.
41 * This object is part of the opensm family of objects.
42 */
43
44#if HAVE_CONFIG_H
45#  include <config.h>
46#endif				/* HAVE_CONFIG_H */
47
48#include <stdlib.h>
49#include <string.h>
50#include <iba/ib_types.h>
51#include <complib/cl_qmap.h>
52#include <complib/cl_passivelock.h>
53#include <complib/cl_debug.h>
54#include <complib/cl_ptr_vector.h>
55#include <opensm/osm_sm.h>
56#include <opensm/osm_router.h>
57#include <opensm/osm_switch.h>
58#include <opensm/osm_node.h>
59#include <opensm/osm_helper.h>
60#include <opensm/osm_mcm_info.h>
61#include <opensm/osm_multicast.h>
62#include <opensm/osm_remote_sm.h>
63#include <opensm/osm_inform.h>
64#include <opensm/osm_ucast_mgr.h>
65
66/**********************************************************************
67 **********************************************************************/
68static void
69__osm_drop_mgr_remove_router(osm_sm_t * sm, IN const ib_net64_t portguid)
70{
71	osm_router_t *p_rtr;
72	cl_qmap_t *p_rtr_guid_tbl;
73
74	p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
75	p_rtr = (osm_router_t *) cl_qmap_remove(p_rtr_guid_tbl, portguid);
76	if (p_rtr != (osm_router_t *) cl_qmap_end(p_rtr_guid_tbl)) {
77		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
78			"Cleaned router for port guid 0x%016" PRIx64 "\n",
79			cl_ntoh64(portguid));
80		osm_router_delete(&p_rtr);
81	}
82}
83
84/**********************************************************************
85 **********************************************************************/
86static void drop_mgr_clean_physp(osm_sm_t * sm, IN osm_physp_t * p_physp)
87{
88	osm_physp_t *p_remote_physp;
89	osm_port_t *p_remote_port;
90
91	p_remote_physp = osm_physp_get_remote(p_physp);
92	if (p_remote_physp) {
93		p_remote_port = osm_get_port_by_guid(sm->p_subn,
94						     p_remote_physp->port_guid);
95
96		if (p_remote_port) {
97			/* Let's check if this is a case of link that is lost (both ports
98			   weren't recognized), or a "hiccup" in the subnet - in which case
99			   the remote port was recognized, and its state is ACTIVE.
100			   If this is just a "hiccup" - force a heavy sweep in the next sweep.
101			   We don't want to lose that part of the subnet. */
102			if (p_remote_port->discovery_count &&
103			    osm_physp_get_port_state(p_remote_physp) ==
104			    IB_LINK_ACTIVE) {
105				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
106					"Forcing new heavy sweep. Remote "
107					"port 0x%016" PRIx64 " port num: %u "
108					"was recognized in ACTIVE state\n",
109					cl_ntoh64(p_remote_physp->port_guid),
110					p_remote_physp->port_num);
111				sm->p_subn->force_heavy_sweep = TRUE;
112			}
113
114			/* If the remote node is ca or router - need to remove the remote port,
115			   since it is no longer reachable. This can be done if we reset the
116			   discovery count of the remote port. */
117			if (!p_remote_physp->p_node->sw) {
118				p_remote_port->discovery_count = 0;
119				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
120					"Resetting discovery count of node: "
121					"0x%016" PRIx64 " port num:%u\n",
122					cl_ntoh64(osm_node_get_node_guid
123						  (p_remote_physp->p_node)),
124					p_remote_physp->port_num);
125			}
126		}
127
128		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
129			"Unlinking local node 0x%016" PRIx64 ", port %u"
130			"\n\t\t\t\tand remote node 0x%016" PRIx64
131			", port %u\n",
132			cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)),
133			p_physp->port_num,
134			cl_ntoh64(osm_node_get_node_guid
135				  (p_remote_physp->p_node)),
136			p_remote_physp->port_num);
137
138		if (sm->ucast_mgr.cache_valid)
139			osm_ucast_cache_add_link(&sm->ucast_mgr,
140						 p_physp, p_remote_physp);
141
142		osm_physp_unlink(p_physp, p_remote_physp);
143
144	}
145
146	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
147		"Clearing node 0x%016" PRIx64 " physical port number %u\n",
148		cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)),
149		p_physp->port_num);
150
151	osm_physp_destroy(p_physp);
152}
153
154/**********************************************************************
155 **********************************************************************/
156static void __osm_drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port)
157{
158	ib_net64_t port_guid;
159	osm_port_t *p_port_check;
160	cl_qmap_t *p_sm_guid_tbl;
161	osm_mcm_info_t *p_mcm;
162	osm_mgrp_t *p_mgrp;
163	cl_ptr_vector_t *p_port_lid_tbl;
164	uint16_t min_lid_ho;
165	uint16_t max_lid_ho;
166	uint16_t lid_ho;
167	osm_node_t *p_node;
168	osm_remote_sm_t *p_sm;
169	ib_gid_t port_gid;
170	ib_mad_notice_attr_t notice;
171	ib_api_status_t status;
172
173	OSM_LOG_ENTER(sm->p_log);
174
175	port_guid = osm_port_get_guid(p_port);
176	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
177		"Unreachable port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid));
178
179	p_port_check =
180	    (osm_port_t *) cl_qmap_remove(&sm->p_subn->port_guid_tbl,
181					  port_guid);
182	if (p_port_check != p_port) {
183		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0101: "
184			"Port 0x%016" PRIx64 " not in guid table\n",
185			cl_ntoh64(port_guid));
186		goto Exit;
187	}
188
189	p_sm_guid_tbl = &sm->p_subn->sm_guid_tbl;
190	p_sm = (osm_remote_sm_t *) cl_qmap_remove(p_sm_guid_tbl, port_guid);
191	if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_guid_tbl)) {
192		/* need to remove this item */
193		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
194			"Cleaned SM for port guid 0x%016" PRIx64 "\n",
195			cl_ntoh64(port_guid));
196
197		free(p_sm);
198	}
199
200	__osm_drop_mgr_remove_router(sm, port_guid);
201
202	osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
203
204	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
205		"Clearing abandoned LID range [%u,%u]\n",
206		min_lid_ho, max_lid_ho);
207
208	p_port_lid_tbl = &sm->p_subn->port_lid_tbl;
209	for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++)
210		cl_ptr_vector_set(p_port_lid_tbl, lid_ho, NULL);
211
212	drop_mgr_clean_physp(sm, p_port->p_physp);
213
214	p_mcm = (osm_mcm_info_t *) cl_qlist_remove_head(&p_port->mcm_list);
215	while (p_mcm != (osm_mcm_info_t *) cl_qlist_end(&p_port->mcm_list)) {
216		p_mgrp = osm_get_mgrp_by_mlid(sm->p_subn, p_mcm->mlid);
217		if (p_mgrp) {
218			osm_mgrp_delete_port(sm->p_subn, sm->p_log,
219					     p_mgrp, p_port->guid);
220			osm_mcm_info_delete((osm_mcm_info_t *) p_mcm);
221		}
222		p_mcm =
223		    (osm_mcm_info_t *) cl_qlist_remove_head(&p_port->mcm_list);
224	}
225
226	/* initialize the p_node - may need to get node_desc later */
227	p_node = p_port->p_node;
228
229	osm_port_delete(&p_port);
230
231	/* issue a notice - trap 65 */
232
233	/* details of the notice */
234	notice.generic_type = 0x83;	/* is generic subn mgt type */
235	ib_notice_set_prod_type_ho(&notice, 4);	/* A class manager generator */
236	/* endport ceases to be reachable */
237	notice.g_or_v.generic.trap_num = CL_HTON16(65);
238	/* The sm_base_lid is saved in network order already. */
239	notice.issuer_lid = sm->p_subn->sm_base_lid;
240	/* following C14-72.1.2 and table 119 p725 */
241	/* we need to provide the GID */
242	port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
243	port_gid.unicast.interface_id = port_guid;
244	memcpy(&(notice.data_details.ntc_64_67.gid),
245	       &(port_gid), sizeof(ib_gid_t));
246
247	/* According to page 653 - the issuer gid in this case of trap
248	   is the SM gid, since the SM is the initiator of this trap. */
249	notice.issuer_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
250	notice.issuer_gid.unicast.interface_id = sm->p_subn->sm_port_guid;
251
252	status = osm_report_notice(sm->p_log, sm->p_subn, &notice);
253	if (status != IB_SUCCESS) {
254		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0103: "
255			"Error sending trap reports (%s)\n",
256			ib_get_err_str(status));
257		goto Exit;
258	}
259
260	OSM_LOG(sm->p_log, OSM_LOG_INFO,
261		"Removed port with GUID:0x%016" PRIx64
262		" LID range [%u, %u] of node:%s\n",
263		cl_ntoh64(port_gid.unicast.interface_id),
264		min_lid_ho, max_lid_ho,
265		p_node ? p_node->print_desc : "UNKNOWN");
266
267Exit:
268	OSM_LOG_EXIT(sm->p_log);
269}
270
271/**********************************************************************
272 **********************************************************************/
273static void __osm_drop_mgr_remove_switch(osm_sm_t * sm, IN osm_node_t * p_node)
274{
275	osm_switch_t *p_sw;
276	cl_qmap_t *p_sw_guid_tbl;
277	ib_net64_t node_guid;
278
279	OSM_LOG_ENTER(sm->p_log);
280
281	node_guid = osm_node_get_node_guid(p_node);
282	p_sw_guid_tbl = &sm->p_subn->sw_guid_tbl;
283
284	p_sw = (osm_switch_t *) cl_qmap_remove(p_sw_guid_tbl, node_guid);
285	if (p_sw == (osm_switch_t *) cl_qmap_end(p_sw_guid_tbl)) {
286		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0102: "
287			"Node 0x%016" PRIx64 " not in switch table\n",
288			cl_ntoh64(osm_node_get_node_guid(p_node)));
289	} else {
290		p_node->sw = NULL;
291		osm_switch_delete(&p_sw);
292	}
293
294	OSM_LOG_EXIT(sm->p_log);
295}
296
297/**********************************************************************
298 **********************************************************************/
299static boolean_t
300__osm_drop_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node)
301{
302	osm_physp_t *p_physp;
303	osm_port_t *p_port;
304	osm_node_t *p_node_check;
305	uint32_t port_num;
306	uint32_t max_ports;
307	ib_net64_t port_guid;
308	boolean_t return_val = FALSE;
309
310	OSM_LOG_ENTER(sm->p_log);
311
312	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
313		"Unreachable node 0x%016" PRIx64 "\n",
314		cl_ntoh64(osm_node_get_node_guid(p_node)));
315
316	if (sm->ucast_mgr.cache_valid)
317		osm_ucast_cache_add_node(&sm->ucast_mgr, p_node);
318
319	/*
320	   Delete all the logical and physical port objects
321	   associated with this node.
322	 */
323	max_ports = osm_node_get_num_physp(p_node);
324	for (port_num = 0; port_num < max_ports; port_num++) {
325		p_physp = osm_node_get_physp_ptr(p_node, port_num);
326		if (p_physp) {
327			port_guid = osm_physp_get_port_guid(p_physp);
328
329			p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
330
331			if (p_port)
332				__osm_drop_mgr_remove_port(sm, p_port);
333			else
334				drop_mgr_clean_physp(sm, p_physp);
335		}
336	}
337
338	return_val = TRUE;
339
340	if (p_node->sw)
341		__osm_drop_mgr_remove_switch(sm, p_node);
342
343	p_node_check =
344	    (osm_node_t *) cl_qmap_remove(&sm->p_subn->node_guid_tbl,
345					  osm_node_get_node_guid(p_node));
346	if (p_node_check != p_node) {
347		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0105: "
348			"Node 0x%016" PRIx64 " not in guid table\n",
349			cl_ntoh64(osm_node_get_node_guid(p_node)));
350	}
351
352	/* free memory allocated to node */
353	osm_node_delete(&p_node);
354
355	OSM_LOG_EXIT(sm->p_log);
356	return (return_val);
357}
358
359/**********************************************************************
360 **********************************************************************/
361static void __osm_drop_mgr_check_node(osm_sm_t * sm, IN osm_node_t * p_node)
362{
363	ib_net64_t node_guid;
364	osm_physp_t *p_physp;
365	osm_port_t *p_port;
366	ib_net64_t port_guid;
367
368	OSM_LOG_ENTER(sm->p_log);
369
370	node_guid = osm_node_get_node_guid(p_node);
371
372	if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) {
373		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0107: "
374			"Node 0x%016" PRIx64 " is not a switch node\n",
375			cl_ntoh64(node_guid));
376		goto Exit;
377	}
378
379	/* Make sure we have a switch object for this node */
380	if (!p_node->sw) {
381		/* We do not have switch info for this node */
382		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
383			"Node 0x%016" PRIx64 " no switch in table\n",
384			cl_ntoh64(node_guid));
385
386		__osm_drop_mgr_process_node(sm, p_node);
387		goto Exit;
388	}
389
390	/* Make sure we have a port object for port zero */
391	p_physp = osm_node_get_physp_ptr(p_node, 0);
392	if (!p_physp) {
393		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
394			"Node 0x%016" PRIx64 " no valid physical port 0\n",
395			cl_ntoh64(node_guid));
396
397		__osm_drop_mgr_process_node(sm, p_node);
398		goto Exit;
399	}
400
401	port_guid = osm_physp_get_port_guid(p_physp);
402
403	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
404
405	if (!p_port) {
406		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
407			"Node 0x%016" PRIx64 " has no port object\n",
408			cl_ntoh64(node_guid));
409
410		__osm_drop_mgr_process_node(sm, p_node);
411		goto Exit;
412	}
413
414	if (p_port->discovery_count == 0) {
415		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
416			"Node 0x%016" PRIx64 " port has discovery count zero\n",
417			cl_ntoh64(node_guid));
418
419		__osm_drop_mgr_process_node(sm, p_node);
420		goto Exit;
421	}
422
423Exit:
424	OSM_LOG_EXIT(sm->p_log);
425	return;
426}
427
428/**********************************************************************
429 **********************************************************************/
430void osm_drop_mgr_process(osm_sm_t * sm)
431{
432	cl_qmap_t *p_node_guid_tbl;
433	cl_qmap_t *p_port_guid_tbl;
434	osm_port_t *p_port;
435	osm_port_t *p_next_port;
436	osm_node_t *p_node;
437	osm_node_t *p_next_node;
438
439	CL_ASSERT(sm);
440
441	OSM_LOG_ENTER(sm->p_log);
442
443	p_node_guid_tbl = &sm->p_subn->node_guid_tbl;
444	p_port_guid_tbl = &sm->p_subn->port_guid_tbl;
445
446	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
447
448	p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
449	while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) {
450		p_node = p_next_node;
451		p_next_node =
452		    (osm_node_t *) cl_qmap_next(&p_next_node->map_item);
453
454		CL_ASSERT(cl_qmap_key(&p_node->map_item) ==
455			  osm_node_get_node_guid(p_node));
456
457		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
458			"Checking node 0x%016" PRIx64 "\n",
459			cl_ntoh64(osm_node_get_node_guid(p_node)));
460
461		/*
462		   Check if this node was discovered during the last sweep.
463		   If not, it is unreachable in the current subnet, and
464		   should therefore be removed from the subnet object.
465		 */
466		if (p_node->discovery_count == 0)
467			__osm_drop_mgr_process_node(sm, p_node);
468	}
469
470	/*
471	   Go over all the nodes. If the node is a switch - make sure
472	   there is also a switch record for it, and a portInfo record for
473	   port zero of of the node.
474	   If not - this means that there was some error in getting the data
475	   of this node. Drop the node.
476	 */
477	p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
478	while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) {
479		p_node = p_next_node;
480		p_next_node =
481		    (osm_node_t *) cl_qmap_next(&p_next_node->map_item);
482
483		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
484			"Checking full discovery of node 0x%016" PRIx64 "\n",
485			cl_ntoh64(osm_node_get_node_guid(p_node)));
486
487		if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH)
488			continue;
489
490		/* We are handling a switch node */
491		__osm_drop_mgr_check_node(sm, p_node);
492	}
493
494	p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl);
495	while (p_next_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl)) {
496		p_port = p_next_port;
497		p_next_port =
498		    (osm_port_t *) cl_qmap_next(&p_next_port->map_item);
499
500		CL_ASSERT(cl_qmap_key(&p_port->map_item) ==
501			  osm_port_get_guid(p_port));
502
503		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
504			"Checking port 0x%016" PRIx64 "\n",
505			cl_ntoh64(osm_port_get_guid(p_port)));
506
507		/*
508		   If the port is unreachable, remove it from the guid table.
509		 */
510		if (p_port->discovery_count == 0)
511			__osm_drop_mgr_remove_port(sm, p_port);
512	}
513
514	CL_PLOCK_RELEASE(sm->p_lock);
515	OSM_LOG_EXIT(sm->p_log);
516}
517