1/*
2 * Copyright (c) 2008      Mellanox Technologies LTD. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33
34/*
35 * Abstract:
36 *    Implementation of OpenSM Cached Unicast Routing
37 *
38 * Environment:
39 *    Linux User Mode
40 *
41 */
42
43#if HAVE_CONFIG_H
44#  include <config.h>
45#endif
46
47#include <stdlib.h>
48#include <string.h>
49#include <ctype.h>
50#include <errno.h>
51#include <iba/ib_types.h>
52#include <complib/cl_qmap.h>
53#include <complib/cl_pool.h>
54#include <complib/cl_debug.h>
55#include <opensm/osm_opensm.h>
56#include <opensm/osm_ucast_mgr.h>
57#include <opensm/osm_ucast_cache.h>
58#include <opensm/osm_switch.h>
59#include <opensm/osm_node.h>
60#include <opensm/osm_port.h>
61
62#define CACHE_SW_PORTS 36
63
64typedef struct cache_port {
65	boolean_t is_leaf;
66	uint16_t remote_lid_ho;
67} cache_port_t;
68
69typedef struct cache_switch {
70	cl_map_item_t map_item;
71	boolean_t dropped;
72	uint16_t max_lid_ho;
73	uint16_t num_hops;
74	uint8_t **hops;
75	uint8_t *lft;
76	uint8_t num_ports;
77	cache_port_t ports[0];
78} cache_switch_t;
79
80/**********************************************************************
81 **********************************************************************/
82
83static uint16_t __cache_sw_get_base_lid_ho(cache_switch_t * p_sw)
84{
85	return p_sw->ports[0].remote_lid_ho;
86}
87
88/**********************************************************************
89 **********************************************************************/
90
91static boolean_t __cache_sw_is_leaf(cache_switch_t * p_sw)
92{
93	return p_sw->ports[0].is_leaf;
94}
95
96/**********************************************************************
97 **********************************************************************/
98
99static void __cache_sw_set_leaf(cache_switch_t * p_sw)
100{
101	p_sw->ports[0].is_leaf = TRUE;
102}
103
104/**********************************************************************
105 **********************************************************************/
106
107static cache_switch_t *__cache_sw_new(uint16_t lid_ho, unsigned num_ports)
108{
109	cache_switch_t *p_cache_sw = malloc(sizeof(cache_switch_t) +
110					    num_ports * sizeof(cache_port_t));
111	if (!p_cache_sw)
112		return NULL;
113
114	memset(p_cache_sw, 0,
115	       sizeof(*p_cache_sw) + num_ports * sizeof(cache_port_t));
116
117	p_cache_sw->num_ports = num_ports;
118
119	/* port[0] fields represent this switch details - lid and type */
120	p_cache_sw->ports[0].remote_lid_ho = lid_ho;
121	p_cache_sw->ports[0].is_leaf = FALSE;
122
123	return p_cache_sw;
124}
125
126/**********************************************************************
127 **********************************************************************/
128
129static void __cache_sw_destroy(cache_switch_t * p_sw)
130{
131	if (!p_sw)
132		return;
133
134	if (p_sw->lft)
135		free(p_sw->lft);
136	if (p_sw->hops)
137		free(p_sw->hops);
138	free(p_sw);
139}
140
141/**********************************************************************
142 **********************************************************************/
143
144static cache_switch_t *__cache_get_sw(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho)
145{
146	cache_switch_t *p_cache_sw = (cache_switch_t *)
147	    cl_qmap_get(&p_mgr->cache_sw_tbl, lid_ho);
148	if (p_cache_sw == (cache_switch_t *)
149	    cl_qmap_end(&p_mgr->cache_sw_tbl))
150		p_cache_sw = NULL;
151
152	return p_cache_sw;
153}
154
155/**********************************************************************
156 **********************************************************************/
157static void __cache_add_sw_link(osm_ucast_mgr_t * p_mgr, osm_physp_t *p,
158				uint16_t remote_lid_ho, boolean_t is_ca)
159{
160	cache_switch_t *p_cache_sw;
161	uint16_t lid_ho = cl_ntoh16(osm_node_get_base_lid(p->p_node, 0));
162
163	OSM_LOG_ENTER(p_mgr->p_log);
164
165	if (!lid_ho || !remote_lid_ho || !p->port_num)
166		goto Exit;
167
168	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
169		"Caching switch port: lid %u [port %u] -> lid %u (%s)\n",
170		lid_ho, p->port_num, remote_lid_ho, (is_ca) ? "CA/RTR" : "SW");
171
172	p_cache_sw = __cache_get_sw(p_mgr, lid_ho);
173	if (!p_cache_sw) {
174		p_cache_sw = __cache_sw_new(lid_ho, p->p_node->sw->num_ports);
175		if (!p_cache_sw) {
176			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
177				"ERR AD01: Out of memory - cache is invalid\n");
178			osm_ucast_cache_invalidate(p_mgr);
179			goto Exit;
180		}
181		cl_qmap_insert(&p_mgr->cache_sw_tbl, lid_ho,
182			       &p_cache_sw->map_item);
183	}
184
185	if (p->port_num >= p_cache_sw->num_ports) {
186		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
187			"ERR AD02: Wrong switch? - cache is invalid\n");
188		osm_ucast_cache_invalidate(p_mgr);
189		goto Exit;
190	}
191
192	if (is_ca)
193		__cache_sw_set_leaf(p_cache_sw);
194
195	if (p_cache_sw->ports[p->port_num].remote_lid_ho == 0) {
196		/* cache this link only if it hasn't been already cached */
197		p_cache_sw->ports[p->port_num].remote_lid_ho = remote_lid_ho;
198		p_cache_sw->ports[p->port_num].is_leaf = is_ca;
199	}
200Exit:
201	OSM_LOG_EXIT(p_mgr->p_log);
202}
203
204/**********************************************************************
205 **********************************************************************/
206
207static void __cache_cleanup_switches(osm_ucast_mgr_t * p_mgr)
208{
209	cache_switch_t *p_sw;
210	cache_switch_t *p_next_sw;
211	unsigned port_num;
212	boolean_t found_port;
213
214	if (!p_mgr->cache_valid)
215		return;
216
217	p_next_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
218	while (p_next_sw !=
219	       (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl)) {
220		p_sw = p_next_sw;
221		p_next_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item);
222
223		found_port = FALSE;
224		for (port_num = 1; port_num < p_sw->num_ports; port_num++)
225			if (p_sw->ports[port_num].remote_lid_ho)
226				found_port = TRUE;
227
228		if (!found_port) {
229			cl_qmap_remove_item(&p_mgr->cache_sw_tbl,
230					    &p_sw->map_item);
231			__cache_sw_destroy(p_sw);
232		}
233	}
234}
235
236/**********************************************************************
237 **********************************************************************/
238
239static void
240__cache_check_link_change(osm_ucast_mgr_t * p_mgr,
241			  osm_physp_t * p_physp_1, osm_physp_t * p_physp_2)
242{
243	OSM_LOG_ENTER(p_mgr->p_log);
244	CL_ASSERT(p_physp_1 && p_physp_2);
245
246	if (!p_mgr->cache_valid)
247		goto Exit;
248
249	if (!p_physp_1->p_remote_physp && !p_physp_2->p_remote_physp)
250		/* both ports were down - new link */
251		goto Exit;
252
253	/* unicast cache cannot tolerate any link location change */
254
255	if ((p_physp_1->p_remote_physp &&
256	     p_physp_1->p_remote_physp->p_remote_physp) ||
257	    (p_physp_2->p_remote_physp &&
258	     p_physp_2->p_remote_physp->p_remote_physp)) {
259		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
260			"Link location change discovered - cache is invalid\n");
261		osm_ucast_cache_invalidate(p_mgr);
262		goto Exit;
263	}
264Exit:
265	OSM_LOG_EXIT(p_mgr->p_log);
266}
267
268/**********************************************************************
269 **********************************************************************/
270
271static void __cache_remove_port(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho,
272				uint8_t port_num, uint16_t remote_lid_ho,
273				boolean_t is_ca)
274{
275	cache_switch_t *p_cache_sw;
276
277	OSM_LOG_ENTER(p_mgr->p_log);
278
279	if (!p_mgr->cache_valid)
280		goto Exit;
281
282	p_cache_sw = __cache_get_sw(p_mgr, lid_ho);
283	if (!p_cache_sw) {
284		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
285			"Found uncached switch/link (lid %u, port %u) - "
286			"cache is invalid\n", lid_ho, port_num);
287		osm_ucast_cache_invalidate(p_mgr);
288		goto Exit;
289	}
290
291	if (port_num >= p_cache_sw->num_ports ||
292	    !p_cache_sw->ports[port_num].remote_lid_ho) {
293		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
294			"Found uncached switch link (lid %u, port %u) - "
295			"cache is invalid\n", lid_ho, port_num);
296		osm_ucast_cache_invalidate(p_mgr);
297		goto Exit;
298	}
299
300	if (p_cache_sw->ports[port_num].remote_lid_ho != remote_lid_ho) {
301		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
302			"Remote lid change on switch lid %u, port %u "
303			"(was %u, now %u) - cache is invalid\n",
304			lid_ho, port_num,
305			p_cache_sw->ports[port_num].remote_lid_ho,
306			remote_lid_ho);
307		osm_ucast_cache_invalidate(p_mgr);
308		goto Exit;
309	}
310
311	if ((p_cache_sw->ports[port_num].is_leaf && !is_ca) ||
312	    (!p_cache_sw->ports[port_num].is_leaf && is_ca)) {
313		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
314			"Remote node type change on switch lid %u, port %u - "
315			"cache is invalid\n", lid_ho, port_num);
316		osm_ucast_cache_invalidate(p_mgr);
317		goto Exit;
318	}
319
320	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
321		"New link from lid %u, port %u to lid %u - "
322		"found in cache\n", lid_ho, port_num, remote_lid_ho);
323
324	/* the new link was cached - clean it from the cache */
325
326	p_cache_sw->ports[port_num].remote_lid_ho = 0;
327	p_cache_sw->ports[port_num].is_leaf = FALSE;
328Exit:
329	OSM_LOG_EXIT(p_mgr->p_log);
330}				/* __cache_remove_port() */
331
332/**********************************************************************
333 **********************************************************************/
334
335static void
336__cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr,
337			   cache_switch_t * p_cache_sw, osm_switch_t * p_sw)
338{
339	if (!p_mgr->cache_valid)
340		return;
341
342	/* when seting unicast info, the cached port
343	   should have all the required info */
344	CL_ASSERT(p_cache_sw->max_lid_ho && p_cache_sw->lft &&
345		  p_cache_sw->num_hops && p_cache_sw->hops);
346
347	p_sw->max_lid_ho = p_cache_sw->max_lid_ho;
348
349	if (p_sw->new_lft)
350		free(p_sw->new_lft);
351	p_sw->new_lft = p_cache_sw->lft;
352	p_cache_sw->lft = NULL;
353
354	p_sw->num_hops = p_cache_sw->num_hops;
355	p_cache_sw->num_hops = 0;
356	if (p_sw->hops)
357		free(p_sw->hops);
358	p_sw->hops = p_cache_sw->hops;
359	p_cache_sw->hops = NULL;
360}
361
362/**********************************************************************
363 **********************************************************************/
364
365static void __ucast_cache_dump(osm_ucast_mgr_t * p_mgr)
366{
367	cache_switch_t *p_sw;
368	unsigned i;
369
370	OSM_LOG_ENTER(p_mgr->p_log);
371
372	if (!osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG))
373		goto Exit;
374
375	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
376		"Dumping missing nodes/links as logged by unicast cache:\n");
377	for (p_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
378	     p_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl);
379	     p_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item)) {
380
381		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
382			"\t Switch lid %u %s%s\n",
383			__cache_sw_get_base_lid_ho(p_sw),
384			(__cache_sw_is_leaf(p_sw)) ? "[leaf switch] " : "",
385			(p_sw->dropped) ? "[whole switch missing]" : "");
386
387		for (i = 1; i < p_sw->num_ports; i++)
388			if (p_sw->ports[i].remote_lid_ho > 0)
389				OSM_LOG(p_mgr->p_log,
390					OSM_LOG_DEBUG,
391					"\t     - port %u -> lid %u %s\n",
392					i, p_sw->ports[i].remote_lid_ho,
393					(p_sw->ports[i].is_leaf) ?
394					"[remote node is leaf]" : "");
395	}
396Exit:
397	OSM_LOG_EXIT(p_mgr->p_log);
398}
399
400/**********************************************************************
401 **********************************************************************/
402
403void osm_ucast_cache_invalidate(osm_ucast_mgr_t * p_mgr)
404{
405	cache_switch_t *p_sw;
406	cache_switch_t *p_next_sw;
407
408	OSM_LOG_ENTER(p_mgr->p_log);
409	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Invalidating unicast cache\n");
410
411	if (!p_mgr->cache_valid)
412		goto Exit;
413
414	p_mgr->cache_valid = FALSE;
415
416	p_next_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
417	while (p_next_sw !=
418	       (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl)) {
419		p_sw = p_next_sw;
420		p_next_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item);
421		__cache_sw_destroy(p_sw);
422	}
423	cl_qmap_remove_all(&p_mgr->cache_sw_tbl);
424Exit:
425	OSM_LOG_EXIT(p_mgr->p_log);
426}
427
428/**********************************************************************
429 **********************************************************************/
430
431static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr)
432{
433	cache_switch_t *p_cache_sw;
434	cache_switch_t *p_remote_cache_sw;
435	unsigned port_num;
436	unsigned max_ports;
437	uint8_t remote_node_type;
438	uint16_t lid_ho;
439	uint16_t remote_lid_ho;
440	osm_switch_t *p_sw;
441	osm_switch_t *p_remote_sw;
442	osm_node_t *p_node;
443	osm_physp_t *p_physp;
444	osm_physp_t *p_remote_physp;
445	osm_port_t *p_remote_port;
446	cl_qmap_t *p_sw_tbl;
447
448	OSM_LOG_ENTER(p_mgr->p_log);
449	if (!p_mgr->cache_valid)
450		goto Exit;
451
452	/* If there are no switches in the subnet, we are done */
453	p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
454	if (cl_qmap_count(p_sw_tbl) == 0) {
455		osm_ucast_cache_invalidate(p_mgr);
456		goto Exit;
457	}
458
459	/*
460	 * Scan all the physical switch ports in the subnet.
461	 * If the port need_update flag is on, check whether
462	 * it's just some node/port reset or a cached topology
463	 * change. Otherwise the cache is invalid.
464	 */
465	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
466	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
467	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {
468
469		p_node = p_sw->p_node;
470
471		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
472		p_cache_sw = __cache_get_sw(p_mgr, lid_ho);
473
474		max_ports = osm_node_get_num_physp(p_node);
475
476		/* skip port 0 */
477		for (port_num = 1; port_num < max_ports; port_num++) {
478
479			p_physp = osm_node_get_physp_ptr(p_node, port_num);
480
481			if (!p_physp || !p_physp->p_remote_physp ||
482			    !osm_physp_link_exists(p_physp,
483						   p_physp->p_remote_physp))
484				/* no valid link */
485				continue;
486
487			/*
488			 * While scanning all the physical ports in the subnet,
489			 * mark corresponding leaf switches in the cache.
490			 */
491			if (p_cache_sw &&
492			    !p_cache_sw->dropped &&
493			    !__cache_sw_is_leaf(p_cache_sw) &&
494			    p_physp->p_remote_physp->p_node &&
495			    osm_node_get_type(p_physp->p_remote_physp->
496					      p_node) != IB_NODE_TYPE_SWITCH)
497				__cache_sw_set_leaf(p_cache_sw);
498
499			if (!p_physp->need_update)
500				continue;
501
502			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
503				"Checking switch lid %u, port %u\n",
504				lid_ho, port_num);
505
506			p_remote_physp = osm_physp_get_remote(p_physp);
507			remote_node_type =
508			    osm_node_get_type(p_remote_physp->p_node);
509
510			if (remote_node_type == IB_NODE_TYPE_SWITCH)
511				remote_lid_ho =
512				    cl_ntoh16(osm_node_get_base_lid
513					      (p_remote_physp->p_node, 0));
514			else
515				remote_lid_ho =
516				    cl_ntoh16(osm_node_get_base_lid
517					      (p_remote_physp->p_node,
518					       osm_physp_get_port_num
519					       (p_remote_physp)));
520
521			if (!p_cache_sw ||
522			    port_num >= p_cache_sw->num_ports ||
523			    !p_cache_sw->ports[port_num].remote_lid_ho) {
524				/*
525				 * There is some uncached change on the port.
526				 * In general, the reasons might be as follows:
527				 *  - switch reset
528				 *  - port reset (or port down/up)
529				 *  - quick connection location change
530				 *  - new link (or new switch)
531				 *
532				 * First two reasons allow cache usage, while
533				 * the last two reasons should invalidate cache.
534				 *
535				 * In case of quick connection location change,
536				 * cache would have been invalidated by
537				 * osm_ucast_cache_check_new_link() function.
538				 *
539				 * In case of new link between two known nodes,
540				 * cache also would have been invalidated by
541				 * osm_ucast_cache_check_new_link() function.
542				 *
543				 * Another reason is cached link between two
544				 * known switches went back. In this case the
545				 * osm_ucast_cache_check_new_link() function would
546				 * clear both sides of the link from the cache
547				 * during the discovery process, so effectively
548				 * this would be equivalent to port reset.
549				 *
550				 * So three possible reasons remain:
551				 *  - switch reset
552				 *  - port reset (or port down/up)
553				 *  - link of a new switch
554				 *
555				 * To validate cache, we need to check only the
556				 * third reason - link of a new node/switch:
557				 *  - If this is the local switch that is new,
558				 *    then it should have (p_sw->need_update == 2).
559				 *  - If the remote node is switch and it's new,
560				 *    then it also should have
561				 *    (p_sw->need_update == 2).
562				 *  - If the remote node is CA/RTR and it's new,
563				 *    then its port should have is_new flag on.
564				 */
565				if (p_sw->need_update == 2) {
566					OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
567						"New switch found (lid %u) - "
568						"cache is invalid\n", lid_ho);
569					osm_ucast_cache_invalidate(p_mgr);
570					goto Exit;
571				}
572
573				if (remote_node_type == IB_NODE_TYPE_SWITCH) {
574
575					p_remote_sw =
576					    p_remote_physp->p_node->sw;
577					if (p_remote_sw->need_update == 2) {
578						/* this could also be case of
579						   switch coming back with an
580						   additional link that it
581						   didn't have before */
582						OSM_LOG(p_mgr->p_log,
583							OSM_LOG_INFO,
584							"New switch/link found (lid %u) - "
585							"cache is invalid\n",
586							remote_lid_ho);
587						osm_ucast_cache_invalidate
588						    (p_mgr);
589						goto Exit;
590					}
591				} else {
592					/*
593					 * Remote node is CA/RTR.
594					 * Get p_port of the remote node and
595					 * check its p_port->is_new flag.
596					 */
597					p_remote_port =
598					    osm_get_port_by_guid(p_mgr->p_subn,
599								 osm_physp_get_port_guid
600								 (p_remote_physp));
601					if (p_remote_port->is_new) {
602						OSM_LOG(p_mgr->p_log,
603							OSM_LOG_INFO,
604							"New CA/RTR found (lid %u) - "
605							"cache is invalid\n",
606							remote_lid_ho);
607						osm_ucast_cache_invalidate
608						    (p_mgr);
609						goto Exit;
610					}
611				}
612			} else {
613				/*
614				 * The change on the port is cached.
615				 * In general, the reasons might be as follows:
616				 *  - link between two known nodes went back
617				 *  - one or more nodes went back, causing all
618				 *    the links to reappear
619				 *
620				 * If it was link that went back, then this case
621				 * would have been taken care of during the
622				 * discovery by osm_ucast_cache_check_new_link(),
623				 * so it's some node that went back.
624				 */
625				if ((p_cache_sw->ports[port_num].is_leaf &&
626				     remote_node_type == IB_NODE_TYPE_SWITCH) ||
627				    (!p_cache_sw->ports[port_num].is_leaf &&
628				     remote_node_type != IB_NODE_TYPE_SWITCH)) {
629					OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
630						"Remote node type change on switch lid %u, port %u - "
631						"cache is invalid\n",
632						lid_ho, port_num);
633					osm_ucast_cache_invalidate(p_mgr);
634					goto Exit;
635				}
636
637				if (p_cache_sw->ports[port_num].remote_lid_ho !=
638				    remote_lid_ho) {
639					OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
640						"Remote lid change on switch lid %u, port %u"
641						"(was %u, now %u) - cache is invalid\n",
642						lid_ho, port_num,
643						p_cache_sw->ports[port_num].
644						remote_lid_ho, remote_lid_ho);
645					osm_ucast_cache_invalidate(p_mgr);
646					goto Exit;
647				}
648
649				/*
650				 * We don't care who is the node that has
651				 * reappeared in the subnet (local or remote).
652				 * What's important that the cached link matches
653				 * the real fabrics link.
654				 * Just clean it from cache.
655				 */
656
657				p_cache_sw->ports[port_num].remote_lid_ho = 0;
658				p_cache_sw->ports[port_num].is_leaf = FALSE;
659				if (p_cache_sw->dropped) {
660					__cache_restore_ucast_info(p_mgr,
661								   p_cache_sw,
662								   p_sw);
663					p_cache_sw->dropped = FALSE;
664				}
665
666				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
667					"Restored link from cache: lid %u, port %u to lid %u\n",
668					lid_ho, port_num, remote_lid_ho);
669			}
670		}
671	}
672
673	/* Remove all the cached switches that
674	   have all their ports restored */
675	__cache_cleanup_switches(p_mgr);
676
677	/*
678	 * Done scanning all the physical switch ports in the subnet.
679	 * Now we need to check the other side:
680	 * Scan all the cached switches and their ports:
681	 *  - If the cached switch is missing in the subnet
682	 *    (dropped flag is on), check that it's a leaf switch.
683	 *    If it's not a leaf, the cache is invalid, because
684	 *    cache can tolerate only leaf switch removal.
685	 *  - If the cached switch exists in fabric, check all
686	 *    its cached ports. These cached ports represent
687	 *    missing link in the fabric.
688	 *    The missing links that can be tolerated are:
689	 *      + link to missing CA/RTR
690	 *      + link to missing leaf switch
691	 */
692	for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
693	     p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl);
694	     p_cache_sw =
695	     (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) {
696
697		if (p_cache_sw->dropped) {
698			if (!__cache_sw_is_leaf(p_cache_sw)) {
699				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
700					"Missing non-leaf switch (lid %u) - "
701					"cache is invalid\n",
702					__cache_sw_get_base_lid_ho(p_cache_sw));
703				osm_ucast_cache_invalidate(p_mgr);
704				goto Exit;
705			}
706
707			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
708				"Missing leaf switch (lid %u) - "
709				"continuing validation\n",
710				__cache_sw_get_base_lid_ho(p_cache_sw));
711			continue;
712		}
713
714		for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) {
715			if (!p_cache_sw->ports[port_num].remote_lid_ho)
716				continue;
717
718			if (p_cache_sw->ports[port_num].is_leaf) {
719				CL_ASSERT(__cache_sw_is_leaf(p_cache_sw));
720				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
721					"Switch lid %u, port %u: missing link to CA/RTR - "
722					"continuing validation\n",
723					__cache_sw_get_base_lid_ho(p_cache_sw),
724					port_num);
725				continue;
726			}
727
728			p_remote_cache_sw = __cache_get_sw(p_mgr,
729							   p_cache_sw->
730							   ports[port_num].
731							   remote_lid_ho);
732
733			if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) {
734				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
735					"Switch lid %u, port %u: missing link to existing switch - "
736					"cache is invalid\n",
737					__cache_sw_get_base_lid_ho(p_cache_sw),
738					port_num);
739				osm_ucast_cache_invalidate(p_mgr);
740				goto Exit;
741			}
742
743			if (!__cache_sw_is_leaf(p_remote_cache_sw)) {
744				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
745					"Switch lid %u, port %u: missing link to non-leaf switch - "
746					"cache is invalid\n",
747					__cache_sw_get_base_lid_ho(p_cache_sw),
748					port_num);
749				osm_ucast_cache_invalidate(p_mgr);
750				goto Exit;
751			}
752
753			/*
754			 * At this point we know that the missing link is to
755			 * a leaf switch. However, one case deserves a special
756			 * treatment. If there was a link between two leaf
757			 * switches, then missing leaf switch might break
758			 * routing. It is possible that there are routes
759			 * that use leaf switches to get from switch to switch
760			 * and not just to get to the CAs behind the leaf switch.
761			 */
762			if (__cache_sw_is_leaf(p_cache_sw) &&
763			    __cache_sw_is_leaf(p_remote_cache_sw)) {
764				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
765					"Switch lid %u, port %u: missing leaf-2-leaf link - "
766					"cache is invalid\n",
767					__cache_sw_get_base_lid_ho(p_cache_sw),
768					port_num);
769				osm_ucast_cache_invalidate(p_mgr);
770				goto Exit;
771			}
772
773			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
774				"Switch lid %u, port %u: missing remote leaf switch - "
775				"continuing validation\n",
776				__cache_sw_get_base_lid_ho(p_cache_sw),
777				port_num);
778		}
779	}
780
781	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n");
782	__ucast_cache_dump(p_mgr);
783Exit:
784	OSM_LOG_EXIT(p_mgr->p_log);
785}				/* osm_ucast_cache_validate() */
786
787/**********************************************************************
788 **********************************************************************/
789
790void osm_ucast_cache_check_new_link(osm_ucast_mgr_t * p_mgr,
791				    osm_node_t * p_node_1, uint8_t port_num_1,
792				    osm_node_t * p_node_2, uint8_t port_num_2)
793{
794	uint16_t lid_ho_1;
795	uint16_t lid_ho_2;
796
797	OSM_LOG_ENTER(p_mgr->p_log);
798
799	if (!p_mgr->cache_valid)
800		goto Exit;
801
802	__cache_check_link_change(p_mgr,
803				  osm_node_get_physp_ptr(p_node_1, port_num_1),
804				  osm_node_get_physp_ptr(p_node_2, port_num_2));
805
806	if (!p_mgr->cache_valid)
807		goto Exit;
808
809	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH &&
810	    osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) {
811		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
812			"Found CA/RTR-2-CA/RTR link - cache is invalid\n");
813		osm_ucast_cache_invalidate(p_mgr);
814		goto Exit;
815	}
816
817	/* for code simplicity, we want the first node to be switch */
818	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) {
819		osm_node_t *tmp_node = p_node_1;
820		uint8_t tmp_port_num = port_num_1;
821		p_node_1 = p_node_2;
822		port_num_1 = port_num_2;
823		p_node_2 = tmp_node;
824		port_num_2 = tmp_port_num;
825	}
826
827	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));
828
829	if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH)
830		lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0));
831	else
832		lid_ho_2 =
833		    cl_ntoh16(osm_node_get_base_lid(p_node_2, port_num_2));
834
835	if (!lid_ho_1 || !lid_ho_2) {
836		/*
837		 * No lid assigned, which means that one of the nodes is new.
838		 * Need to wait for lid manager to process this node.
839		 * The switches and their links will be checked later when
840		 * the whole cache validity will be verified.
841		 */
842		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
843			"Link port %u <-> %u reveals new node - cache will "
844			"be validated later\n", port_num_1, port_num_2);
845		goto Exit;
846	}
847
848	__cache_remove_port(p_mgr, lid_ho_1, port_num_1, lid_ho_2,
849			    (osm_node_get_type(p_node_2) !=
850			     IB_NODE_TYPE_SWITCH));
851
852	/* if node_2 is a switch, the link should be cleaned from its cache */
853
854	if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH)
855		__cache_remove_port(p_mgr, lid_ho_2,
856				    port_num_2, lid_ho_1, FALSE);
857
858Exit:
859	OSM_LOG_EXIT(p_mgr->p_log);
860}				/* osm_ucast_cache_check_new_link() */
861
862/**********************************************************************
863 **********************************************************************/
864
865void osm_ucast_cache_add_link(osm_ucast_mgr_t * p_mgr,
866			      osm_physp_t * p_physp1, osm_physp_t * p_physp2)
867{
868	osm_node_t *p_node_1 = p_physp1->p_node, *p_node_2 = p_physp2->p_node;
869	uint16_t lid_ho_1, lid_ho_2;
870
871	OSM_LOG_ENTER(p_mgr->p_log);
872
873	if (!p_mgr->cache_valid)
874		goto Exit;
875
876	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH &&
877	    osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) {
878		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
879			"Dropping CA-2-CA link - cache invalid\n");
880		osm_ucast_cache_invalidate(p_mgr);
881		goto Exit;
882	}
883
884	if ((osm_node_get_type(p_node_1) == IB_NODE_TYPE_SWITCH &&
885	     !osm_node_get_physp_ptr(p_node_1, 0)) ||
886	    (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH &&
887	     !osm_node_get_physp_ptr(p_node_2, 0))) {
888		/* we're caching a link when one of the nodes
889		   has already been dropped and cached */
890		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
891			"Port %u <-> port %u: port0 on one of the nodes "
892			"has already been dropped and cached\n",
893			p_physp1->port_num, p_physp2->port_num);
894		goto Exit;
895	}
896
897	/* One of the nodes is switch. Just for code
898	   simplicity, make sure that it's the first node. */
899
900	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) {
901		osm_physp_t *tmp = p_physp1;
902		p_physp1 = p_physp2;
903		p_physp2 = tmp;
904		p_node_1 = p_physp1->p_node;
905		p_node_2 = p_physp2->p_node;
906	}
907
908	if (!p_node_1->sw) {
909		/* something is wrong - we'd better not use cache */
910		osm_ucast_cache_invalidate(p_mgr);
911		goto Exit;
912	}
913
914	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));
915
916	if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH) {
917
918		if (!p_node_2->sw) {
919			/* something is wrong - we'd better not use cache */
920			osm_ucast_cache_invalidate(p_mgr);
921			goto Exit;
922		}
923
924		lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0));
925
926		/* lost switch-2-switch link - cache both sides */
927		__cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, FALSE);
928		__cache_add_sw_link(p_mgr, p_physp2, lid_ho_1, FALSE);
929	} else {
930		lid_ho_2 = cl_ntoh16(osm_physp_get_base_lid(p_physp2));
931
932		/* lost link to CA/RTR - cache only switch side */
933		__cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, TRUE);
934	}
935
936Exit:
937	OSM_LOG_EXIT(p_mgr->p_log);
938}				/* osm_ucast_cache_add_link() */
939
940/**********************************************************************
941 **********************************************************************/
942
943void osm_ucast_cache_add_node(osm_ucast_mgr_t * p_mgr, osm_node_t * p_node)
944{
945	uint16_t lid_ho;
946	uint8_t max_ports;
947	uint8_t port_num;
948	osm_physp_t *p_physp;
949	cache_switch_t *p_cache_sw;
950
951	OSM_LOG_ENTER(p_mgr->p_log);
952
953	if (!p_mgr->cache_valid)
954		goto Exit;
955
956	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) {
957
958		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
959
960		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
961			"Caching dropped switch lid %u\n", lid_ho);
962
963		if (!p_node->sw) {
964			/* something is wrong - forget about cache */
965			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
966				"ERR AD03: no switch info for node lid %u - "
967				"clearing cache\n", lid_ho);
968			osm_ucast_cache_invalidate(p_mgr);
969			goto Exit;
970		}
971
972		/* unlink (add to cache) all the ports of this switch */
973		max_ports = osm_node_get_num_physp(p_node);
974		for (port_num = 1; port_num < max_ports; port_num++) {
975
976			p_physp = osm_node_get_physp_ptr(p_node, port_num);
977			if (!p_physp || !p_physp->p_remote_physp)
978				continue;
979
980			osm_ucast_cache_add_link(p_mgr, p_physp,
981						 p_physp->p_remote_physp);
982		}
983
984		/*
985		 * All the ports have been dropped (cached).
986		 * If one of the ports was connected to CA/RTR,
987		 * then the cached switch would be marked as leaf.
988		 * If it isn't, then the dropped switch isn't a leaf,
989		 * and cache can't handle it.
990		 */
991
992		p_cache_sw = __cache_get_sw(p_mgr, lid_ho);
993		CL_ASSERT(p_cache_sw);
994
995		if (!__cache_sw_is_leaf(p_cache_sw)) {
996			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
997				"Dropped non-leaf switch (lid %u) - "
998				"cache is invalid\n", lid_ho);
999			osm_ucast_cache_invalidate(p_mgr);
1000			goto Exit;
1001		}
1002
1003		p_cache_sw->dropped = TRUE;
1004
1005		if (!p_node->sw->num_hops || !p_node->sw->hops) {
1006			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
1007				"No LID matrices for switch lid %u - "
1008				"cache is invalid\n", lid_ho);
1009			osm_ucast_cache_invalidate(p_mgr);
1010			goto Exit;
1011		}
1012
1013		/* lid matrices */
1014
1015		p_cache_sw->num_hops = p_node->sw->num_hops;
1016		p_node->sw->num_hops = 0;
1017		p_cache_sw->hops = p_node->sw->hops;
1018		p_node->sw->hops = NULL;
1019
1020		/* linear forwarding table */
1021
1022		if (p_node->sw->new_lft) {
1023			/* LFT buffer exists - we use it, because
1024			   it is more updated than the switch's LFT */
1025			p_cache_sw->lft = p_node->sw->new_lft;
1026			p_node->sw->new_lft = NULL;
1027		} else {
1028			/* no LFT buffer, so we use the switch's LFT */
1029			p_cache_sw->lft = p_node->sw->lft;
1030			p_node->sw->lft = NULL;
1031		}
1032		p_cache_sw->max_lid_ho = p_node->sw->max_lid_ho;
1033	} else {
1034		/* dropping CA/RTR: add to cache all the ports of this node */
1035		max_ports = osm_node_get_num_physp(p_node);
1036		for (port_num = 1; port_num < max_ports; port_num++) {
1037
1038			p_physp = osm_node_get_physp_ptr(p_node, port_num);
1039			if (!p_physp || !p_physp->p_remote_physp)
1040				continue;
1041
1042			CL_ASSERT(osm_node_get_type
1043				  (p_physp->p_remote_physp->p_node) ==
1044				  IB_NODE_TYPE_SWITCH);
1045
1046			osm_ucast_cache_add_link(p_mgr,
1047						 p_physp->p_remote_physp,
1048						 p_physp);
1049		}
1050	}
1051Exit:
1052	OSM_LOG_EXIT(p_mgr->p_log);
1053}				/* osm_ucast_cache_add_node() */
1054
1055/**********************************************************************
1056 **********************************************************************/
1057
1058int osm_ucast_cache_process(osm_ucast_mgr_t * p_mgr)
1059{
1060	cl_qmap_t *tbl = &p_mgr->p_subn->sw_guid_tbl;
1061	cl_map_item_t *item;
1062	osm_switch_t *p_sw;
1063
1064	if (!p_mgr->p_subn->opt.use_ucast_cache)
1065		return 1;
1066
1067	ucast_cache_validate(p_mgr);
1068	if (!p_mgr->cache_valid)
1069		return 1;
1070
1071	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
1072		"Configuring switch tables using cached routing\n");
1073
1074	for (item = cl_qmap_head(tbl); item != cl_qmap_end(tbl);
1075	     item = cl_qmap_next(item)) {
1076		p_sw = (osm_switch_t *) item;
1077
1078		if (p_sw->need_update && !p_sw->new_lft) {
1079			/* no new routing was recently calculated for this
1080			   switch, but the LFT needs to be updated anyway */
1081			p_sw->new_lft = p_sw->lft;
1082			p_sw->lft = malloc(IB_LID_UCAST_END_HO + 1);
1083			if (!p_sw->lft)
1084				return IB_INSUFFICIENT_MEMORY;
1085			memset(p_sw->lft, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1);
1086		}
1087
1088		osm_ucast_mgr_set_fwd_table(p_mgr, p_sw);
1089	}
1090
1091	return 0;
1092}
1093
1094/**********************************************************************
1095 **********************************************************************/
1096