ib_cache.c revision 331769
1/*
2 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/module.h>
37#include <linux/errno.h>
38#include <linux/slab.h>
39#include <linux/workqueue.h>
40#include <linux/netdevice.h>
41
42#include <rdma/ib_cache.h>
43
44#include "core_priv.h"
45
46struct ib_pkey_cache {
47	int             table_len;
48	u16             table[0];
49};
50
51struct ib_update_work {
52	struct work_struct work;
53	struct ib_device  *device;
54	u8                 port_num;
55};
56
57union ib_gid zgid;
58EXPORT_SYMBOL(zgid);
59
60static const struct ib_gid_attr zattr;
61
62enum gid_attr_find_mask {
63	GID_ATTR_FIND_MASK_GID          = 1UL << 0,
64	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
65	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
66	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
67};
68
69enum gid_table_entry_props {
70	GID_TABLE_ENTRY_INVALID		= 1UL << 0,
71	GID_TABLE_ENTRY_DEFAULT		= 1UL << 1,
72};
73
74enum gid_table_write_action {
75	GID_TABLE_WRITE_ACTION_ADD,
76	GID_TABLE_WRITE_ACTION_DEL,
77	/* MODIFY only updates the GID table. Currently only used by
78	 * ib_cache_update.
79	 */
80	GID_TABLE_WRITE_ACTION_MODIFY
81};
82
83struct ib_gid_table_entry {
84	unsigned long	    props;
85	union ib_gid        gid;
86	struct ib_gid_attr  attr;
87	void		   *context;
88};
89
90struct ib_gid_table {
91	int                  sz;
92	/* In RoCE, adding a GID to the table requires:
93	 * (a) Find if this GID already exists.
94	 * (b) Find a free slot.
95	 * (c) Write the new GID.
96	 *
97	 * Delete requires a different set of operations:
98	 * (a) Find the GID
99	 * (b) Delete it.
100	 *
101	 * Add/delete should be carried out atomically.
102	 * This is enforced by taking this mutex in all
103	 * writers. We don't need this lock for IB, as the MAD
104	 * layer replaces all entries. All data_vec entries
105	 * are protected by this lock.
106	 */
107	struct mutex         lock;
108	/* This lock protects the table entries from being
109	 * read and written simultaneously.
110	 */
111	rwlock_t	     rwlock;
112	struct ib_gid_table_entry *data_vec;
113};
114
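/*
 * Illustrative locking-order sketch (informational comment only): this is
 * the order in which the RoCE write paths below, e.g. ib_cache_gid_add()
 * and ib_cache_gid_del(), take the two locks guarding a GID table:
 *
 *	mutex_lock(&table->lock);		// serialize writers; may sleep
 *	write_lock_irq(&table->rwlock);		// exclude readers of data_vec
 *	...find_gid() / write_gid()...
 *	write_unlock_irq(&table->rwlock);
 *	mutex_unlock(&table->lock);
 */
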
115static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
116{
117	if (rdma_cap_roce_gid_table(ib_dev, port)) {
118		struct ib_event event;
119
120		event.device		= ib_dev;
121		event.element.port_num	= port;
122		event.event		= IB_EVENT_GID_CHANGE;
123
124		ib_dispatch_event(&event);
125	}
126}
127
128static const char * const gid_type_str[] = {
129	[IB_GID_TYPE_IB]	= "IB/RoCE v1",
130	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
131};
132
133const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
134{
135	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
136		return gid_type_str[gid_type];
137
138	return "Invalid GID type";
139}
140EXPORT_SYMBOL(ib_cache_gid_type_str);
141
142int ib_cache_gid_parse_type_str(const char *buf)
143{
144	unsigned int i;
145	size_t len;
146	int err = -EINVAL;
147
148	len = strlen(buf);
149	if (len == 0)
150		return -EINVAL;
151
152	if (buf[len - 1] == '\n')
153		len--;
154
155	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
156		if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
157		    len == strlen(gid_type_str[i])) {
158			err = i;
159			break;
160		}
161
162	return err;
163}
164EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
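
/*
 * Usage sketch (hypothetical caller, not part of this file): a sysfs store
 * handler could round-trip the strings above.  Parsing "RoCE v2" (with or
 * without a trailing newline) yields IB_GID_TYPE_ROCE_UDP_ENCAP:
 *
 *	int gid_type = ib_cache_gid_parse_type_str(buf);
 *
 *	if (gid_type < 0)
 *		return -EINVAL;
 *	pr_info("GID type set to %s\n", ib_cache_gid_type_str(gid_type));
 */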
165
166/* This function expects that rwlock will be write locked in all
167 * scenarios and that the sleepable mutex (lock) will also be held in
168 * RoCE scenarios.
169 */
170static int write_gid(struct ib_device *ib_dev, u8 port,
171		     struct ib_gid_table *table, int ix,
172		     const union ib_gid *gid,
173		     const struct ib_gid_attr *attr,
174		     enum gid_table_write_action action,
175		     bool  default_gid)
176	__releases(&table->rwlock) __acquires(&table->rwlock)
177{
178	int ret = 0;
179	struct net_device *old_net_dev;
180	enum ib_gid_type old_gid_type;
181
182	/* In the rdma_cap_roce_gid_table() case, this function must be
183	 * protected by a sleepable lock.
184	 */
185
186	if (rdma_cap_roce_gid_table(ib_dev, port)) {
187		table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
188		write_unlock_irq(&table->rwlock);
189		/* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
190		 * RoCE providers and thus only updates the cache.
191		 */
192		if (action == GID_TABLE_WRITE_ACTION_ADD)
193			ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
194					      &table->data_vec[ix].context);
195		else if (action == GID_TABLE_WRITE_ACTION_DEL)
196			ret = ib_dev->del_gid(ib_dev, port, ix,
197					      &table->data_vec[ix].context);
198		write_lock_irq(&table->rwlock);
199	}
200
201	old_net_dev = table->data_vec[ix].attr.ndev;
202	old_gid_type = table->data_vec[ix].attr.gid_type;
203	if (old_net_dev && old_net_dev != attr->ndev)
204		dev_put(old_net_dev);
205	/* if the provider update failed or this is a delete, store the zero gid */
206	if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
207		gid = &zgid;
208		attr = &zattr;
209		table->data_vec[ix].context = NULL;
210	}
211
212	memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
213	memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
214	if (default_gid) {
215		table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
216		if (action == GID_TABLE_WRITE_ACTION_DEL)
217			table->data_vec[ix].attr.gid_type = old_gid_type;
218	}
219	if (table->data_vec[ix].attr.ndev &&
220	    table->data_vec[ix].attr.ndev != old_net_dev)
221		dev_hold(table->data_vec[ix].attr.ndev);
222
223	table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
224
225	return ret;
226}
227
228static int add_gid(struct ib_device *ib_dev, u8 port,
229		   struct ib_gid_table *table, int ix,
230		   const union ib_gid *gid,
231		   const struct ib_gid_attr *attr,
232		   bool  default_gid) {
233	return write_gid(ib_dev, port, table, ix, gid, attr,
234			 GID_TABLE_WRITE_ACTION_ADD, default_gid);
235}
236
237static int modify_gid(struct ib_device *ib_dev, u8 port,
238		      struct ib_gid_table *table, int ix,
239		      const union ib_gid *gid,
240		      const struct ib_gid_attr *attr,
241		      bool  default_gid) {
242	return write_gid(ib_dev, port, table, ix, gid, attr,
243			 GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
244}
245
246static int del_gid(struct ib_device *ib_dev, u8 port,
247		   struct ib_gid_table *table, int ix,
248		   bool  default_gid) {
249	return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
250			 GID_TABLE_WRITE_ACTION_DEL, default_gid);
251}
252
253/* rwlock should be read locked */
254static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
255		    const struct ib_gid_attr *val, bool default_gid,
256		    unsigned long mask, int *pempty)
257{
258	int i = 0;
259	int found = -1;
260	int empty = pempty ? -1 : 0;
261
262	while (i < table->sz && (found < 0 || empty < 0)) {
263		struct ib_gid_table_entry *data = &table->data_vec[i];
264		struct ib_gid_attr *attr = &data->attr;
265		int curr_index = i;
266
267		i++;
268
269		if (data->props & GID_TABLE_ENTRY_INVALID)
270			continue;
271
272		if (empty < 0)
273			if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
274			    !memcmp(attr, &zattr, sizeof(*attr)) &&
275			    !data->props)
276				empty = curr_index;
277
278		if (found >= 0)
279			continue;
280
281		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
282		    attr->gid_type != val->gid_type)
283			continue;
284
285		if (mask & GID_ATTR_FIND_MASK_GID &&
286		    memcmp(gid, &data->gid, sizeof(*gid)))
287			continue;
288
289		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
290		    attr->ndev != val->ndev)
291			continue;
292
293		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
294		    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
295		    default_gid)
296			continue;
297
298		found = curr_index;
299	}
300
301	if (pempty)
302		*pempty = empty;
303
304	return found;
305}
306
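/*
 * Worked example (informational only) for addrconf_ifid_eui48() below: the
 * 48-bit MAC is turned into the modified EUI-64 interface identifier that
 * forms the low 8 bytes of the default link-local GID.  For a MAC of
 * 00:11:22:33:44:55 the result is 02:11:22:ff:fe:33:44:55, i.e. ff:fe is
 * inserted in the middle and the universal/local bit of byte 0 is flipped:
 *
 *	u8 eui[8];
 *
 *	addrconf_ifid_eui48(eui, ndev);	-> eui = 02 11 22 ff fe 33 44 55
 */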
307static void addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
308{
309	if (dev->if_addrlen != ETH_ALEN)
310		return;
311	memcpy(eui, IF_LLADDR(dev), 3);
312	memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
313
314	/* NOTE: The scope ID is added by the GID to IP conversion */
315
316	eui[3] = 0xFF;
317	eui[4] = 0xFE;
318	eui[0] ^= 2;
319}
320
321static void make_default_gid(struct  net_device *dev, union ib_gid *gid)
322{
323	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
324	addrconf_ifid_eui48(&gid->raw[8], dev);
325}
326
327int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
328		     union ib_gid *gid, struct ib_gid_attr *attr)
329{
330	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
331	struct ib_gid_table *table;
332	int ix;
333	int ret = 0;
334	struct net_device *idev;
335	int empty;
336
337	table = ports_table[port - rdma_start_port(ib_dev)];
338
339	if (!memcmp(gid, &zgid, sizeof(*gid)))
340		return -EINVAL;
341
342	if (ib_dev->get_netdev) {
343		idev = ib_dev->get_netdev(ib_dev, port);
344		if (idev && attr->ndev != idev) {
345			union ib_gid default_gid;
346
347			/* Adding default GIDs is not permitted */
348			make_default_gid(idev, &default_gid);
349			if (!memcmp(gid, &default_gid, sizeof(*gid))) {
350				dev_put(idev);
351				return -EPERM;
352			}
353		}
354		if (idev)
355			dev_put(idev);
356	}
357
358	mutex_lock(&table->lock);
359	write_lock_irq(&table->rwlock);
360
361	ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
362		      GID_ATTR_FIND_MASK_GID_TYPE |
363		      GID_ATTR_FIND_MASK_NETDEV, &empty);
364	if (ix >= 0)
365		goto out_unlock;
366
367	if (empty < 0) {
368		ret = -ENOSPC;
369		goto out_unlock;
370	}
371
372	ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
373	if (!ret)
374		dispatch_gid_change_event(ib_dev, port);
375
376out_unlock:
377	write_unlock_irq(&table->rwlock);
378	mutex_unlock(&table->lock);
379	return ret;
380}
381
382int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
383		     union ib_gid *gid, struct ib_gid_attr *attr)
384{
385	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
386	struct ib_gid_table *table;
387	int ix;
388
389	table = ports_table[port - rdma_start_port(ib_dev)];
390
391	mutex_lock(&table->lock);
392	write_lock_irq(&table->rwlock);
393
394	ix = find_gid(table, gid, attr, false,
395		      GID_ATTR_FIND_MASK_GID	  |
396		      GID_ATTR_FIND_MASK_GID_TYPE |
397		      GID_ATTR_FIND_MASK_NETDEV	  |
398		      GID_ATTR_FIND_MASK_DEFAULT,
399		      NULL);
400	if (ix < 0)
401		goto out_unlock;
402
403	if (!del_gid(ib_dev, port, table, ix, false))
404		dispatch_gid_change_event(ib_dev, port);
405
406out_unlock:
407	write_unlock_irq(&table->rwlock);
408	mutex_unlock(&table->lock);
409	return 0;
410}
411
412int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
413				     struct net_device *ndev)
414{
415	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
416	struct ib_gid_table *table;
417	int ix;
418	bool deleted = false;
419
420	table  = ports_table[port - rdma_start_port(ib_dev)];
421
422	mutex_lock(&table->lock);
423	write_lock_irq(&table->rwlock);
424
425	for (ix = 0; ix < table->sz; ix++)
426		if (table->data_vec[ix].attr.ndev == ndev)
427			if (!del_gid(ib_dev, port, table, ix,
428				     !!(table->data_vec[ix].props &
429					GID_TABLE_ENTRY_DEFAULT)))
430				deleted = true;
431
432	write_unlock_irq(&table->rwlock);
433	mutex_unlock(&table->lock);
434
435	if (deleted)
436		dispatch_gid_change_event(ib_dev, port);
437
438	return 0;
439}
440
441static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
442			      union ib_gid *gid, struct ib_gid_attr *attr)
443{
444	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
445	struct ib_gid_table *table;
446
447	table = ports_table[port - rdma_start_port(ib_dev)];
448
449	if (index < 0 || index >= table->sz)
450		return -EINVAL;
451
452	if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
453		return -EAGAIN;
454
455	memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
456	if (attr) {
457		memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
458		/* make sure network device is valid and attached */
459		if (attr->ndev != NULL &&
460		    (attr->ndev->if_flags & IFF_DYING) == 0 &&
461		    attr->ndev->if_addr != NULL)
462			dev_hold(attr->ndev);
463		else
464			attr->ndev = NULL;
465	}
466
467	return 0;
468}
469
470static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
471				    const union ib_gid *gid,
472				    const struct ib_gid_attr *val,
473				    unsigned long mask,
474				    u8 *port, u16 *index)
475{
476	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
477	struct ib_gid_table *table;
478	u8 p;
479	int local_index;
480	unsigned long flags;
481
482	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
483		table = ports_table[p];
484		read_lock_irqsave(&table->rwlock, flags);
485		local_index = find_gid(table, gid, val, false, mask, NULL);
486		if (local_index >= 0) {
487			if (index)
488				*index = local_index;
489			if (port)
490				*port = p + rdma_start_port(ib_dev);
491			read_unlock_irqrestore(&table->rwlock, flags);
492			return 0;
493		}
494		read_unlock_irqrestore(&table->rwlock, flags);
495	}
496
497	return -ENOENT;
498}
499
500static int ib_cache_gid_find(struct ib_device *ib_dev,
501			     const union ib_gid *gid,
502			     enum ib_gid_type gid_type,
503			     struct net_device *ndev, u8 *port,
504			     u16 *index)
505{
506	unsigned long mask = GID_ATTR_FIND_MASK_GID |
507			     GID_ATTR_FIND_MASK_GID_TYPE;
508	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
509
510	if (ndev)
511		mask |= GID_ATTR_FIND_MASK_NETDEV;
512
513	return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
514					mask, port, index);
515}
516
517int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
518			       const union ib_gid *gid,
519			       enum ib_gid_type gid_type,
520			       u8 port, struct net_device *ndev,
521			       u16 *index)
522{
523	int local_index;
524	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
525	struct ib_gid_table *table;
526	unsigned long mask = GID_ATTR_FIND_MASK_GID |
527			     GID_ATTR_FIND_MASK_GID_TYPE;
528	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
529	unsigned long flags;
530
531	if (port < rdma_start_port(ib_dev) ||
532	    port > rdma_end_port(ib_dev))
533		return -ENOENT;
534
535	table = ports_table[port - rdma_start_port(ib_dev)];
536
537	if (ndev)
538		mask |= GID_ATTR_FIND_MASK_NETDEV;
539
540	read_lock_irqsave(&table->rwlock, flags);
541	local_index = find_gid(table, gid, &val, false, mask, NULL);
542	if (local_index >= 0) {
543		if (index)
544			*index = local_index;
545		read_unlock_irqrestore(&table->rwlock, flags);
546		return 0;
547	}
548
549	read_unlock_irqrestore(&table->rwlock, flags);
550	return -ENOENT;
551}
552EXPORT_SYMBOL(ib_find_cached_gid_by_port);
553
554/**
555 * ib_cache_gid_find_by_filter - Returns the GID table index where a
556 * specified GID value occurs
557 * @ib_dev: The device to query.
558 * @gid: The GID value to search for.
559 * @port: The port number of the device whose GID table is to be
560 *   searched.
561 * @filter: The filter function is executed on any matching GID in the table.
562 *   If the filter function returns true, the corresponding index is returned,
563 *   otherwise, we continue searching the GID table. It's guaranteed that
564 *   while filter is executed, ndev field is valid and the structure won't
565 *   change. filter is executed in an atomic context. filter must not be NULL.
566 * @index: The index into the cached GID table where the GID was found.  This
567 *   parameter may be NULL.
568 *
569 * ib_cache_gid_find_by_filter() searches for the specified GID value
570 * of which the filter function returns true in the port's GID table.
571 * This function is only supported on RoCE ports.
572 *
573 */
574static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
575				       const union ib_gid *gid,
576				       u8 port,
577				       bool (*filter)(const union ib_gid *,
578						      const struct ib_gid_attr *,
579						      void *),
580				       void *context,
581				       u16 *index)
582{
583	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
584	struct ib_gid_table *table;
585	unsigned int i;
586	unsigned long flags;
587	bool found = false;
588
589	if (!ports_table)
590		return -EOPNOTSUPP;
591
592	if (port < rdma_start_port(ib_dev) ||
593	    port > rdma_end_port(ib_dev) ||
594	    !rdma_protocol_roce(ib_dev, port))
595		return -EPROTONOSUPPORT;
596
597	table = ports_table[port - rdma_start_port(ib_dev)];
598
599	read_lock_irqsave(&table->rwlock, flags);
600	for (i = 0; i < table->sz; i++) {
601		struct ib_gid_attr attr;
602
603		if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
604			goto next;
605
606		if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
607			goto next;
608
609		memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
610
611		if (filter(gid, &attr, context))
612			found = true;
613
614next:
615		if (found)
616			break;
617	}
618	read_unlock_irqrestore(&table->rwlock, flags);
619
620	if (!found)
621		return -ENOENT;
622
623	if (index)
624		*index = i;
625	return 0;
626}
627
628static struct ib_gid_table *alloc_gid_table(int sz)
629{
630	struct ib_gid_table *table =
631		kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
632
633	if (!table)
634		return NULL;
635
636	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
637	if (!table->data_vec)
638		goto err_free_table;
639
640	mutex_init(&table->lock);
641
642	table->sz = sz;
643	rwlock_init(&table->rwlock);
644
645	return table;
646
647err_free_table:
648	kfree(table);
649	return NULL;
650}
651
652static void release_gid_table(struct ib_gid_table *table)
653{
654	if (table) {
655		kfree(table->data_vec);
656		kfree(table);
657	}
658}
659
660static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
661				   struct ib_gid_table *table)
662{
663	int i;
664	bool deleted = false;
665
666	if (!table)
667		return;
668
669	write_lock_irq(&table->rwlock);
670	for (i = 0; i < table->sz; ++i) {
671		if (memcmp(&table->data_vec[i].gid, &zgid,
672			   sizeof(table->data_vec[i].gid)))
673			if (!del_gid(ib_dev, port, table, i,
674				     table->data_vec[i].props &
675				     GID_ATTR_FIND_MASK_DEFAULT))
676				deleted = true;
677	}
678	write_unlock_irq(&table->rwlock);
679
680	if (deleted)
681		dispatch_gid_change_event(ib_dev, port);
682}
683
684void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
685				  struct net_device *ndev,
686				  unsigned long gid_type_mask,
687				  enum ib_cache_gid_default_mode mode)
688{
689	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
690	union ib_gid gid;
691	struct ib_gid_attr gid_attr;
692	struct ib_gid_attr zattr_type = zattr;
693	struct ib_gid_table *table;
694	unsigned int gid_type;
695
696	table  = ports_table[port - rdma_start_port(ib_dev)];
697
698	make_default_gid(ndev, &gid);
699	memset(&gid_attr, 0, sizeof(gid_attr));
700	gid_attr.ndev = ndev;
701
702	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
703		int ix;
704		union ib_gid current_gid;
705		struct ib_gid_attr current_gid_attr = {};
706
707		if (1UL << gid_type & ~gid_type_mask)
708			continue;
709
710		gid_attr.gid_type = gid_type;
711
712		mutex_lock(&table->lock);
713		write_lock_irq(&table->rwlock);
714		ix = find_gid(table, NULL, &gid_attr, true,
715			      GID_ATTR_FIND_MASK_GID_TYPE |
716			      GID_ATTR_FIND_MASK_DEFAULT,
717			      NULL);
718
719		/* Couldn't find default GID location */
720		if (WARN_ON(ix < 0))
721			goto release;
722
723		zattr_type.gid_type = gid_type;
724
725		if (!__ib_cache_gid_get(ib_dev, port, ix,
726					&current_gid, &current_gid_attr) &&
727		    mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
728		    !memcmp(&gid, &current_gid, sizeof(gid)) &&
729		    !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
730			goto release;
731
732		if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
733		    memcmp(&current_gid_attr, &zattr_type,
734			   sizeof(current_gid_attr))) {
735			if (del_gid(ib_dev, port, table, ix, true)) {
736				pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
737					ix, gid.raw);
738				goto release;
739			} else {
740				dispatch_gid_change_event(ib_dev, port);
741			}
742		}
743
744		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
745			if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
746				pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
747					gid.raw);
748			else
749				dispatch_gid_change_event(ib_dev, port);
750		}
751
752release:
753		if (current_gid_attr.ndev)
754			dev_put(current_gid_attr.ndev);
755		write_unlock_irq(&table->rwlock);
756		mutex_unlock(&table->lock);
757	}
758}
759
760static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
761				     struct ib_gid_table *table)
762{
763	unsigned int i;
764	unsigned long roce_gid_type_mask;
765	unsigned int num_default_gids;
766	unsigned int current_gid = 0;
767
768	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
769	num_default_gids = hweight_long(roce_gid_type_mask);
770	for (i = 0; i < num_default_gids && i < table->sz; i++) {
771		struct ib_gid_table_entry *entry =
772			&table->data_vec[i];
773
774		entry->props |= GID_TABLE_ENTRY_DEFAULT;
775		current_gid = find_next_bit(&roce_gid_type_mask,
776					    BITS_PER_LONG,
777					    current_gid);
778		entry->attr.gid_type = current_gid++;
779	}
780
781	return 0;
782}
783
784static int _gid_table_setup_one(struct ib_device *ib_dev)
785{
786	u8 port;
787	struct ib_gid_table **table;
788	int err = 0;
789
790	table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
791
792	if (!table) {
793		pr_warn("failed to allocate ib gid cache for %s\n",
794			ib_dev->name);
795		return -ENOMEM;
796	}
797
798	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
799		u8 rdma_port = port + rdma_start_port(ib_dev);
800
801		table[port] =
802			alloc_gid_table(
803				ib_dev->port_immutable[rdma_port].gid_tbl_len);
804		if (!table[port]) {
805			err = -ENOMEM;
806			goto rollback_table_setup;
807		}
808
809		err = gid_table_reserve_default(ib_dev,
810						port + rdma_start_port(ib_dev),
811						table[port]);
812		if (err)
813			goto rollback_table_setup;
814	}
815
816	ib_dev->cache.gid_cache = table;
817	return 0;
818
819rollback_table_setup:
820	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
821		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
822				       table[port]);
823		release_gid_table(table[port]);
824	}
825
826	kfree(table);
827	return err;
828}
829
830static void gid_table_release_one(struct ib_device *ib_dev)
831{
832	struct ib_gid_table **table = ib_dev->cache.gid_cache;
833	u8 port;
834
835	if (!table)
836		return;
837
838	for (port = 0; port < ib_dev->phys_port_cnt; port++)
839		release_gid_table(table[port]);
840
841	kfree(table);
842	ib_dev->cache.gid_cache = NULL;
843}
844
845static void gid_table_cleanup_one(struct ib_device *ib_dev)
846{
847	struct ib_gid_table **table = ib_dev->cache.gid_cache;
848	u8 port;
849
850	if (!table)
851		return;
852
853	for (port = 0; port < ib_dev->phys_port_cnt; port++)
854		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
855				       table[port]);
856}
857
858static int gid_table_setup_one(struct ib_device *ib_dev)
859{
860	int err;
861
862	err = _gid_table_setup_one(ib_dev);
863
864	if (err)
865		return err;
866
867	err = roce_rescan_device(ib_dev);
868
869	if (err) {
870		gid_table_cleanup_one(ib_dev);
871		gid_table_release_one(ib_dev);
872	}
873
874	return err;
875}
876
877int ib_get_cached_gid(struct ib_device *device,
878		      u8                port_num,
879		      int               index,
880		      union ib_gid     *gid,
881		      struct ib_gid_attr *gid_attr)
882{
883	int res;
884	unsigned long flags;
885	struct ib_gid_table **ports_table = device->cache.gid_cache;
886	struct ib_gid_table *table;
887
888	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
889		return -EINVAL;
890	table = ports_table[port_num - rdma_start_port(device)];
891	read_lock_irqsave(&table->rwlock, flags);
892	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
893	read_unlock_irqrestore(&table->rwlock, flags);
894
895	return res;
896}
897EXPORT_SYMBOL(ib_get_cached_gid);
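
/*
 * Usage sketch (hypothetical caller, not part of this file).  Note that
 * __ib_cache_gid_get() takes a reference on attr.ndev when it is valid, so
 * a caller that passes a gid_attr must drop that reference:
 *
 *	union ib_gid gid;
 *	struct ib_gid_attr attr;
 *
 *	if (!ib_get_cached_gid(device, port, 0, &gid, &attr)) {
 *		...use gid / attr.gid_type...
 *		if (attr.ndev)
 *			dev_put(attr.ndev);
 *	}
 */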
898
899int ib_find_cached_gid(struct ib_device *device,
900		       const union ib_gid *gid,
901		       enum ib_gid_type gid_type,
902		       struct net_device *ndev,
903		       u8               *port_num,
904		       u16              *index)
905{
906	return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
907}
908EXPORT_SYMBOL(ib_find_cached_gid);
909
910int ib_find_gid_by_filter(struct ib_device *device,
911			  const union ib_gid *gid,
912			  u8 port_num,
913			  bool (*filter)(const union ib_gid *gid,
914					 const struct ib_gid_attr *,
915					 void *),
916			  void *context, u16 *index)
917{
918	/* Only RoCE GID table supports filter function */
919	if (!rdma_cap_roce_gid_table(device, port_num) && filter)
920		return -EPROTONOSUPPORT;
921
922	return ib_cache_gid_find_by_filter(device, gid,
923					   port_num, filter,
924					   context, index);
925}
926EXPORT_SYMBOL(ib_find_gid_by_filter);
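
/*
 * Filter sketch (hypothetical, not part of this file): a filter callback
 * runs under the table rwlock (atomic context) and returns true to accept
 * the candidate entry.  For example, to accept only RoCE v2 entries:
 *
 *	static bool match_roce_v2(const union ib_gid *gid,
 *				  const struct ib_gid_attr *attr, void *ctx)
 *	{
 *		return attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
 *	}
 *
 *	err = ib_find_gid_by_filter(device, gid, port, match_roce_v2,
 *				    NULL, &index);
 */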
927
928int ib_get_cached_pkey(struct ib_device *device,
929		       u8                port_num,
930		       int               index,
931		       u16              *pkey)
932{
933	struct ib_pkey_cache *cache;
934	unsigned long flags;
935	int ret = 0;
936
937	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
938		return -EINVAL;
939
940	read_lock_irqsave(&device->cache.lock, flags);
941
942	cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
943
944	if (index < 0 || index >= cache->table_len)
945		ret = -EINVAL;
946	else
947		*pkey = cache->table[index];
948
949	read_unlock_irqrestore(&device->cache.lock, flags);
950
951	return ret;
952}
953EXPORT_SYMBOL(ib_get_cached_pkey);
954
955int ib_find_cached_pkey(struct ib_device *device,
956			u8                port_num,
957			u16               pkey,
958			u16              *index)
959{
960	struct ib_pkey_cache *cache;
961	unsigned long flags;
962	int i;
963	int ret = -ENOENT;
964	int partial_ix = -1;
965
966	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
967		return -EINVAL;
968
969	read_lock_irqsave(&device->cache.lock, flags);
970
971	cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
972
973	*index = -1;
974
975	for (i = 0; i < cache->table_len; ++i)
976		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
977			if (cache->table[i] & 0x8000) {
978				*index = i;
979				ret = 0;
980				break;
981			} else
982				partial_ix = i;
983		}
984
985	if (ret && partial_ix >= 0) {
986		*index = partial_ix;
987		ret = 0;
988	}
989
990	read_unlock_irqrestore(&device->cache.lock, flags);
991
992	return ret;
993}
994EXPORT_SYMBOL(ib_find_cached_pkey);
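
/*
 * Informational note: ib_find_cached_pkey() above compares only the low 15
 * bits of the P_Key and prefers a full-membership entry (bit 15 set) over a
 * partial-membership one.  E.g. searching for 0x8001 in a table holding
 * { 0x0001, 0x8001 } returns the index of 0x8001; if only 0x0001 were
 * present, that index would be returned instead.
 */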
995
996int ib_find_exact_cached_pkey(struct ib_device *device,
997			      u8                port_num,
998			      u16               pkey,
999			      u16              *index)
1000{
1001	struct ib_pkey_cache *cache;
1002	unsigned long flags;
1003	int i;
1004	int ret = -ENOENT;
1005
1006	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
1007		return -EINVAL;
1008
1009	read_lock_irqsave(&device->cache.lock, flags);
1010
1011	cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
1012
1013	*index = -1;
1014
1015	for (i = 0; i < cache->table_len; ++i)
1016		if (cache->table[i] == pkey) {
1017			*index = i;
1018			ret = 0;
1019			break;
1020		}
1021
1022	read_unlock_irqrestore(&device->cache.lock, flags);
1023
1024	return ret;
1025}
1026EXPORT_SYMBOL(ib_find_exact_cached_pkey);
1027
1028int ib_get_cached_lmc(struct ib_device *device,
1029		      u8                port_num,
1030		      u8                *lmc)
1031{
1032	unsigned long flags;
1033	int ret = 0;
1034
1035	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
1036		return -EINVAL;
1037
1038	read_lock_irqsave(&device->cache.lock, flags);
1039	*lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)];
1040	read_unlock_irqrestore(&device->cache.lock, flags);
1041
1042	return ret;
1043}
1044EXPORT_SYMBOL(ib_get_cached_lmc);
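
/*
 * Informational note: the LMC (LID Mask Control) gives the number of low
 * LID bits that may vary for this port; a port with base LID 0x10 and
 * lmc == 2 answers to LIDs 0x10 through 0x13.
 */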
1045
1046static void ib_cache_update(struct ib_device *device,
1047			    u8                port)
1048{
1049	struct ib_port_attr       *tprops = NULL;
1050	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
1051	struct ib_gid_cache {
1052		int             table_len;
1053		union ib_gid    table[0];
1054	}			  *gid_cache = NULL;
1055	int                        i;
1056	int                        ret;
1057	struct ib_gid_table	  *table;
1058	struct ib_gid_table	 **ports_table = device->cache.gid_cache;
1059	bool			   use_roce_gid_table =
1060					rdma_cap_roce_gid_table(device, port);
1061
1062	if (port < rdma_start_port(device) || port > rdma_end_port(device))
1063		return;
1064
1065	table = ports_table[port - rdma_start_port(device)];
1066
1067	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
1068	if (!tprops)
1069		return;
1070
1071	ret = ib_query_port(device, port, tprops);
1072	if (ret) {
1073		pr_warn("ib_query_port failed (%d) for %s\n",
1074			ret, device->name);
1075		goto err;
1076	}
1077
1078	pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
1079			     sizeof *pkey_cache->table, GFP_KERNEL);
1080	if (!pkey_cache)
1081		goto err;
1082
1083	pkey_cache->table_len = tprops->pkey_tbl_len;
1084
1085	if (!use_roce_gid_table) {
1086		gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
1087			    sizeof(*gid_cache->table), GFP_KERNEL);
1088		if (!gid_cache)
1089			goto err;
1090
1091		gid_cache->table_len = tprops->gid_tbl_len;
1092	}
1093
1094	for (i = 0; i < pkey_cache->table_len; ++i) {
1095		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
1096		if (ret) {
1097			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
1098				ret, device->name, i);
1099			goto err;
1100		}
1101	}
1102
1103	if (!use_roce_gid_table) {
1104		for (i = 0;  i < gid_cache->table_len; ++i) {
1105			ret = ib_query_gid(device, port, i,
1106					   gid_cache->table + i, NULL);
1107			if (ret) {
1108				pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
1109					ret, device->name, i);
1110				goto err;
1111			}
1112		}
1113	}
1114
1115	write_lock_irq(&device->cache.lock);
1116
1117	old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)];
1118
1119	device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
1120	if (!use_roce_gid_table) {
1121		write_lock(&table->rwlock);
1122		for (i = 0; i < gid_cache->table_len; i++) {
1123			modify_gid(device, port, table, i, gid_cache->table + i,
1124				   &zattr, false);
1125		}
1126		write_unlock(&table->rwlock);
1127	}
1128
1129	device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
1130
1131	write_unlock_irq(&device->cache.lock);
1132
1133	kfree(gid_cache);
1134	kfree(old_pkey_cache);
1135	kfree(tprops);
1136	return;
1137
1138err:
1139	kfree(pkey_cache);
1140	kfree(gid_cache);
1141	kfree(tprops);
1142}
1143
1144static void ib_cache_task(struct work_struct *_work)
1145{
1146	struct ib_update_work *work =
1147		container_of(_work, struct ib_update_work, work);
1148
1149	ib_cache_update(work->device, work->port_num);
1150	kfree(work);
1151}
1152
1153static void ib_cache_event(struct ib_event_handler *handler,
1154			   struct ib_event *event)
1155{
1156	struct ib_update_work *work;
1157
1158	if (event->event == IB_EVENT_PORT_ERR    ||
1159	    event->event == IB_EVENT_PORT_ACTIVE ||
1160	    event->event == IB_EVENT_LID_CHANGE  ||
1161	    event->event == IB_EVENT_PKEY_CHANGE ||
1162	    event->event == IB_EVENT_SM_CHANGE   ||
1163	    event->event == IB_EVENT_CLIENT_REREGISTER ||
1164	    event->event == IB_EVENT_GID_CHANGE) {
1165		work = kmalloc(sizeof *work, GFP_ATOMIC);
1166		if (work) {
1167			INIT_WORK(&work->work, ib_cache_task);
1168			work->device   = event->device;
1169			work->port_num = event->element.port_num;
1170			queue_work(ib_wq, &work->work);
1171		}
1172	}
1173}
1174
1175int ib_cache_setup_one(struct ib_device *device)
1176{
1177	int p;
1178	int err;
1179
1180	rwlock_init(&device->cache.lock);
1181
1182	device->cache.pkey_cache =
1183		kzalloc(sizeof *device->cache.pkey_cache *
1184			(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
1185	device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
1186					  (rdma_end_port(device) -
1187					   rdma_start_port(device) + 1),
1188					  GFP_KERNEL);
1189	if (!device->cache.pkey_cache ||
1190	    !device->cache.lmc_cache) {
1191		pr_warn("Couldn't allocate cache for %s\n", device->name);
1192		return -ENOMEM;
1193	}
1194
1195	err = gid_table_setup_one(device);
1196	if (err)
1197		/* Allocated memory will be cleaned in the release function */
1198		return err;
1199
1200	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1201		ib_cache_update(device, p + rdma_start_port(device));
1202
1203	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
1204			      device, ib_cache_event);
1205	err = ib_register_event_handler(&device->cache.event_handler);
1206	if (err)
1207		goto err;
1208
1209	return 0;
1210
1211err:
1212	gid_table_cleanup_one(device);
1213	return err;
1214}
1215
1216void ib_cache_release_one(struct ib_device *device)
1217{
1218	int p;
1219
1220	/*
1221	 * The release function frees all the cache elements.
1222	 * This function should be called as part of freeing
1223	 * all the device's resources, once the cache can no
1224	 * longer be accessed.
1225	 */
1226	if (device->cache.pkey_cache)
1227		for (p = 0;
1228		     p <= rdma_end_port(device) - rdma_start_port(device); ++p)
1229			kfree(device->cache.pkey_cache[p]);
1230
1231	gid_table_release_one(device);
1232	kfree(device->cache.pkey_cache);
1233	kfree(device->cache.lmc_cache);
1234}
1235
1236void ib_cache_cleanup_one(struct ib_device *device)
1237{
1238	/* The cleanup function unregisters the event handler,
1239	 * waits for all in-progress workqueue elements and cleans
1240	 * up the GID cache. This function should be called after
1241	 * the device was removed from the devices list and all
1242	 * clients were removed, so the cache exists but is
1243	 * non-functional and shouldn't be updated anymore.
1244	 */
1245	ib_unregister_event_handler(&device->cache.event_handler);
1246	flush_workqueue(ib_wq);
1247	gid_table_cleanup_one(device);
1248}
1249
1250void __init ib_cache_setup(void)
1251{
1252	roce_gid_mgmt_init();
1253}
1254
1255void __exit ib_cache_cleanup(void)
1256{
1257	roce_gid_mgmt_cleanup();
1258}
1259