ib_device.c revision 331769
/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>

#include "core_priv.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");

struct ib_client_data {
	struct list_head  list;
	struct ib_client *client;
	void *            data;
	/* The device or client is going down. Do not call client or device
	 * callbacks other than remove(). */
	bool		  going_down;
};

struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);

/* The device_list and client_list contain devices and clients after their
 * registration has completed, and the devices and clients are removed
 * during unregistration. */
static LIST_HEAD(device_list);
static LIST_HEAD(client_list);

/*
 * device_mutex and lists_rwsem protect access to both device_list and
 * client_list.  device_mutex protects writer access by device and client
 * registration / de-registration.  lists_rwsem protects reader access to
 * these lists.  Iterators of these lists must lock it for read, while updates
 * to the lists must be done with a write lock. A special case is when the
 * device_mutex is locked. In this case locking the lists for read access is
 * not necessary as the device_mutex implies it.
 *
 * lists_rwsem also protects access to the client data list.
 */
static DEFINE_MUTEX(device_mutex);
static DECLARE_RWSEM(lists_rwsem);

static int ib_device_check_mandatory(struct ib_device *device)
{
#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
	static const struct {
		size_t offset;
		char  *name;
	} mandatory_table[] = {
		IB_MANDATORY_FUNC(query_device),
		IB_MANDATORY_FUNC(query_port),
		IB_MANDATORY_FUNC(query_pkey),
		IB_MANDATORY_FUNC(query_gid),
		IB_MANDATORY_FUNC(alloc_pd),
		IB_MANDATORY_FUNC(dealloc_pd),
		IB_MANDATORY_FUNC(create_ah),
		IB_MANDATORY_FUNC(destroy_ah),
		IB_MANDATORY_FUNC(create_qp),
		IB_MANDATORY_FUNC(modify_qp),
		IB_MANDATORY_FUNC(destroy_qp),
		IB_MANDATORY_FUNC(post_send),
		IB_MANDATORY_FUNC(post_recv),
		IB_MANDATORY_FUNC(create_cq),
		IB_MANDATORY_FUNC(destroy_cq),
		IB_MANDATORY_FUNC(poll_cq),
		IB_MANDATORY_FUNC(req_notify_cq),
		IB_MANDATORY_FUNC(get_dma_mr),
		IB_MANDATORY_FUNC(dereg_mr),
		IB_MANDATORY_FUNC(get_port_immutable)
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
		if (!*(void **) ((char *) device + mandatory_table[i].offset)) {
			pr_warn("Device %s is missing mandatory function %s\n",
				device->name, mandatory_table[i].name);
			return -EINVAL;
		}
	}

	return 0;
}

static struct ib_device *__ib_device_get_by_name(const char *name)
{
	struct ib_device *device;

	list_for_each_entry(device, &device_list, core_list)
		if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
			return device;

	return NULL;
}

static int alloc_name(char *name)
{
	unsigned long *inuse;
	char buf[IB_DEVICE_NAME_MAX];
	struct ib_device *device;
	int i;

	inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
	if (!inuse)
		return -ENOMEM;

	list_for_each_entry(device, &device_list, core_list) {
		if (!sscanf(device->name, name, &i))
			continue;
		if (i < 0 || i >= PAGE_SIZE * 8)
			continue;
		snprintf(buf, sizeof buf, name, i);
		if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
			set_bit(i, inuse);
	}

	i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
	free_page((unsigned long) inuse);
	snprintf(buf, sizeof buf, name, i);

	if (__ib_device_get_by_name(buf))
		return -ENFILE;

	strlcpy(name, buf, IB_DEVICE_NAME_MAX);
	return 0;
}

static void ib_device_release(struct device *device)
{
	struct ib_device *dev = container_of(device, struct ib_device, dev);

	ib_cache_release_one(dev);
	kfree(dev->port_immutable);
	kfree(dev);
}

static struct class ib_class = {
	.name    = "infiniband",
	.dev_release = ib_device_release,
};

/**
 * ib_alloc_device - allocate an IB device struct
 * @size:size of structure to allocate
 *
 * Low-level drivers should use ib_alloc_device() to allocate &struct
 * ib_device.  @size is the size of the structure to be allocated,
 * including any private data used by the low-level driver.
 * ib_dealloc_device() must be used to free structures allocated with
 * ib_alloc_device().
 */
struct ib_device *ib_alloc_device(size_t size)
{
	struct ib_device *device;

	if (WARN_ON(size < sizeof(struct ib_device)))
		return NULL;

	device = kzalloc(size, GFP_KERNEL);
	if (!device)
		return NULL;

	device->dev.parent = &linux_root_device;
	device->dev.class = &ib_class;
	device_initialize(&device->dev);

	dev_set_drvdata(&device->dev, device);

	INIT_LIST_HEAD(&device->event_handler_list);
	spin_lock_init(&device->event_handler_lock);
	spin_lock_init(&device->client_data_lock);
	INIT_LIST_HEAD(&device->client_data_list);
	INIT_LIST_HEAD(&device->port_list);

	return device;
}
EXPORT_SYMBOL(ib_alloc_device);

/**
 * ib_dealloc_device - free an IB device struct
 * @device:structure to free
 *
 * Free a structure allocated with ib_alloc_device().
 */
void ib_dealloc_device(struct ib_device *device)
{
	WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
		device->reg_state != IB_DEV_UNINITIALIZED);
	kobject_put(&device->dev.kobj);
}
EXPORT_SYMBOL(ib_dealloc_device);
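
/*
 * Example (editor's sketch, not part of the original source): a low-level
 * driver embeds struct ib_device at the start of its own device structure
 * and sizes the ib_alloc_device() call accordingly.  "struct my_hca" and
 * its members are hypothetical names.
 *
 *	struct my_hca {
 *		struct ib_device ibdev;	<- must come first
 *		void *priv;		<- driver-private state follows
 *	};
 *
 *	struct my_hca *hca =
 *		(struct my_hca *)ib_alloc_device(sizeof(struct my_hca));
 *	if (!hca)
 *		return -ENOMEM;
 *	...
 *	ib_dealloc_device(&hca->ibdev);	<- frees the whole allocation
 */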

static int add_client_context(struct ib_device *device, struct ib_client *client)
{
	struct ib_client_data *context;
	unsigned long flags;

	context = kmalloc(sizeof *context, GFP_KERNEL);
	if (!context) {
		pr_warn("Couldn't allocate client context for %s/%s\n",
			device->name, client->name);
		return -ENOMEM;
	}

	context->client = client;
	context->data   = NULL;
	context->going_down = false;

	down_write(&lists_rwsem);
	spin_lock_irqsave(&device->client_data_lock, flags);
	list_add(&context->list, &device->client_data_list);
	spin_unlock_irqrestore(&device->client_data_lock, flags);
	up_write(&lists_rwsem);

	return 0;
}

static int verify_immutable(const struct ib_device *dev, u8 port)
{
	return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
			    rdma_max_mad_size(dev, port) != 0);
}

static int read_port_immutable(struct ib_device *device)
{
	int ret;
	u8 start_port = rdma_start_port(device);
	u8 end_port = rdma_end_port(device);
	u8 port;

	/**
	 * device->port_immutable is indexed directly by the port number to make
	 * access to this data as efficient as possible.
	 *
	 * Therefore port_immutable is declared as a 1-based array with
	 * potential empty slots at the beginning.
	 */
	device->port_immutable = kzalloc(sizeof(*device->port_immutable)
					 * (end_port + 1),
					 GFP_KERNEL);
	if (!device->port_immutable)
		return -ENOMEM;

	for (port = start_port; port <= end_port; ++port) {
		ret = device->get_port_immutable(device, port,
						 &device->port_immutable[port]);
		if (ret)
			return ret;

		if (verify_immutable(device, port))
			return -EINVAL;
	}
	return 0;
}

void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len)
{
	if (dev->get_dev_fw_str)
		dev->get_dev_fw_str(dev, str, str_len);
	else
		str[0] = '\0';
}
EXPORT_SYMBOL(ib_get_device_fw_str);

/**
 * ib_register_device - Register an IB device with IB core
 * @device:Device to register
 *
 * Low-level drivers use ib_register_device() to register their
 * devices with the IB core.  All registered clients will receive a
 * callback for each device that is added. @device must be allocated
 * with ib_alloc_device().
 */
int ib_register_device(struct ib_device *device,
		       int (*port_callback)(struct ib_device *,
					    u8, struct kobject *))
{
	int ret;
	struct ib_client *client;
	struct ib_udata uhw = {.outlen = 0, .inlen = 0};

	mutex_lock(&device_mutex);

	if (strchr(device->name, '%')) {
		ret = alloc_name(device->name);
		if (ret)
			goto out;
	}

	if (ib_device_check_mandatory(device)) {
		ret = -EINVAL;
		goto out;
	}

	ret = read_port_immutable(device);
	if (ret) {
		pr_warn("Couldn't create per port immutable data %s\n",
			device->name);
		goto out;
	}

	ret = ib_cache_setup_one(device);
	if (ret) {
		pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
		goto out;
	}

	memset(&device->attrs, 0, sizeof(device->attrs));
	ret = device->query_device(device, &device->attrs, &uhw);
	if (ret) {
		pr_warn("Couldn't query the device attributes\n");
		ib_cache_cleanup_one(device);
		goto out;
	}

	ret = ib_device_register_sysfs(device, port_callback);
	if (ret) {
		pr_warn("Couldn't register device %s with driver model\n",
			device->name);
		ib_cache_cleanup_one(device);
		goto out;
	}

	device->reg_state = IB_DEV_REGISTERED;

	list_for_each_entry(client, &client_list, list)
		if (client->add && !add_client_context(device, client))
			client->add(device);

	down_write(&lists_rwsem);
	list_add_tail(&device->core_list, &device_list);
	up_write(&lists_rwsem);
out:
	mutex_unlock(&device_mutex);
	return ret;
}
EXPORT_SYMBOL(ib_register_device);
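
/*
 * Example (editor's sketch): a typical registration sequence in a driver's
 * attach/probe path, continuing the hypothetical "hca" from the allocation
 * sketch above.  A '%d' in the name makes alloc_name() pick a free unit
 * number; every method listed in mandatory_table must be filled in first.
 * The "my_*" names are hypothetical.
 *
 *	strlcpy(hca->ibdev.name, "myhca%d", IB_DEVICE_NAME_MAX);
 *	hca->ibdev.query_device = my_query_device;
 *	...				<- remaining mandatory methods
 *
 *	ret = ib_register_device(&hca->ibdev, NULL);
 *	if (ret)
 *		goto err_dealloc;
 */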

/**
 * ib_unregister_device - Unregister an IB device
 * @device:Device to unregister
 *
 * Unregister an IB device.  All clients will receive a remove callback.
 */
void ib_unregister_device(struct ib_device *device)
{
	struct ib_client_data *context, *tmp;
	unsigned long flags;

	mutex_lock(&device_mutex);

	down_write(&lists_rwsem);
	list_del(&device->core_list);
	spin_lock_irqsave(&device->client_data_lock, flags);
	list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
		context->going_down = true;
	spin_unlock_irqrestore(&device->client_data_lock, flags);
	downgrade_write(&lists_rwsem);

	list_for_each_entry_safe(context, tmp, &device->client_data_list,
				 list) {
		if (context->client->remove)
			context->client->remove(device, context->data);
	}
	up_read(&lists_rwsem);

	mutex_unlock(&device_mutex);

	ib_device_unregister_sysfs(device);
	ib_cache_cleanup_one(device);

	down_write(&lists_rwsem);
	spin_lock_irqsave(&device->client_data_lock, flags);
	list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
		kfree(context);
	spin_unlock_irqrestore(&device->client_data_lock, flags);
	up_write(&lists_rwsem);

	device->reg_state = IB_DEV_UNREGISTERED;
}
EXPORT_SYMBOL(ib_unregister_device);

/**
 * ib_register_client - Register an IB client
 * @client:Client to register
 *
 * Upper level users of the IB drivers can use ib_register_client() to
 * register callbacks for IB device addition and removal.  When an IB
 * device is added, each registered client's add method will be called
 * (in the order the clients were registered), and when a device is
 * removed, each client's remove method will be called (in the reverse
 * order that clients were registered).  In addition, when
 * ib_register_client() is called, the client will receive an add
 * callback for all devices already registered.
 */
int ib_register_client(struct ib_client *client)
{
	struct ib_device *device;

	mutex_lock(&device_mutex);

	list_for_each_entry(device, &device_list, core_list)
		if (client->add && !add_client_context(device, client))
			client->add(device);

	down_write(&lists_rwsem);
	list_add_tail(&client->list, &client_list);
	up_write(&lists_rwsem);

	mutex_unlock(&device_mutex);

	return 0;
}
EXPORT_SYMBOL(ib_register_client);

/**
 * ib_unregister_client - Unregister an IB client
 * @client:Client to unregister
 *
 * Upper level users use ib_unregister_client() to remove their client
 * registration.  When ib_unregister_client() is called, the client
 * will receive a remove callback for each IB device still registered.
 */
void ib_unregister_client(struct ib_client *client)
{
	struct ib_client_data *context, *tmp;
	struct ib_device *device;
	unsigned long flags;

	mutex_lock(&device_mutex);

	down_write(&lists_rwsem);
	list_del(&client->list);
	up_write(&lists_rwsem);

	list_for_each_entry(device, &device_list, core_list) {
		struct ib_client_data *found_context = NULL;

		down_write(&lists_rwsem);
		spin_lock_irqsave(&device->client_data_lock, flags);
		list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
			if (context->client == client) {
				context->going_down = true;
				found_context = context;
				break;
			}
		spin_unlock_irqrestore(&device->client_data_lock, flags);
		up_write(&lists_rwsem);

		if (client->remove)
			client->remove(device, found_context ?
					       found_context->data : NULL);

		if (!found_context) {
			pr_warn("No client context found for %s/%s\n",
				device->name, client->name);
			continue;
		}

		down_write(&lists_rwsem);
		spin_lock_irqsave(&device->client_data_lock, flags);
		list_del(&found_context->list);
		kfree(found_context);
		spin_unlock_irqrestore(&device->client_data_lock, flags);
		up_write(&lists_rwsem);
	}

	mutex_unlock(&device_mutex);
}
EXPORT_SYMBOL(ib_unregister_client);

/**
 * ib_get_client_data - Get IB client context
 * @device:Device to get context for
 * @client:Client to get context for
 *
 * ib_get_client_data() returns client context set with
 * ib_set_client_data().
 */
void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
{
	struct ib_client_data *context;
	void *ret = NULL;
	unsigned long flags;

	spin_lock_irqsave(&device->client_data_lock, flags);
	list_for_each_entry(context, &device->client_data_list, list)
		if (context->client == client) {
			ret = context->data;
			break;
		}
	spin_unlock_irqrestore(&device->client_data_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_client_data);

/**
 * ib_set_client_data - Set IB client context
 * @device:Device to set context for
 * @client:Client to set context for
 * @data:Context to set
 *
 * ib_set_client_data() sets client context that can be retrieved with
 * ib_get_client_data().
 */
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
			void *data)
{
	struct ib_client_data *context;
	unsigned long flags;

	spin_lock_irqsave(&device->client_data_lock, flags);
	list_for_each_entry(context, &device->client_data_list, list)
		if (context->client == client) {
			context->data = data;
			goto out;
		}

	pr_warn("No client context found for %s/%s\n",
		device->name, client->name);

out:
	spin_unlock_irqrestore(&device->client_data_lock, flags);
}
EXPORT_SYMBOL(ib_set_client_data);
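
/*
 * Example (editor's sketch): a minimal client tying ib_register_client()
 * and the client-data calls above together.  All "my_*" names, including
 * struct my_state, are hypothetical.
 *
 *	static struct ib_client my_client;
 *
 *	static void my_add_one(struct ib_device *device)
 *	{
 *		struct my_state *st = kzalloc(sizeof(*st), GFP_KERNEL);
 *
 *		if (st)
 *			ib_set_client_data(device, &my_client, st);
 *	}
 *
 *	static void my_remove_one(struct ib_device *device, void *client_data)
 *	{
 *		kfree(client_data);	<- pointer set in my_add_one
 *	}
 *
 *	static struct ib_client my_client = {
 *		.name   = "my_client",
 *		.add    = my_add_one,
 *		.remove = my_remove_one,
 *	};
 *
 *	ret = ib_register_client(&my_client);
 */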

/**
 * ib_register_event_handler - Register an IB event handler
 * @event_handler:Handler to register
 *
 * ib_register_event_handler() registers an event handler that will be
 * called back when asynchronous IB events occur (as defined in
 * chapter 11 of the InfiniBand Architecture Specification).  This
 * callback may occur in interrupt context.
 */
int ib_register_event_handler(struct ib_event_handler *event_handler)
{
	unsigned long flags;

	spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
	list_add_tail(&event_handler->list,
		      &event_handler->device->event_handler_list);
	spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_register_event_handler);
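
/*
 * Example (editor's sketch): consumers typically fill the handler with the
 * INIT_IB_EVENT_HANDLER() helper from <rdma/ib_verbs.h> (assumed available
 * in this tree) and must tolerate interrupt context in the callback.
 * "my_event_cb" and "priv" are hypothetical.
 *
 *	static void my_event_cb(struct ib_event_handler *handler,
 *				struct ib_event *event)
 *	{
 *		if (event->event == IB_EVENT_PORT_ERR)
 *			...	<- no sleeping here
 *	}
 *
 *	INIT_IB_EVENT_HANDLER(&priv->handler, device, my_event_cb);
 *	ib_register_event_handler(&priv->handler);
 */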

/**
 * ib_unregister_event_handler - Unregister an event handler
 * @event_handler:Handler to unregister
 *
 * Unregister an event handler registered with
 * ib_register_event_handler().
 */
int ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
	unsigned long flags;

	spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
	list_del(&event_handler->list);
	spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_unregister_event_handler);

/**
 * ib_dispatch_event - Dispatch an asynchronous event
 * @event:Event to dispatch
 *
 * Low-level drivers must call ib_dispatch_event() to dispatch the
 * event to all registered event handlers when an asynchronous event
 * occurs.
 */
void ib_dispatch_event(struct ib_event *event)
{
	unsigned long flags;
	struct ib_event_handler *handler;

	spin_lock_irqsave(&event->device->event_handler_lock, flags);

	list_for_each_entry(handler, &event->device->event_handler_list, list)
		handler->handler(handler, event);

	spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
}
EXPORT_SYMBOL(ib_dispatch_event);

/**
 * ib_query_port - Query IB port attributes
 * @device:Device to query
 * @port_num:Port number to query
 * @port_attr:Port attributes
 *
 * ib_query_port() returns the attributes of a port through the
 * @port_attr pointer.
 */
int ib_query_port(struct ib_device *device,
		  u8 port_num,
		  struct ib_port_attr *port_attr)
{
	union ib_gid gid;
	int err;

	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
		return -EINVAL;

	memset(port_attr, 0, sizeof(*port_attr));
	err = device->query_port(device, port_num, port_attr);
	if (err || port_attr->subnet_prefix)
		return err;

	if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
		return 0;

	err = ib_query_gid(device, port_num, 0, &gid, NULL);
	if (err)
		return err;

	port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
	return 0;
}
EXPORT_SYMBOL(ib_query_port);
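
/*
 * Example (editor's sketch): walking every port of a device with
 * ib_query_port().  rdma_start_port()/rdma_end_port() bound the 1-based
 * port space used throughout this file.
 *
 *	struct ib_port_attr attr;
 *	u8 port;
 *
 *	for (port = rdma_start_port(device);
 *	     port <= rdma_end_port(device); port++) {
 *		if (ib_query_port(device, port, &attr))
 *			continue;
 *		if (attr.state == IB_PORT_ACTIVE)
 *			...	<- port is usable
 *	}
 */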

/**
 * ib_query_gid - Get GID table entry
 * @device:Device to query
 * @port_num:Port number to query
 * @index:GID table index to query
 * @gid:Returned GID
 * @attr: Returned GID attributes related to this GID index (only in RoCE).
 *   NULL means ignore.
 *
 * ib_query_gid() fetches the specified GID table entry.
 */
int ib_query_gid(struct ib_device *device,
		 u8 port_num, int index, union ib_gid *gid,
		 struct ib_gid_attr *attr)
{
	if (rdma_cap_roce_gid_table(device, port_num))
		return ib_get_cached_gid(device, port_num, index, gid, attr);

	if (attr)
		return -EINVAL;

	return device->query_gid(device, port_num, index, gid);
}
EXPORT_SYMBOL(ib_query_gid);

/**
 * ib_enum_roce_netdev - enumerate all RoCE ports
 * @ib_dev: IB device we want to query
 * @filter: Should we call the callback?
 * @filter_cookie: Cookie passed to filter
 * @cb: Callback to call for each RoCE port found
 * @cookie: Cookie passed back to the callback
 *
 * Enumerates all of the physical RoCE ports of ib_dev that are
 * related to a netdevice and calls the callback on each port for
 * which the filter function returns non-zero.
 */
void ib_enum_roce_netdev(struct ib_device *ib_dev,
			 roce_netdev_filter filter,
			 void *filter_cookie,
			 roce_netdev_callback cb,
			 void *cookie)
{
	u8 port;

	for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev);
	     port++)
		if (rdma_protocol_roce(ib_dev, port)) {
			struct net_device *idev = NULL;

			if (ib_dev->get_netdev)
				idev = ib_dev->get_netdev(ib_dev, port);

			if (idev && (idev->if_flags & IFF_DYING)) {
				dev_put(idev);
				idev = NULL;
			}

			if (filter(ib_dev, port, idev, filter_cookie))
				cb(ib_dev, port, idev, cookie);

			if (idev)
				dev_put(idev);
		}
}

/**
 * ib_enum_all_roce_netdevs - enumerate all RoCE devices
 * @filter: Should we call the callback?
 * @filter_cookie: Cookie passed to filter
 * @cb: Callback to call for each RoCE port found
 * @cookie: Cookie passed back to the callback
 *
 * Enumerates the physical ports of all RoCE devices that are related
 * to netdevices and calls the callback on each port for which the
 * filter function returns non-zero.
 */
void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
			      void *filter_cookie,
			      roce_netdev_callback cb,
			      void *cookie)
{
	struct ib_device *dev;

	down_read(&lists_rwsem);
	list_for_each_entry(dev, &device_list, core_list)
		ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
	up_read(&lists_rwsem);
}

/**
 * ib_cache_gid_del_all_by_netdev - delete GIDs belonging to a netdevice
 *
 * @ndev: Pointer to netdevice
 */
void ib_cache_gid_del_all_by_netdev(struct net_device *ndev)
{
	struct ib_device *ib_dev;
	u8 port;

	down_read(&lists_rwsem);
	list_for_each_entry(ib_dev, &device_list, core_list) {
		for (port = rdma_start_port(ib_dev);
		     port <= rdma_end_port(ib_dev);
		     port++) {
			if (rdma_protocol_roce(ib_dev, port) == 0)
				continue;
			(void) ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev);
		}
	}
	up_read(&lists_rwsem);
}

/**
 * ib_query_pkey - Get P_Key table entry
 * @device:Device to query
 * @port_num:Port number to query
 * @index:P_Key table index to query
 * @pkey:Returned P_Key
 *
 * ib_query_pkey() fetches the specified P_Key table entry.
 */
int ib_query_pkey(struct ib_device *device,
		  u8 port_num, u16 index, u16 *pkey)
{
	return device->query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);

/**
 * ib_modify_device - Change IB device attributes
 * @device:Device to modify
 * @device_modify_mask:Mask of attributes to change
 * @device_modify:New attribute values
 *
 * ib_modify_device() changes a device's attributes as specified by
 * the @device_modify_mask and @device_modify structure.
 */
int ib_modify_device(struct ib_device *device,
		     int device_modify_mask,
		     struct ib_device_modify *device_modify)
{
	if (!device->modify_device)
		return -ENOSYS;

	return device->modify_device(device, device_modify_mask,
				     device_modify);
}
EXPORT_SYMBOL(ib_modify_device);

/**
 * ib_modify_port - Modifies the attributes for the specified port.
 * @device: The device to modify.
 * @port_num: The number of the port to modify.
 * @port_modify_mask: Mask used to specify which attributes of the port
 *   to change.
 * @port_modify: New attribute values for the port.
 *
 * ib_modify_port() changes a port's attributes as specified by the
 * @port_modify_mask and @port_modify structure.
 */
int ib_modify_port(struct ib_device *device,
		   u8 port_num, int port_modify_mask,
		   struct ib_port_modify *port_modify)
{
	if (!device->modify_port)
		return -ENOSYS;

	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
		return -EINVAL;

	return device->modify_port(device, port_num, port_modify_mask,
				   port_modify);
}
EXPORT_SYMBOL(ib_modify_port);

/**
 * ib_find_gid - Returns the port number and GID table index where
 *   a specified GID value occurs.
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @gid_type: Type of GID.
 * @ndev: The ndev related to the GID to search for.
 * @port_num: The port number of the device where the GID value was found.
 * @index: The index into the GID table where the GID was found.  This
 *   parameter may be NULL.
 */
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
		enum ib_gid_type gid_type, struct net_device *ndev,
		u8 *port_num, u16 *index)
{
	union ib_gid tmp_gid;
	int ret, port, i;

	for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
		if (rdma_cap_roce_gid_table(device, port)) {
			if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
							ndev, index)) {
				*port_num = port;
				return 0;
			}
		}

		if (gid_type != IB_GID_TYPE_IB)
			continue;

		for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
			ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
			if (ret)
				return ret;
			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
				*port_num = port;
				if (index)
					*index = i;
				return 0;
			}
		}
	}

	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_gid);

/**
 * ib_find_pkey - Returns the PKey table index where a specified
 *   PKey value occurs.
 * @device: The device to query.
 * @port_num: The port number of the device to search for the PKey.
 * @pkey: The PKey value to search for.
 * @index: The index into the PKey table where the PKey was found.
 */
int ib_find_pkey(struct ib_device *device,
		 u8 port_num, u16 pkey, u16 *index)
{
	int ret, i;
	u16 tmp_pkey;
	int partial_ix = -1;

	for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) {
		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
		if (ret)
			return ret;
		if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
			/* if there is a full-member pkey, take it */
			if (tmp_pkey & 0x8000) {
				*index = i;
				return 0;
			}
			if (partial_ix < 0)
				partial_ix = i;
		}
	}

	/* no full-member pkey; if a limited one exists, take it */
	if (partial_ix >= 0) {
		*index = partial_ix;
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);
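
/*
 * Example (editor's sketch): the top bit of a P_Key encodes full (set)
 * versus limited (clear) membership, which is why the loop above compares
 * with a 0x7fff mask and prefers entries with 0x8000 set.
 *
 *	u16 index;
 *
 *	if (!ib_find_pkey(device, port, 0xffff, &index))
 *		...	<- index of the default, full-member P_Key
 */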

/**
 * ib_get_net_dev_by_params() - Return the appropriate net_dev
 * for a received CM request
 * @dev:	An RDMA device on which the request has been received.
 * @port:	Port number on the RDMA device.
 * @pkey:	The Pkey the request came on.
 * @gid:	A GID that the net_dev uses to communicate.
 * @addr:	Contains the IP address that the request specified as its
 *		destination.
 */
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
					    u8 port,
					    u16 pkey,
					    const union ib_gid *gid,
					    const struct sockaddr *addr)
{
	struct net_device *net_dev = NULL;
	struct ib_client_data *context;

	if (!rdma_protocol_ib(dev, port))
		return NULL;

	down_read(&lists_rwsem);

	list_for_each_entry(context, &dev->client_data_list, list) {
		struct ib_client *client = context->client;

		if (context->going_down)
			continue;

		if (client->get_net_dev_by_params) {
			net_dev = client->get_net_dev_by_params(dev, port, pkey,
								gid, addr,
								context->data);
			if (net_dev)
				break;
		}
	}

	up_read(&lists_rwsem);

	return net_dev;
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);

static int __init ib_core_init(void)
{
	int ret;

	ib_wq = alloc_workqueue("infiniband", 0, 0);
	if (!ib_wq)
		return -ENOMEM;

	ib_comp_wq = alloc_workqueue("ib-comp-wq",
			WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
			mp_ncpus * 4 /* WQ_UNBOUND_MAX_ACTIVE */);
	if (!ib_comp_wq) {
		ret = -ENOMEM;
		goto err;
	}

	ret = class_register(&ib_class);
	if (ret) {
		pr_warn("Couldn't create InfiniBand device class\n");
		goto err_comp;
	}

	ret = addr_init();
	if (ret) {
		pr_warn("Couldn't init IB address resolution\n");
		goto err_sysfs;
	}

	ret = ib_mad_init();
	if (ret) {
		pr_warn("Couldn't init IB MAD\n");
		goto err_addr;
	}

	ret = ib_sa_init();
	if (ret) {
		pr_warn("Couldn't init SA\n");
		goto err_mad;
	}

	ib_cache_setup();

	return 0;

err_mad:
	ib_mad_cleanup();
err_addr:
	addr_cleanup();
err_sysfs:
	class_unregister(&ib_class);
err_comp:
	destroy_workqueue(ib_comp_wq);
err:
	destroy_workqueue(ib_wq);
	return ret;
}

static void __exit ib_core_cleanup(void)
{
	ib_cache_cleanup();
	ib_sa_cleanup();
	ib_mad_cleanup();
	addr_cleanup();
	class_unregister(&ib_class);
	destroy_workqueue(ib_comp_wq);
	/* Make sure that any pending umem accounting work is done. */
	destroy_workqueue(ib_wq);
}

module_init(ib_core_init);
module_exit(ib_core_cleanup);

MODULE_VERSION(ibcore, 1);
MODULE_DEPEND(ibcore, linuxkpi, 1, 1, 1);