1/*
2 * Copyright (c) 2004-2009 Voltaire Inc.  All rights reserved.
3 * Copyright (c) 2014 Intel Corporation.  All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses.  You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 *     Redistribution and use in source and binary forms, with or
12 *     without modification, are permitted provided that the following
13 *     conditions are met:
14 *
15 *      - Redistributions of source code must retain the above
16 *        copyright notice, this list of conditions and the following
17 *        disclaimer.
18 *
19 *      - Redistributions in binary form must reproduce the above
20 *        copyright notice, this list of conditions and the following
21 *        disclaimer in the documentation and/or other materials
22 *        provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34
35#include <config.h>
36
37#include <sys/poll.h>
38#include <unistd.h>
39#include <string.h>
40#include <stdio.h>
41#include <errno.h>
42#include <sys/types.h>
43#include <sys/stat.h>
44#include <fcntl.h>
45#include <sys/ioctl.h>
46#include <dirent.h>
47#include <ctype.h>
48#include <inttypes.h>
49#include <assert.h>
50
51#include <infiniband/umad.h>
52
53#define IB_OPENIB_OUI                 (0x001405)
54
55#include "sysfs.h"
56
/*
 * Request block handed to the original IB_USER_MAD_REGISTER_AGENT ioctl.
 * The field order and widths are part of the ioctl ABI and must not change.
 */
struct ib_user_mad_reg_req {
	uint32_t id;			/* read back as the agent id after a successful ioctl */
	uint32_t method_mask[4];
	uint8_t qpn;
	uint8_t mgmt_class;
	uint8_t mgmt_class_version;
	uint8_t oui[3];			/* three OUI bytes, most significant first */
	uint8_t rmpp_version;
};

typedef struct ib_user_mad_reg_req ib_user_mad_reg_req_t;
66
67static_assert(sizeof(struct ib_user_mad_reg_req) == IOCPARM_LEN(IB_USER_MAD_REGISTER_AGENT),
68    "Invalid structure size");
69
/*
 * Request block handed to the IB_USER_MAD_REGISTER_AGENT2 ioctl.
 * The field order and widths are part of the ioctl ABI and must not change.
 */
struct ib_user_mad_reg_req2 {
	uint32_t id;			/* read back as the agent id after a successful ioctl */
	uint32_t qpn;
	uint8_t mgmt_class;
	uint8_t mgmt_class_version;
	uint16_t res;
	uint32_t flags;
	uint64_t method_mask[2];
	uint32_t oui;			/* only the low 24 bits are accepted by umad_register2() */
	uint8_t rmpp_version;
	uint8_t reserved[3];
};
82
83static_assert(sizeof(struct ib_user_mad_reg_req2) == IOCPARM_LEN(IB_USER_MAD_REGISTER_AGENT2),
84    "Invalid structure size");
85
/*
 * Unconditional warning on stderr, prefixed with the pid and the name of the
 * calling function.  A newline is appended to fmt.
 */
#define IBWARN(fmt, args...) fprintf(stderr, "ibwarn: [%d] %s: " fmt "\n", getpid(), __func__, ## args)

/*
 * Debug-only variants of IBWARN: they print only while umaddebug is non-zero.
 * Each expands to a single do { } while (0) statement; a bare
 * "if (umaddebug) IBWARN" expansion would silently capture an "else" that
 * follows the macro call.
 */
#define TRACE(fmt, args...)	do { if (umaddebug) IBWARN(fmt, ## args); } while (0)
#define DEBUG(fmt, args...)	do { if (umaddebug) IBWARN(fmt, ## args); } while (0)

/* Debug level; 0 silences TRACE/DEBUG.  Changed through umad_debug(). */
static int umaddebug = 0;

/* Size of the buffer umad_open_port() uses for the device file path. */
#define UMAD_DEV_FILE_SZ	256

/* CA name and port handed out when no CA could be resolved. */
static const char *def_ca_name = "mthca0";
static int def_ca_port = 1;

/* ABI version read by umad_init(). */
static unsigned abi_version;
/* Non-zero selects the MAD header layout with pkey_index; set by umad_open_port(). */
static unsigned new_user_mad_api;
100
101/*************************************
102 * Port
103 */
104static int find_cached_ca(const char *ca_name, umad_ca_t * ca)
105{
106	return 0;		/* caching not implemented yet */
107}
108
109static int put_ca(umad_ca_t * ca)
110{
111	return 0;		/* caching not implemented yet */
112}
113
114static int release_port(umad_port_t * port)
115{
116	free(port->pkeys);
117	port->pkeys = NULL;
118	port->pkeys_size = 0;
119	return 0;
120}
121
/*
 * Directory-scan filter: returns 1 when the entry name consists solely of
 * decimal digits (an empty name also yields 1), 0 otherwise.
 */
static int check_for_digit_name(const struct dirent *dent)
{
	const char *p;

	for (p = dent->d_name; *p; p++) {
		/* <ctype.h> functions need an unsigned char value; a negative
		 * plain char would be undefined behaviour. */
		if (!isdigit((unsigned char)*p))
			return 0;
	}
	return 1;
}
129
130static int get_port(const char *ca_name, const char *dir, int portnum, umad_port_t * port)
131{
132	char port_dir[256];
133	union umad_gid gid;
134	struct dirent **namelist = NULL;
135	int i, len, num_pkeys = 0;
136	uint32_t capmask;
137
138	strncpy(port->ca_name, ca_name, sizeof port->ca_name - 1);
139	port->portnum = portnum;
140	port->pkeys = NULL;
141	port->rate = 0;
142
143	len = snprintf(port_dir, sizeof(port_dir), "%s/%d", dir, portnum);
144	if (len < 0 || len > sizeof(port_dir))
145		goto clean;
146
147	if (sys_read_uint(port_dir, SYS_PORT_LMC, &port->lmc) < 0)
148		goto clean;
149	if (sys_read_uint(port_dir, SYS_PORT_SMLID, &port->sm_lid) < 0)
150		goto clean;
151	if (sys_read_uint(port_dir, SYS_PORT_SMSL, &port->sm_sl) < 0)
152		goto clean;
153	if (sys_read_uint(port_dir, SYS_PORT_LID, &port->base_lid) < 0)
154		goto clean;
155	if (sys_read_uint(port_dir, SYS_PORT_STATE, &port->state) < 0)
156		goto clean;
157	if (sys_read_uint(port_dir, SYS_PORT_PHY_STATE, &port->phys_state) < 0)
158		goto clean;
159	/*
160	 * If width was not set properly this read may fail.
161	 * Instead of failing everything, we will just skip the check
162	 * and it will be set to 0.
163	 */
164	sys_read_uint(port_dir, SYS_PORT_RATE, &port->rate);
165	if (sys_read_uint(port_dir, SYS_PORT_CAPMASK, &capmask) < 0)
166		goto clean;
167
168	if (sys_read_string(port_dir, SYS_PORT_LINK_LAYER,
169	    port->link_layer, UMAD_CA_NAME_LEN) < 0)
170		/* assume IB by default */
171		sprintf(port->link_layer, "IB");
172
173	port->capmask = htobe32(capmask);
174
175	if (sys_read_gid(port_dir, SYS_PORT_GID, &gid) < 0)
176		goto clean;
177
178	port->gid_prefix = gid.global.subnet_prefix;
179	port->port_guid = gid.global.interface_id;
180
181	snprintf(port_dir + len, sizeof(port_dir) - len, "/pkeys");
182	num_pkeys = sys_scandir(port_dir, &namelist, check_for_digit_name, NULL);
183	if (num_pkeys <= 0) {
184		IBWARN("no pkeys found for %s:%u (at dir %s)...",
185		       port->ca_name, port->portnum, port_dir);
186		goto clean;
187	}
188	port->pkeys = calloc(num_pkeys, sizeof(port->pkeys[0]));
189	if (!port->pkeys) {
190		IBWARN("get_port: calloc failed: %s", strerror(errno));
191		goto clean;
192	}
193	for (i = 0; i < num_pkeys; i++) {
194		unsigned idx, val;
195		idx = strtoul(namelist[i]->d_name, NULL, 0);
196		sys_read_uint(port_dir, namelist[i]->d_name, &val);
197		port->pkeys[idx] = val;
198		free(namelist[i]);
199	}
200	port->pkeys_size = num_pkeys;
201	free(namelist);
202	namelist = NULL;
203	port_dir[len] = '\0';
204
205	/* FIXME: handle gids */
206
207	return 0;
208
209clean:
210	if (namelist) {
211		for (i = 0; i < num_pkeys; i++)
212			free(namelist[i]);
213		free(namelist);
214	}
215	if (port->pkeys)
216		free(port->pkeys);
217	return -EIO;
218}
219
220static int release_ca(umad_ca_t * ca)
221{
222	int i;
223
224	for (i = 0; i <= ca->numports; i++) {
225		if (!ca->ports[i])
226			continue;
227		release_port(ca->ports[i]);
228		free(ca->ports[i]);
229		ca->ports[i] = NULL;
230	}
231	return 0;
232}
233
234/*
235 * if *port > 0, check ca[port] state. Otherwise set *port to
236 * the first port that is active, and if such is not found, to
237 * the first port that is link up and if none are linkup, then
238 * the first port that is not disabled.  Otherwise return -1.
239 */
240static int resolve_ca_port(const char *ca_name, int *port)
241{
242	umad_ca_t ca;
243	int active = -1, up = -1;
244	int i, ret = 0;
245
246	TRACE("checking ca '%s'", ca_name);
247
248	if (umad_get_ca(ca_name, &ca) < 0)
249		return -1;
250
251	if (ca.node_type == 2) {
252		*port = 0;	/* switch sma port 0 */
253		ret = 1;
254		goto Exit;
255	}
256
257	if (*port > 0) {	/* check only the port the user wants */
258		if (*port > ca.numports) {
259			ret = -1;
260			goto Exit;
261		}
262		if (!ca.ports[*port]) {
263			ret = -1;
264			goto Exit;
265		}
266		if (strcmp(ca.ports[*port]->link_layer, "InfiniBand") &&
267		    strcmp(ca.ports[*port]->link_layer, "IB")) {
268			ret = -1;
269			goto Exit;
270		}
271		if (ca.ports[*port]->state == 4) {
272			ret = 1;
273			goto Exit;
274		}
275		if (ca.ports[*port]->phys_state != 3)
276			goto Exit;
277		ret = -1;
278		goto Exit;
279	}
280
281	for (i = 0; i <= ca.numports; i++) {
282		DEBUG("checking port %d", i);
283		if (!ca.ports[i])
284			continue;
285		if (strcmp(ca.ports[i]->link_layer, "InfiniBand") &&
286		    strcmp(ca.ports[i]->link_layer, "IB"))
287			continue;
288		if (up < 0 && ca.ports[i]->phys_state == 5)
289			up = *port = i;
290		if (ca.ports[i]->state == 4) {
291			active = *port = i;
292			DEBUG("found active port %d", i);
293			break;
294		}
295	}
296
297	if (active == -1 && up == -1) {	/* no active or linkup port found */
298		for (i = 0; i <= ca.numports; i++) {
299			DEBUG("checking port %d", i);
300			if (!ca.ports[i])
301				continue;
302			if (ca.ports[i]->phys_state != 3) {
303				up = *port = i;
304				break;
305			}
306		}
307	}
308
309	if (active >= 0) {
310		ret = 1;
311		goto Exit;
312	}
313	if (up >= 0) {
314		ret = 0;
315		goto Exit;
316	}
317	ret = -1;
318Exit:
319	release_ca(&ca);
320	return ret;
321}
322
323static const char *resolve_ca_name(const char *ca_name, int *best_port)
324{
325	static char names[UMAD_MAX_DEVICES][UMAD_CA_NAME_LEN];
326	int phys_found = -1, port_found = 0, port, port_type;
327	int caidx, n;
328
329	if (ca_name && (!best_port || *best_port))
330		return ca_name;
331
332	if (ca_name) {
333		if (resolve_ca_port(ca_name, best_port) < 0)
334			return NULL;
335		return ca_name;
336	}
337
338	/* Get the list of CA names */
339	if ((n = umad_get_cas_names((void *)names, UMAD_MAX_DEVICES)) < 0)
340		return NULL;
341
342	/* Find the first existing CA with an active port */
343	for (caidx = 0; caidx < n; caidx++) {
344		TRACE("checking ca '%s'", names[caidx]);
345
346		port = best_port ? *best_port : 0;
347		if ((port_type = resolve_ca_port(names[caidx], &port)) < 0)
348			continue;
349
350		DEBUG("found ca %s with port %d type %d",
351		      names[caidx], port, port_type);
352
353		if (port_type > 0) {
354			if (best_port)
355				*best_port = port;
356			DEBUG("found ca %s with active port %d",
357			      names[caidx], port);
358			return (char *)(names + caidx);
359		}
360
361		if (phys_found == -1) {
362			phys_found = caidx;
363			port_found = port;
364		}
365	}
366
367	DEBUG("phys found %d on %s port %d",
368	      phys_found, phys_found >= 0 ? names[phys_found] : NULL,
369	      port_found);
370	if (phys_found >= 0) {
371		if (best_port)
372			*best_port = port_found;
373		return names[phys_found];
374	}
375
376	if (best_port)
377		*best_port = def_ca_port;
378	return def_ca_name;
379}
380
381static int get_ca(const char *ca_name, umad_ca_t * ca)
382{
383	char dir_name[256];
384	struct dirent **namelist;
385	int r, i, ret;
386	int portnum;
387
388	ca->numports = 0;
389	memset(ca->ports, 0, sizeof ca->ports);
390	strncpy(ca->ca_name, ca_name, sizeof(ca->ca_name) - 1);
391
392	snprintf(dir_name, sizeof(dir_name), "%s/%s", SYS_INFINIBAND,
393		 ca->ca_name);
394
395	if ((r = sys_read_uint(dir_name, SYS_NODE_TYPE, &ca->node_type)) < 0)
396		return r;
397	if (sys_read_string(dir_name, SYS_CA_FW_VERS, ca->fw_ver,
398			    sizeof ca->fw_ver) < 0)
399		ca->fw_ver[0] = '\0';
400	if (sys_read_string(dir_name, SYS_CA_HW_VERS, ca->hw_ver,
401			    sizeof ca->hw_ver) < 0)
402		ca->hw_ver[0] = '\0';
403	if ((r = sys_read_string(dir_name, SYS_CA_TYPE, ca->ca_type,
404				 sizeof ca->ca_type)) < 0)
405		ca->ca_type[0] = '\0';
406	if ((r = sys_read_guid(dir_name, SYS_CA_NODE_GUID, &ca->node_guid)) < 0)
407		return r;
408	if ((r =
409	     sys_read_guid(dir_name, SYS_CA_SYS_GUID, &ca->system_guid)) < 0)
410		return r;
411
412	snprintf(dir_name, sizeof(dir_name), "%s/%s/%s",
413		 SYS_INFINIBAND, ca->ca_name, SYS_CA_PORTS_DIR);
414
415	if ((r = sys_scandir(dir_name, &namelist, NULL, alphasort)) < 0) {
416		ret = errno < 0 ? errno : -EIO;
417		goto error;
418	}
419
420	ret = 0;
421	for (i = 0; i < r; i++) {
422		portnum = 0;
423		if (!strcmp(".", namelist[i]->d_name) ||
424		    !strcmp("..", namelist[i]->d_name))
425			continue;
426		if (strcmp("0", namelist[i]->d_name) &&
427		    ((portnum = atoi(namelist[i]->d_name)) <= 0 ||
428		     portnum >= UMAD_CA_MAX_PORTS)) {
429			ret = -EIO;
430			goto clean;
431		}
432		if (!(ca->ports[portnum] =
433		      calloc(1, sizeof(*ca->ports[portnum])))) {
434			ret = -ENOMEM;
435			goto clean;
436		}
437		if (get_port(ca_name, dir_name, portnum, ca->ports[portnum]) <
438		    0) {
439			free(ca->ports[portnum]);
440			ca->ports[portnum] = NULL;
441			ret = -EIO;
442			goto clean;
443		}
444		if (ca->numports < portnum)
445			ca->numports = portnum;
446	}
447
448	for (i = 0; i < r; i++)
449		free(namelist[i]);
450	free(namelist);
451
452	put_ca(ca);
453	return 0;
454
455clean:
456	for (i = 0; i < r; i++)
457		free(namelist[i]);
458	free(namelist);
459error:
460	release_ca(ca);
461
462	return ret;
463}
464
465static int umad_id_to_dev(int umad_id, char *dev, unsigned *port)
466{
467	char path[256];
468	int r;
469
470	snprintf(path, sizeof(path), SYS_INFINIBAND_MAD "/umad%d", umad_id);
471
472	if ((r =
473	     sys_read_string(path, SYS_IB_MAD_DEV, dev, UMAD_CA_NAME_LEN)) < 0)
474		return r;
475
476	if ((r = sys_read_uint(path, SYS_IB_MAD_PORT, port)) < 0)
477		return r;
478
479	return 0;
480}
481
482static int dev_to_umad_id(const char *dev, unsigned port)
483{
484	char umad_dev[UMAD_CA_NAME_LEN];
485	unsigned umad_port;
486	int id;
487
488	for (id = 0; id < UMAD_MAX_PORTS; id++) {
489		if (umad_id_to_dev(id, umad_dev, &umad_port) < 0)
490			continue;
491		if (strncmp(dev, umad_dev, UMAD_CA_NAME_LEN))
492			continue;
493		if (port != umad_port)
494			continue;
495
496		DEBUG("mapped %s %d to %d", dev, port, id);
497		return id;
498	}
499
500	return -1;		/* not found */
501}
502
503/*******************************
504 * Public interface
505 */
506
507int umad_init(void)
508{
509	TRACE("umad_init");
510	if (sys_read_uint(IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE, &abi_version) < 0) {
511		IBWARN
512		    ("can't read ABI version from %s (%m): is ibcore module loaded?",
513		     PATH_TO_SYS(IB_UMAD_ABI_DIR "/" IB_UMAD_ABI_FILE));
514		return -1;
515	}
516	if (abi_version < IB_UMAD_ABI_VERSION) {
517		IBWARN
518		    ("wrong ABI version: %s is %d but library minimal ABI is %d",
519		     PATH_TO_SYS(IB_UMAD_ABI_DIR "/" IB_UMAD_ABI_FILE), abi_version,
520		     IB_UMAD_ABI_VERSION);
521		return -1;
522	}
523	return 0;
524}
525
/*
 * Library teardown.  Currently only emits a trace line; always returns 0.
 */
int umad_done(void)
{
	/* FIXME - verify that all ports are closed */
	TRACE("umad_done");

	return 0;
}
532
533static unsigned is_ib_type(const char *ca_name)
534{
535	char dir_name[256];
536	unsigned type;
537
538	snprintf(dir_name, sizeof(dir_name), "%s/%s", SYS_INFINIBAND, ca_name);
539
540	if (sys_read_uint(dir_name, SYS_NODE_TYPE, &type) < 0)
541		return 0;
542
543	return type >= 1 && type <= 3 ? 1 : 0;
544}
545
546int umad_get_cas_names(char cas[][UMAD_CA_NAME_LEN], int max)
547{
548	struct dirent **namelist;
549	int n, i, j = 0;
550
551	TRACE("max %d", max);
552
553	n = sys_scandir(SYS_INFINIBAND, &namelist, NULL, alphasort);
554	if (n > 0) {
555		for (i = 0; i < n; i++) {
556			if (strcmp(namelist[i]->d_name, ".") &&
557			    strcmp(namelist[i]->d_name, "..")) {
558				if (j < max && is_ib_type(namelist[i]->d_name))
559					strncpy(cas[j++], namelist[i]->d_name,
560						UMAD_CA_NAME_LEN);
561			}
562			free(namelist[i]);
563		}
564		DEBUG("return %d cas", j);
565	} else {
566		/* Is this still needed ? */
567		strncpy((char *)cas, def_ca_name, UMAD_CA_NAME_LEN);
568		DEBUG("return 1 ca");
569		j = 1;
570	}
571	if (n >= 0)
572		free(namelist);
573	return j;
574}
575
576int umad_get_ca_portguids(const char *ca_name, __be64 *portguids, int max)
577{
578	umad_ca_t ca;
579	int ports = 0, i;
580
581	TRACE("ca name %s max port guids %d", ca_name, max);
582	if (!(ca_name = resolve_ca_name(ca_name, NULL)))
583		return -ENODEV;
584
585	if (umad_get_ca(ca_name, &ca) < 0)
586		return -1;
587
588	if (portguids) {
589		if (ca.numports + 1 > max) {
590			release_ca(&ca);
591			return -ENOMEM;
592		}
593
594		for (i = 0; i <= ca.numports; i++)
595			portguids[ports++] = ca.ports[i] ?
596				ca.ports[i]->port_guid : htobe64(0);
597	}
598
599	release_ca(&ca);
600	DEBUG("%s: %d ports", ca_name, ports);
601
602	return ports;
603}
604
605int umad_get_issm_path(const char *ca_name, int portnum, char path[], int max)
606{
607	int umad_id;
608
609	TRACE("ca %s port %d", ca_name, portnum);
610
611	if (!(ca_name = resolve_ca_name(ca_name, &portnum)))
612		return -ENODEV;
613
614	if ((umad_id = dev_to_umad_id(ca_name, portnum)) < 0)
615		return -EINVAL;
616
617	snprintf(path, max, "%s/issm%u", UMAD_DEV_DIR, umad_id);
618
619	return 0;
620}
621
622int umad_open_port(const char *ca_name, int portnum)
623{
624	char dev_file[UMAD_DEV_FILE_SZ];
625	int umad_id, fd;
626
627	TRACE("ca %s port %d", ca_name, portnum);
628
629	if (!(ca_name = resolve_ca_name(ca_name, &portnum)))
630		return -ENODEV;
631
632	DEBUG("opening %s port %d", ca_name, portnum);
633
634	if ((umad_id = dev_to_umad_id(ca_name, portnum)) < 0)
635		return -EINVAL;
636
637	snprintf(dev_file, sizeof(dev_file), "%s/umad%d",
638		 UMAD_DEV_DIR, umad_id);
639
640	if ((fd = open(dev_file, O_RDWR | O_NONBLOCK)) < 0) {
641		DEBUG("open %s failed: %s", dev_file, strerror(errno));
642		return -EIO;
643	}
644
645	if (abi_version > 5 || !ioctl(fd, IB_USER_MAD_ENABLE_PKEY, NULL))
646		new_user_mad_api = 1;
647	else
648		new_user_mad_api = 0;
649
650	DEBUG("opened %s fd %d portid %d", dev_file, fd, umad_id);
651	return fd;
652}
653
654int umad_get_ca(const char *ca_name, umad_ca_t * ca)
655{
656	int r;
657
658	TRACE("ca_name %s", ca_name);
659	if (!(ca_name = resolve_ca_name(ca_name, NULL)))
660		return -ENODEV;
661
662	if (find_cached_ca(ca_name, ca) > 0)
663		return 0;
664
665	if ((r = get_ca(ca_name, ca)) < 0)
666		return r;
667
668	DEBUG("opened %s", ca_name);
669	return 0;
670}
671
672int umad_release_ca(umad_ca_t * ca)
673{
674	int r;
675
676	TRACE("ca_name %s", ca->ca_name);
677	if (!ca)
678		return -ENODEV;
679
680	if ((r = release_ca(ca)) < 0)
681		return r;
682
683	DEBUG("releasing %s", ca->ca_name);
684	return 0;
685}
686
687int umad_get_port(const char *ca_name, int portnum, umad_port_t * port)
688{
689	char dir_name[256];
690
691	TRACE("ca_name %s portnum %d", ca_name, portnum);
692
693	if (!(ca_name = resolve_ca_name(ca_name, &portnum)))
694		return -ENODEV;
695
696	snprintf(dir_name, sizeof(dir_name), "%s/%s/%s",
697		 SYS_INFINIBAND, ca_name, SYS_CA_PORTS_DIR);
698
699	return get_port(ca_name, dir_name, portnum, port);
700}
701
702int umad_release_port(umad_port_t * port)
703{
704	int r;
705
706	TRACE("port %s:%d", port->ca_name, port->portnum);
707	if (!port)
708		return -ENODEV;
709
710	if ((r = release_port(port)) < 0)
711		return r;
712
713	DEBUG("releasing %s:%d", port->ca_name, port->portnum);
714	return 0;
715}
716
/*
 * Close a descriptor obtained from umad_open_port().  The result of close()
 * is not inspected; 0 is always returned.
 */
int umad_close_port(int fd)
{
	(void)close(fd);
	DEBUG("closed fd %d", fd);

	return 0;
}
723
724void *umad_get_mad(void *umad)
725{
726	return new_user_mad_api ? ((struct ib_user_mad *)umad)->data :
727	    (void *)&((struct ib_user_mad *)umad)->addr.pkey_index;
728}
729
730size_t umad_size(void)
731{
732	return new_user_mad_api ? sizeof(struct ib_user_mad) :
733	    sizeof(struct ib_user_mad) - 8;
734}
735
736int umad_set_grh(void *umad, void *mad_addr)
737{
738	struct ib_user_mad *mad = umad;
739	struct ib_mad_addr *addr = mad_addr;
740
741	if (mad_addr) {
742		mad->addr.grh_present = 1;
743		mad->addr.ib_gid = addr->ib_gid;
744		/* The definition for umad_set_grh requires that the input be
745		 * in host order */
746		mad->addr.flow_label = htobe32((uint32_t)addr->flow_label);
747		mad->addr.hop_limit = addr->hop_limit;
748		mad->addr.traffic_class = addr->traffic_class;
749	} else
750		mad->addr.grh_present = 0;
751	return 0;
752}
753
754int umad_set_pkey(void *umad, int pkey_index)
755{
756	struct ib_user_mad *mad = umad;
757
758	if (new_user_mad_api)
759		mad->addr.pkey_index = pkey_index;
760
761	return 0;
762}
763
764int umad_get_pkey(void *umad)
765{
766	struct ib_user_mad *mad = umad;
767
768	if (new_user_mad_api)
769		return mad->addr.pkey_index;
770
771	return 0;
772}
773
774int umad_set_addr(void *umad, int dlid, int dqp, int sl, int qkey)
775{
776	struct ib_user_mad *mad = umad;
777
778	TRACE("umad %p dlid %u dqp %d sl %d, qkey %x",
779	      umad, dlid, dqp, sl, qkey);
780	mad->addr.qpn = htobe32(dqp);
781	mad->addr.lid = htobe16(dlid);
782	mad->addr.qkey = htobe32(qkey);
783	mad->addr.sl = sl;
784
785	return 0;
786}
787
788int umad_set_addr_net(void *umad, __be16 dlid, __be32 dqp, int sl, __be32 qkey)
789{
790	struct ib_user_mad *mad = umad;
791
792	TRACE("umad %p dlid %u dqp %d sl %d qkey %x",
793	      umad, be16toh(dlid), be32toh(dqp), sl, be32toh(qkey));
794	mad->addr.qpn = dqp;
795	mad->addr.lid = dlid;
796	mad->addr.qkey = qkey;
797	mad->addr.sl = sl;
798
799	return 0;
800}
801
802int umad_send(int fd, int agentid, void *umad, int length,
803	      int timeout_ms, int retries)
804{
805	struct ib_user_mad *mad = umad;
806	int n;
807
808	TRACE("fd %d agentid %d umad %p timeout %u",
809	      fd, agentid, umad, timeout_ms);
810	errno = 0;
811
812	mad->timeout_ms = timeout_ms;
813	mad->retries = retries;
814	mad->agent_id = agentid;
815
816	if (umaddebug > 1)
817		umad_dump(mad);
818
819	n = write(fd, mad, length + umad_size());
820	if (n == length + umad_size())
821		return 0;
822
823	DEBUG("write returned %d != sizeof umad %zu + length %d (%m)",
824	      n, umad_size(), length);
825	if (!errno)
826		errno = EIO;
827	return -EIO;
828}
829
/*
 * Wait up to timeout_ms milliseconds for fd to become readable.
 *
 * Returns 0 when poll() reports the descriptor, -ETIMEDOUT when it reports
 * nothing, and -EIO for any other poll() result.
 */
static int dev_poll(int fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	switch (poll(&pfd, 1, timeout_ms)) {
	case 1:
		return 0;
	case 0:
		return -ETIMEDOUT;
	default:
		return -EIO;
	}
}
846
847int umad_recv(int fd, void *umad, int *length, int timeout_ms)
848{
849	struct ib_user_mad *mad = umad;
850	int n;
851
852	errno = 0;
853	TRACE("fd %d umad %p timeout %u", fd, umad, timeout_ms);
854
855	if (!umad || !length) {
856		errno = EINVAL;
857		return -EINVAL;
858	}
859
860	if (timeout_ms && (n = dev_poll(fd, timeout_ms)) < 0) {
861		if (!errno)
862			errno = -n;
863		return n;
864	}
865
866	n = read(fd, umad, umad_size() + *length);
867
868	VALGRIND_MAKE_MEM_DEFINED(umad, umad_size() + *length);
869
870	if ((n >= 0) && (n <= umad_size() + *length)) {
871		DEBUG("mad received by agent %d length %d", mad->agent_id, n);
872		if (n > umad_size())
873			*length = n - umad_size();
874		else
875			*length = 0;
876		return mad->agent_id;
877	}
878
879	if (n == -EWOULDBLOCK) {
880		if (!errno)
881			errno = EWOULDBLOCK;
882		return n;
883	}
884
885	DEBUG("read returned %zu > sizeof umad %zu + length %d (%m)",
886	      mad->length - umad_size(), umad_size(), *length);
887
888	*length = mad->length - umad_size();
889	if (!errno)
890		errno = EIO;
891	return -errno;
892}
893
/*
 * Wait up to timeout_ms milliseconds for fd to become readable.
 * Returns 0 when readable, -ETIMEDOUT on timeout and -EIO otherwise.
 */
int umad_poll(int fd, int timeout_ms)
{
	int rc;

	TRACE("fd %d timeout %u", fd, timeout_ms);
	rc = dev_poll(fd, timeout_ms);

	return rc;
}
899
/*
 * Return the file descriptor behind a port handle.  The handle is the
 * descriptor itself, so it is returned unchanged.
 */
int umad_get_fd(int fd)
{
	int port_fd = fd;

	TRACE("fd %d", port_fd);

	return port_fd;
}
905
906int umad_register_oui(int fd, int mgmt_class, uint8_t rmpp_version,
907		      uint8_t oui[3], long method_mask[])
908{
909	struct ib_user_mad_reg_req req;
910
911	TRACE("fd %d mgmt_class %u rmpp_version %d oui 0x%x%x%x method_mask %p",
912	      fd, mgmt_class, (int)rmpp_version, (int)oui[0], (int)oui[1],
913	      (int)oui[2], method_mask);
914
915	if (mgmt_class < 0x30 || mgmt_class > 0x4f) {
916		DEBUG("mgmt class %d not in vendor range 2", mgmt_class);
917		return -EINVAL;
918	}
919
920	req.qpn = 1;
921	req.mgmt_class = mgmt_class;
922	req.mgmt_class_version = 1;
923	memcpy(req.oui, oui, sizeof req.oui);
924	req.rmpp_version = rmpp_version;
925
926	if (method_mask)
927		memcpy(req.method_mask, method_mask, sizeof req.method_mask);
928	else
929		memset(req.method_mask, 0, sizeof req.method_mask);
930
931	VALGRIND_MAKE_MEM_DEFINED(&req, sizeof req);
932
933	if (!ioctl(fd, IB_USER_MAD_REGISTER_AGENT, (void *)&req)) {
934		DEBUG
935		    ("fd %d registered to use agent %d qp %d class 0x%x oui %p",
936		     fd, req.id, req.qpn, req.mgmt_class, oui);
937		return req.id;	/* return agentid */
938	}
939
940	DEBUG("fd %d registering qp %d class 0x%x version %d oui %p failed: %m",
941	      fd, req.qpn, req.mgmt_class, req.mgmt_class_version, oui);
942	return -EPERM;
943}
944
945int umad_register(int fd, int mgmt_class, int mgmt_version,
946		  uint8_t rmpp_version, long method_mask[])
947{
948	struct ib_user_mad_reg_req req;
949	__be32 oui = htobe32(IB_OPENIB_OUI);
950	int qp;
951
952	TRACE
953	    ("fd %d mgmt_class %u mgmt_version %u rmpp_version %d method_mask %p",
954	     fd, mgmt_class, mgmt_version, rmpp_version, method_mask);
955
956	req.qpn = qp = (mgmt_class == 0x1 || mgmt_class == 0x81) ? 0 : 1;
957	req.mgmt_class = mgmt_class;
958	req.mgmt_class_version = mgmt_version;
959	req.rmpp_version = rmpp_version;
960
961	if (method_mask)
962		memcpy(req.method_mask, method_mask, sizeof req.method_mask);
963	else
964		memset(req.method_mask, 0, sizeof req.method_mask);
965
966	memcpy(&req.oui, (char *)&oui + 1, sizeof req.oui);
967
968	VALGRIND_MAKE_MEM_DEFINED(&req, sizeof req);
969
970	if (!ioctl(fd, IB_USER_MAD_REGISTER_AGENT, (void *)&req)) {
971		DEBUG("fd %d registered to use agent %d qp %d", fd, req.id, qp);
972		return req.id;	/* return agentid */
973	}
974
975	DEBUG("fd %d registering qp %d class 0x%x version %d failed: %m",
976	      fd, qp, mgmt_class, mgmt_version);
977	return -EPERM;
978}
979
980int umad_register2(int port_fd, struct umad_reg_attr *attr, uint32_t *agent_id)
981{
982	struct ib_user_mad_reg_req2 req;
983	int rc;
984
985	if (!attr || !agent_id)
986		return EINVAL;
987
988	TRACE("fd %d mgmt_class %u mgmt_class_version %u flags 0x%08x "
989	      "method_mask 0x%016" PRIx64 " %016" PRIx64
990	      "oui 0x%06x rmpp_version %u ",
991	      port_fd, attr->mgmt_class, attr->mgmt_class_version,
992	      attr->flags, attr->method_mask[0], attr->method_mask[1],
993	      attr->oui, attr->rmpp_version);
994
995	if (attr->mgmt_class >= 0x30 && attr->mgmt_class <= 0x4f &&
996	    ((attr->oui & 0x00ffffff) == 0 || (attr->oui & 0xff000000) != 0)) {
997		DEBUG("mgmt class %d is in vendor range 2 but oui (0x%08x) is invalid",
998		      attr->mgmt_class, attr->oui);
999		return EINVAL;
1000	}
1001
1002	memset(&req, 0, sizeof(req));
1003
1004	req.mgmt_class = attr->mgmt_class;
1005	req.mgmt_class_version = attr->mgmt_class_version;
1006	req.qpn = (attr->mgmt_class == 0x1 || attr->mgmt_class == 0x81) ? 0 : 1;
1007	req.flags = attr->flags;
1008	memcpy(req.method_mask, attr->method_mask, sizeof req.method_mask);
1009	req.oui = attr->oui;
1010	req.rmpp_version = attr->rmpp_version;
1011
1012	VALGRIND_MAKE_MEM_DEFINED(&req, sizeof req);
1013
1014	if ((rc = ioctl(port_fd, IB_USER_MAD_REGISTER_AGENT2, (void *)&req)) == 0) {
1015		DEBUG("fd %d registered to use agent %d qp %d class 0x%x oui 0x%06x",
1016		      port_fd, req.id, req.qpn, req.mgmt_class, attr->oui);
1017		*agent_id = req.id;
1018		return 0;
1019	}
1020
1021	if (errno == ENOTTY || errno == EINVAL) {
1022
1023		TRACE("no kernel support for registration flags");
1024		req.flags = 0;
1025
1026		if (attr->flags == 0) {
1027			struct ib_user_mad_reg_req req_v1;
1028
1029			TRACE("attempting original register ioctl");
1030
1031			memset(&req_v1, 0, sizeof(req_v1));
1032			req_v1.mgmt_class = req.mgmt_class;
1033			req_v1.mgmt_class_version = req.mgmt_class_version;
1034			req_v1.qpn = req.qpn;
1035			req_v1.rmpp_version = req.rmpp_version;
1036			req_v1.oui[0] = (req.oui & 0xff0000) >> 16;
1037			req_v1.oui[1] = (req.oui & 0x00ff00) >> 8;
1038			req_v1.oui[2] =  req.oui & 0x0000ff;
1039
1040			memcpy(req_v1.method_mask, req.method_mask, sizeof req_v1.method_mask);
1041
1042			if ((rc = ioctl(port_fd, IB_USER_MAD_REGISTER_AGENT,
1043					(void *)&req_v1)) == 0) {
1044				DEBUG("fd %d registered to use agent %d qp %d class 0x%x oui 0x%06x",
1045				      port_fd, req_v1.id, req_v1.qpn, req_v1.mgmt_class, attr->oui);
1046				*agent_id = req_v1.id;
1047				return 0;
1048			}
1049		}
1050	}
1051
1052	rc = errno;
1053	attr->flags = req.flags;
1054
1055	DEBUG("fd %d registering qp %d class 0x%x version %d "
1056	      "oui 0x%06x failed flags returned 0x%x : %m",
1057	      port_fd, req.qpn, req.mgmt_class, req.mgmt_class_version,
1058	      attr->oui, req.flags);
1059
1060	return rc;
1061}
1062
1063int umad_unregister(int fd, int agentid)
1064{
1065	TRACE("fd %d unregistering agent %d", fd, agentid);
1066	return ioctl(fd, IB_USER_MAD_UNREGISTER_AGENT, &agentid);
1067}
1068
1069int umad_status(void *umad)
1070{
1071	struct ib_user_mad *mad = umad;
1072
1073	return mad->status;
1074}
1075
1076ib_mad_addr_t *umad_get_mad_addr(void *umad)
1077{
1078	struct ib_user_mad *mad = umad;
1079
1080	return &mad->addr;
1081}
1082
1083int umad_debug(int level)
1084{
1085	if (level >= 0)
1086		umaddebug = level;
1087	return umaddebug;
1088}
1089
1090void umad_addr_dump(ib_mad_addr_t * addr)
1091{
1092#define HEX(x)  ((x) < 10 ? '0' + (x) : 'a' + ((x) -10))
1093	char gid_str[64];
1094	int i;
1095
1096	for (i = 0; i < sizeof addr->gid; i++) {
1097		gid_str[i * 2] = HEX(addr->gid[i] >> 4);
1098		gid_str[i * 2 + 1] = HEX(addr->gid[i] & 0xf);
1099	}
1100	gid_str[i * 2] = 0;
1101	IBWARN("qpn %d qkey 0x%x lid %u sl %d\n"
1102	       "grh_present %d gid_index %d hop_limit %d traffic_class %d flow_label 0x%x pkey_index 0x%x\n"
1103	       "Gid 0x%s",
1104	       be32toh(addr->qpn), be32toh(addr->qkey), be16toh(addr->lid), addr->sl,
1105	       addr->grh_present, (int)addr->gid_index, (int)addr->hop_limit,
1106	       (int)addr->traffic_class, addr->flow_label, addr->pkey_index,
1107	       gid_str);
1108}
1109
1110void umad_dump(void *umad)
1111{
1112	struct ib_user_mad *mad = umad;
1113
1114	IBWARN("agent id %d status %x timeout %d",
1115	       mad->agent_id, mad->status, mad->timeout_ms);
1116	umad_addr_dump(&mad->addr);
1117}
1118