/*
 * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#include <pthread.h>
#include <string.h>
#include <signal.h>
#include <stdbool.h>

#include "libcxgb4.h"
#include "cxgb4-abi.h"

#define PCI_VENDOR_ID_CHELSIO		0x1425

/*
 * Macros needed to support the PCI Device ID Table ...
 */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
	static struct { \
		unsigned vendor; \
		unsigned device; \
	} hca_table[] = {

#define CH_PCI_DEVICE_ID_FUNCTION \
		0x4

#define CH_PCI_ID_TABLE_ENTRY(__DeviceID) \
		{ \
			.vendor = PCI_VENDOR_ID_CHELSIO, \
			.device = (__DeviceID), \
		}

#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \
	}

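/*
 * With the macros above in place, including t4_pci_id_tbl.h expands the
 * shared PCI device ID table into hca_table[], the list of Chelsio
 * vendor/device IDs this provider will claim.
 */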
#include "t4_chip_type.h"
#include "t4_pci_id_tbl.h"

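/*
 * Process-wide globals.  The host page size/shift/mask are cached at
 * library load time (see cxgb4_register_driver() below).  ma_wr and
 * t5_en_wc are run-time tunables that can be overridden through the
 * CXGB4_MA_WR and T5_ENABLE_WC environment variables parsed in
 * cxgb4_driver_init().
 */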
unsigned long c4iw_page_size;
unsigned long c4iw_page_shift;
unsigned long c4iw_page_mask;
int ma_wr;
int t5_en_wc = 1;

static TAILQ_HEAD(,c4iw_dev) devices = TAILQ_HEAD_INITIALIZER(devices);

static struct ibv_context_ops c4iw_ctx_ops = {
	.query_device = c4iw_query_device,
	.query_port = c4iw_query_port,
	.alloc_pd = c4iw_alloc_pd,
	.dealloc_pd = c4iw_free_pd,
	.reg_mr = c4iw_reg_mr,
	.dereg_mr = c4iw_dereg_mr,
	.create_cq = c4iw_create_cq,
	.resize_cq = c4iw_resize_cq,
	.destroy_cq = c4iw_destroy_cq,
	.create_srq = c4iw_create_srq,
	.modify_srq = c4iw_modify_srq,
	.destroy_srq = c4iw_destroy_srq,
	.create_qp = c4iw_create_qp,
	.modify_qp = c4iw_modify_qp,
	.destroy_qp = c4iw_destroy_qp,
	.query_qp = c4iw_query_qp,
	.create_ah = c4iw_create_ah,
	.destroy_ah = c4iw_destroy_ah,
	.attach_mcast = c4iw_attach_mcast,
	.detach_mcast = c4iw_detach_mcast,
	.post_srq_recv = c4iw_post_srq_recv,
	.req_notify_cq = c4iw_arm_cq,
};

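/*
 * Allocate the per-process device context: issue the GET_CONTEXT command
 * to the kernel, mmap() the shared status page advertised in the response
 * (if any), wire up the chip-specific fast-path verbs, and, on first use
 * of the device, size the mmid/qpid/cqid-to-pointer lookup tables from
 * the device attributes or the status page contents.
 */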
static struct ibv_context *c4iw_alloc_context(struct ibv_device *ibdev,
					      int cmd_fd)
{
	struct c4iw_context *context;
	struct ibv_get_context cmd;
	struct c4iw_alloc_ucontext_resp resp;
	struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
	struct ibv_query_device qcmd;
	uint64_t raw_fw_ver;
	struct ibv_device_attr attr;

	context = malloc(sizeof *context);
	if (!context)
		return NULL;

	memset(context, 0, sizeof *context);
	context->ibv_ctx.cmd_fd = cmd_fd;

	resp.status_page_size = 0;
	resp.reserved = 0;
	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
				&resp.ibv_resp, sizeof resp))
		goto err_free;

	if (resp.reserved)
		PDBG("%s c4iw_alloc_ucontext_resp reserved field modified by kernel\n",
		     __FUNCTION__);

	context->status_page_size = resp.status_page_size;
	if (resp.status_page_size) {
		context->status_page = mmap(NULL, resp.status_page_size,
					    PROT_READ, MAP_SHARED, cmd_fd,
					    resp.status_page_key);
		if (context->status_page == MAP_FAILED)
			goto err_free;
	}

	context->ibv_ctx.device = ibdev;
	context->ibv_ctx.ops = c4iw_ctx_ops;

	switch (rhp->chip_version) {
	case CHELSIO_T6:
		PDBG("%s T6/T5/T4 device\n", __FUNCTION__);
		/* fall through */
	case CHELSIO_T5:
		PDBG("%s T5/T4 device\n", __FUNCTION__);
		/* fall through */
	case CHELSIO_T4:
		PDBG("%s T4 device\n", __FUNCTION__);
		context->ibv_ctx.ops.async_event = c4iw_async_event;
		context->ibv_ctx.ops.post_send = c4iw_post_send;
		context->ibv_ctx.ops.post_recv = c4iw_post_receive;
		context->ibv_ctx.ops.poll_cq = c4iw_poll_cq;
		context->ibv_ctx.ops.req_notify_cq = c4iw_arm_cq;
		break;
	default:
		PDBG("%s unknown hca type %d\n", __FUNCTION__,
		     rhp->chip_version);
		goto err_unmap;
		break;
	}

	if (!rhp->mmid2ptr) {
		int ret;

		ret = ibv_cmd_query_device(&context->ibv_ctx, &attr, &raw_fw_ver, &qcmd,
					   sizeof qcmd);
		if (ret)
			goto err_unmap;
		rhp->max_mr = attr.max_mr;
		rhp->mmid2ptr = calloc(attr.max_mr, sizeof(void *));
		if (!rhp->mmid2ptr) {
			goto err_unmap;
		}
		if (rhp->abi_version < 3) {
			fprintf(stderr, "Warning: iw_cxgb4 driver is of older version"
					" than libcxgb4: %d\n", rhp->abi_version);
			rhp->max_qp = T4_QID_BASE + attr.max_qp;
		} else {
			rhp->max_qp = context->status_page->qp_start +
					context->status_page->qp_size;
		}
		rhp->qpid2ptr = calloc(rhp->max_qp, sizeof(void *));
		if (!rhp->qpid2ptr) {
			goto err_unmap;
		}
		if (rhp->abi_version < 3)
			rhp->max_cq = T4_QID_BASE + attr.max_cq;
		else
			rhp->max_cq = context->status_page->cq_start +
					context->status_page->cq_size;
		rhp->cqid2ptr = calloc(rhp->max_cq, sizeof(void *));
		if (!rhp->cqid2ptr)
			goto err_unmap;

		/* Disable userspace WC if architecture/adapter does not
		 * support WC.
		 * Note: To forcefully disable WC in kernel driver use the
		 * loader tunable "hw.cxl.write_combine=0"
		 */
		if (t5_en_wc && !context->status_page->wc_supported) {
			t5_en_wc = 0;
		}
	}

	return &context->ibv_ctx;

err_unmap:
	munmap(context->status_page, context->status_page_size);
err_free:
	if (rhp->cqid2ptr)
		free(rhp->cqid2ptr);
	if (rhp->qpid2ptr)
		free(rhp->qpid2ptr);
	if (rhp->mmid2ptr)
		free(rhp->mmid2ptr);
	free(context);
	return NULL;
}

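/*
 * Tear down a user context: unmap the kernel status page, if one was
 * mapped, and release the context itself.
 */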
static void c4iw_free_context(struct ibv_context *ibctx)
{
	struct c4iw_context *context = to_c4iw_context(ibctx);

	if (context->status_page_size)
		munmap(context->status_page, context->status_page_size);
	free(context);
}

static struct verbs_device_ops c4iw_dev_ops = {
	.alloc_context = c4iw_alloc_context,
	.free_context = c4iw_free_context
};

#ifdef STALL_DETECTION

int stall_to;

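/*
 * Stall-detection helper: dump a CQ's state to stderr, including the
 * software queue indices and the raw hardware CQE contents.
 */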
static void dump_cq(struct c4iw_cq *chp)
{
	int i;

	fprintf(stderr,
		"CQ: %p id %u queue %p cidx 0x%08x sw_queue %p sw_cidx %d sw_pidx %d sw_in_use %d depth %u error %u gen %d "
		"cidx_inc %d bits_type_ts %016" PRIx64 " notempty %d\n", chp,
		chp->cq.cqid, chp->cq.queue, chp->cq.cidx,
		chp->cq.sw_queue, chp->cq.sw_cidx, chp->cq.sw_pidx, chp->cq.sw_in_use,
		chp->cq.size, chp->cq.error, chp->cq.gen, chp->cq.cidx_inc, be64toh(chp->cq.bits_type_ts),
		t4_cq_notempty(&chp->cq));

	for (i = 0; i < chp->cq.size; i++) {
		u64 *p = (u64 *)(chp->cq.queue + i);

		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64, i, be64toh(p[0]), be64toh(p[1]));
		if (i == chp->cq.cidx)
			fprintf(stderr, " <-- cidx\n");
		else
			fprintf(stderr, "\n");
		p += 2;
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p += 2;
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p += 2;
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p += 2;
	}
}

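/*
 * Stall-detection helper: dump a QP's SQ and RQ state, including any
 * pending software work requests and the raw hardware work queue slots.
 */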
static void dump_qp(struct c4iw_qp *qhp)
{
	int i;
	int j;
	struct t4_swsqe *swsqe;
	struct t4_swrqe *swrqe;
	u16 cidx, pidx;
	u64 *p;

	fprintf(stderr,
		"QP: %p id %u error %d flushed %d qid_mask 0x%x\n"
		"    SQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u wq_pidx %u depth %u flags 0x%x flush_cidx %d\n"
		"    RQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u depth %u\n",
		qhp,
		qhp->wq.sq.qid,
		qhp->wq.error,
		qhp->wq.flushed,
		qhp->wq.qid_mask,
		qhp->wq.sq.qid,
		qhp->wq.sq.queue,
		qhp->wq.sq.sw_sq,
		qhp->wq.sq.cidx,
		qhp->wq.sq.pidx,
		qhp->wq.sq.in_use,
		qhp->wq.sq.wq_pidx,
		qhp->wq.sq.size,
		qhp->wq.sq.flags,
		qhp->wq.sq.flush_cidx,
		qhp->wq.rq.qid,
		qhp->wq.rq.queue,
		qhp->wq.rq.sw_rq,
		qhp->wq.rq.cidx,
		qhp->wq.rq.pidx,
		qhp->wq.rq.in_use,
		qhp->wq.rq.size);
	cidx = qhp->wq.sq.cidx;
	pidx = qhp->wq.sq.pidx;
	if (cidx != pidx)
		fprintf(stderr, "SQ: \n");
	while (cidx != pidx) {
		swsqe = &qhp->wq.sq.sw_sq[cidx];
		fprintf(stderr, "%04u: wr_id %016" PRIx64
			" sq_wptr %08x read_len %u opcode 0x%x "
			"complete %u signaled %u cqe %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n",
			cidx,
			swsqe->wr_id,
			swsqe->idx,
			swsqe->read_len,
			swsqe->opcode,
			swsqe->complete,
			swsqe->signaled,
			htobe64(((uint64_t *)&swsqe->cqe)[0]),
			htobe64(((uint64_t *)&swsqe->cqe)[1]),
			htobe64(((uint64_t *)&swsqe->cqe)[2]),
			htobe64(((uint64_t *)&swsqe->cqe)[3]));
		if (++cidx == qhp->wq.sq.size)
			cidx = 0;
	}

	fprintf(stderr, "SQ WQ: \n");
	p = (u64 *)qhp->wq.sq.queue;
	for (i = 0; i < qhp->wq.sq.size * T4_SQ_NUM_SLOTS; i++) {
		for (j = 0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
			fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
				i, be64toh(p[0]), be64toh(p[1]));
			if (j == 0 && i == qhp->wq.sq.wq_pidx)
				fprintf(stderr, " <-- pidx");
			fprintf(stderr, "\n");
			p += 2;
		}
	}
	cidx = qhp->wq.rq.cidx;
	pidx = qhp->wq.rq.pidx;
	if (cidx != pidx)
		fprintf(stderr, "RQ: \n");
	while (cidx != pidx) {
		swrqe = &qhp->wq.rq.sw_rq[cidx];
		fprintf(stderr, "%04u: wr_id %016" PRIx64 "\n",
			cidx,
			swrqe->wr_id);
		if (++cidx == qhp->wq.rq.size)
			cidx = 0;
	}

	fprintf(stderr, "RQ WQ: \n");
	p = (u64 *)qhp->wq.rq.queue;
	for (i = 0; i < qhp->wq.rq.size * T4_RQ_NUM_SLOTS; i++) {
		for (j = 0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
			fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
				i, be64toh(p[0]), be64toh(p[1]));
			if (j == 0 && i == qhp->wq.rq.pidx)
				fprintf(stderr, " <-- pidx");
			if (j == 0 && i == qhp->wq.rq.cidx)
				fprintf(stderr, " <-- cidx");
			fprintf(stderr, "\n");
			p += 2;
		}
	}
}

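/*
 * Called when a stall is detected: walk every open device and dump the
 * state of all of its CQs and QPs.  The per-device and per-queue spin
 * locks are left commented out below, presumably so a wedged queue
 * cannot deadlock the dump.
 */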
void dump_state(void)
{
	struct c4iw_dev *dev;
	int i;

	fprintf(stderr, "STALL DETECTED:\n");
	TAILQ_FOREACH(dev, &devices, list) {
		//pthread_spin_lock(&dev->lock);
		fprintf(stderr, "Device %s\n", dev->ibv_dev.name);
		for (i = 0; i < dev->max_cq; i++) {
			if (dev->cqid2ptr[i]) {
				struct c4iw_cq *chp = dev->cqid2ptr[i];
				//pthread_spin_lock(&chp->lock);
				dump_cq(chp);
				//pthread_spin_unlock(&chp->lock);
			}
		}
		for (i = 0; i < dev->max_qp; i++) {
			if (dev->qpid2ptr[i]) {
				struct c4iw_qp *qhp = dev->qpid2ptr[i];
				//pthread_spin_lock(&qhp->lock);
				dump_qp(qhp);
				//pthread_spin_unlock(&qhp->lock);
			}
		}
		//pthread_spin_unlock(&dev->lock);
	}
	fprintf(stderr, "DUMP COMPLETE:\n");
	fflush(stderr);
}
#endif /* end of STALL_DETECTION */

/*
 * c4iw_abi_version is used to store ABI for iw_cxgb4 so the user mode library
 * can know if the driver supports the kernel mode db ringing.
 */
int c4iw_abi_version = 1;

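/*
 * Probe callback invoked by libibverbs for each uverbs device: match the
 * device's PCI vendor/device ID against hca_table[], verify that the
 * firmware major version is compatible, allocate and initialize the
 * c4iw_dev structure, and pick up the environment-variable tunables.
 */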
static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path,
					      int abi_version)
{
	char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp;
	char dev_str[IBV_SYSFS_PATH_MAX];
	struct c4iw_dev *dev;
	unsigned vendor, device, fw_maj, fw_min;
	int i;
	int devnum;
	char ib_param[16];

#ifndef __linux__
	if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
				ibdev, sizeof ibdev) < 0)
		return NULL;

	devnum = atoi(&ibdev[5]);

	if (ibdev[0] == 't' && ibdev[1] >= '4' && ibdev[1] <= '6' &&
	    strstr(&ibdev[2], "nex") && devnum >= 0) {
		snprintf(dev_str, sizeof(dev_str), "/dev/t%cnex/%d", ibdev[1],
		    devnum);
	} else
		return NULL;

	if (ibv_read_sysfs_file(dev_str, "%pnpinfo", value, sizeof value) < 0)
		return NULL;
	else {
		if (strstr(value, "vendor=")) {
			strncpy(ib_param, strstr(value, "vendor=") +
					strlen("vendor="), 6);
			ib_param[6] = '\0';
			sscanf(ib_param, "%i", &vendor);
		}

		if (strstr(value, "device=")) {
			strncpy(ib_param, strstr(value, "device=") +
					strlen("device="), 6);
			ib_param[6] = '\0';
			sscanf(ib_param, "%i", &device);
		}
	}
#else
	if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
				value, sizeof value) < 0)
		return NULL;
	sscanf(value, "%i", &vendor);

	if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
				value, sizeof value) < 0)
		return NULL;
	sscanf(value, "%i", &device);
#endif

	for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
		if (vendor == hca_table[i].vendor &&
		    device == hca_table[i].device)
			goto found;

	return NULL;

found:
	c4iw_abi_version = abi_version;

#ifndef __linux__
	if (ibv_read_sysfs_file(dev_str, "firmware_version",
				value, sizeof value) < 0)
		return NULL;
#else
	/*
	 * Verify that the firmware major number matches.  Major number
	 * mismatches are fatal.  Minor number mismatches are tolerated.
	 */
	if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
				ibdev, sizeof ibdev) < 0)
		return NULL;

	memset(devstr, 0, sizeof devstr);
	snprintf(devstr, sizeof devstr, "%s/class/infiniband/%s",
		 ibv_get_sysfs_path(), ibdev);
	if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0)
		return NULL;
#endif

	cp = strtok(value+1, ".");
	sscanf(cp, "%i", &fw_maj);
	cp = strtok(NULL, ".");
	sscanf(cp, "%i", &fw_min);

	if ((signed int)fw_maj < FW_MAJ) {
		fprintf(stderr, "libcxgb4: Fatal firmware version mismatch.  "
			"Firmware major number is %u and libcxgb4 needs %u.\n",
			fw_maj, FW_MAJ);
		fflush(stderr);
		return NULL;
	}

	DBGLOG("libcxgb4");

	if ((signed int)fw_min < FW_MIN) {
		PDBG("libcxgb4: non-fatal firmware version mismatch.  "
			"Firmware minor number is %u and libcxgb4 needs %u.\n",
			fw_min, FW_MIN);
		fflush(stderr);
	}

	PDBG("%s found vendor %d device %d type %d\n",
	     __FUNCTION__, vendor, device, CHELSIO_CHIP_VERSION(hca_table[i].device >> 8));

	dev = calloc(1, sizeof *dev);
	if (!dev) {
		return NULL;
	}

	pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE);
	dev->ibv_dev.ops = &c4iw_dev_ops;
	dev->chip_version = CHELSIO_CHIP_VERSION(hca_table[i].device >> 8);
	dev->abi_version = abi_version;

	PDBG("%s device claimed\n", __FUNCTION__);
	TAILQ_INSERT_TAIL(&devices, dev, list);
#ifdef STALL_DETECTION
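/*
 * CXGB4_STALL_TIMEOUT overrides the stall-detection timeout; values that
 * fail to parse or are negative fall back to 0.
 */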
{
	char *c = getenv("CXGB4_STALL_TIMEOUT");
	if (c) {
		errno = 0;
		stall_to = strtol(c, NULL, 0);
		if (errno || stall_to < 0)
			stall_to = 0;
	}
}
#endif
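/*
 * CXGB4_MA_WR=1 enables the ma_wr tunable; any other value leaves it
 * disabled.
 */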
{
	char *c = getenv("CXGB4_MA_WR");
	if (c) {
		ma_wr = strtol(c, NULL, 0);
		if (ma_wr != 1)
			ma_wr = 0;
	}
}
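/*
 * T5_ENABLE_WC controls userspace write-combining (WC) support on T5 and
 * later adapters: WC is on by default and is turned off here unless the
 * variable parses to exactly 1.
 */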
{
	char *c = getenv("T5_ENABLE_WC");
	if (c) {
		t5_en_wc = strtol(c, NULL, 0);
		if (t5_en_wc != 1)
			t5_en_wc = 0;
	}
}

	return &dev->ibv_dev;
}

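/*
 * Library constructor: cache the host page size/shift/mask and register
 * this provider with libibverbs.
 */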
static __attribute__((constructor)) void cxgb4_register_driver(void)
{
	c4iw_page_size = sysconf(_SC_PAGESIZE);
	c4iw_page_shift = long_log2(c4iw_page_size);
	c4iw_page_mask = ~(c4iw_page_size - 1);
	verbs_register_driver("cxgb4", cxgb4_driver_init);
}

#ifdef STATS
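/*
 * Library destructor (built only with STATS): log the per-process verbs
 * operation counters to syslog when the library is unloaded.
 */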
void __attribute__ ((destructor)) cs_fini(void);
void __attribute__ ((destructor)) cs_fini(void)
{
	syslog(LOG_NOTICE, "cxgb4 stats - sends %lu recv %lu read %lu "
	       "write %lu arm %lu cqe %lu mr %lu qp %lu cq %lu\n",
	       c4iw_stats.send, c4iw_stats.recv, c4iw_stats.read,
	       c4iw_stats.write, c4iw_stats.arm, c4iw_stats.cqe,
	       c4iw_stats.mr, c4iw_stats.qp, c4iw_stats.cq);
}
#endif