/*
 * Copyright (c) 2017 Mellanox Technologies, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef _MLX5DV_H_
#define _MLX5DV_H_

#include <infiniband/types.h> /* For the __be64 type */
#include <infiniband/endian.h>

#if defined(__SSE3__)
#include <emmintrin.h>
#include <tmmintrin.h>
#endif /* defined(__SSE3__) */

#include <infiniband/verbs.h>

/* Always inline the functions */
#ifdef __GNUC__
#define MLX5DV_ALWAYS_INLINE inline __attribute__((always_inline))
#else
#define MLX5DV_ALWAYS_INLINE inline
#endif

enum {
	MLX5_RCV_DBR	= 0,
	MLX5_SND_DBR	= 1,
};

enum mlx5dv_context_comp_mask {
	MLX5DV_CONTEXT_MASK_CQE_COMPRESION	= 1 << 0,
	MLX5DV_CONTEXT_MASK_RESERVED		= 1 << 1,
};

struct mlx5dv_cqe_comp_caps {
	uint32_t max_num;
	uint32_t supported_format; /* enum mlx5dv_cqe_comp_res_format */
};

/*
 * Direct verbs device-specific attributes
 */
struct mlx5dv_context {
	uint8_t		version;
	uint64_t	flags;
	uint64_t	comp_mask;
	struct mlx5dv_cqe_comp_caps	cqe_comp_caps;
};

enum mlx5dv_context_flags {
	/*
	 * This flag indicates if CQE version 0 or 1 is needed.
	 */
	MLX5DV_CONTEXT_FLAGS_CQE_V1	= (1 << 0),
	MLX5DV_CONTEXT_FLAGS_MPW	= (1 << 1),
};

enum mlx5dv_cq_init_attr_mask {
	MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE	= 1 << 0,
	MLX5DV_CQ_INIT_ATTR_MASK_RESERVED	= 1 << 1,
};

struct mlx5dv_cq_init_attr {
	uint64_t comp_mask; /* Use enum mlx5dv_cq_init_attr_mask */
	uint8_t cqe_comp_res_format; /* Use enum mlx5dv_cqe_comp_res_format */
};

struct ibv_cq_ex *mlx5dv_create_cq(struct ibv_context *context,
				   struct ibv_cq_init_attr_ex *cq_attr,
				   struct mlx5dv_cq_init_attr *mlx5_cq_attr);
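
/*
 * Illustrative sketch (not part of the API): creating a CQ that delivers
 * compressed CQEs in the hash mini-CQE format.  "ctx" is assumed to be an
 * already opened ibv_context and the CQ depth is arbitrary; support for
 * compressed CQEs should first be confirmed via mlx5dv_query_device() below.
 *
 *	struct ibv_cq_init_attr_ex cq_attr = { .cqe = 256 };
 *	struct mlx5dv_cq_init_attr dv_attr = {
 *		.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE,
 *		.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH,
 *	};
 *	struct ibv_cq_ex *cq = mlx5dv_create_cq(ctx, &cq_attr, &dv_attr);
 *
 *	if (!cq)
 *		perror("mlx5dv_create_cq");
 */
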
/*
 * Most device capabilities are reported by ibv_query_device(...),
 * but some HW device-specific information that is important for the
 * data path is not provided there.
 *
 * Return 0 on success.
 */
int mlx5dv_query_device(struct ibv_context *ctx_in,
			struct mlx5dv_context *attrs_out);
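
/*
 * Illustrative sketch (not part of the API): querying the mlx5-specific
 * attributes and checking for CQE-compression support.  "ctx" is assumed to
 * be an opened ibv_context; comp_mask is assumed to be set by the caller to
 * request optional fields and to be returned holding the fields actually
 * filled in.
 *
 *	struct mlx5dv_context dv_attrs = {
 *		.comp_mask = MLX5DV_CONTEXT_MASK_CQE_COMPRESION,
 *	};
 *
 *	if (!mlx5dv_query_device(ctx, &dv_attrs) &&
 *	    (dv_attrs.comp_mask & MLX5DV_CONTEXT_MASK_CQE_COMPRESION) &&
 *	    (dv_attrs.cqe_comp_caps.supported_format &
 *	     MLX5DV_CQE_RES_FORMAT_HASH))
 *		printf("CQE compression (hash format) is supported\n");
 */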

struct mlx5dv_qp {
	uint32_t		*dbrec;
	struct {
		void		*buf;
		uint32_t	wqe_cnt;
		uint32_t	stride;
	} sq;
	struct {
		void		*buf;
		uint32_t	wqe_cnt;
		uint32_t	stride;
	} rq;
	struct {
		void		*reg;
		uint32_t	size;
	} bf;
	uint64_t		comp_mask;
};

struct mlx5dv_cq {
	void			*buf;
	uint32_t		*dbrec;
	uint32_t		cqe_cnt;
	uint32_t		cqe_size;
	void			*uar;
	uint32_t		cqn;
	uint64_t		comp_mask;
};

struct mlx5dv_srq {
	void			*buf;
	uint32_t		*dbrec;
	uint32_t		stride;
	uint32_t		head;
	uint32_t		tail;
	uint64_t		comp_mask;
};

struct mlx5dv_rwq {
	void		*buf;
	uint32_t	*dbrec;
	uint32_t	wqe_cnt;
	uint32_t	stride;
	uint64_t	comp_mask;
};

struct mlx5dv_obj {
	struct {
		struct ibv_qp		*in;
		struct mlx5dv_qp	*out;
	} qp;
	struct {
		struct ibv_cq		*in;
		struct mlx5dv_cq	*out;
	} cq;
	struct {
		struct ibv_srq		*in;
		struct mlx5dv_srq	*out;
	} srq;
	struct {
		struct ibv_wq		*in;
		struct mlx5dv_rwq	*out;
	} rwq;
};

enum mlx5dv_obj_type {
	MLX5DV_OBJ_QP	= 1 << 0,
	MLX5DV_OBJ_CQ	= 1 << 1,
	MLX5DV_OBJ_SRQ	= 1 << 2,
	MLX5DV_OBJ_RWQ	= 1 << 3,
};

/*
 * This function initializes the mlx5dv_xxx structs according to the supplied
 * type(s).  The information used for initialization is taken from the ibv_xx
 * structs supplied as input.
 *
 * Requesting information on a CQ marks it as owned by DV for all
 * consumer-index related actions.
 *
 * The initialization type can be a combination of several types together.
 *
 * Return: 0 in case of success.
 */
int mlx5dv_init_obj(struct mlx5dv_obj *obj, uint64_t obj_type);
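
/*
 * Illustrative sketch (not part of the API): extracting the raw QP and CQ
 * layouts of objects created through the regular verbs API.  "qp" and "cq"
 * are assumed to be existing ibv_qp/ibv_cq objects on an mlx5 device.
 *
 *	struct mlx5dv_qp dv_qp;
 *	struct mlx5dv_cq dv_cq;
 *	struct mlx5dv_obj obj = {
 *		.qp = { .in = qp, .out = &dv_qp },
 *		.cq = { .in = cq, .out = &dv_cq },
 *	};
 *
 *	if (mlx5dv_init_obj(&obj, MLX5DV_OBJ_QP | MLX5DV_OBJ_CQ))
 *		return -1;
 *
 * On success, dv_qp.sq.buf, dv_qp.bf.reg, dv_cq.buf and the other fields may
 * be accessed directly by the application.
 */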

enum {
	MLX5_OPCODE_NOP			= 0x00,
	MLX5_OPCODE_SEND_INVAL		= 0x01,
	MLX5_OPCODE_RDMA_WRITE		= 0x08,
	MLX5_OPCODE_RDMA_WRITE_IMM	= 0x09,
	MLX5_OPCODE_SEND		= 0x0a,
	MLX5_OPCODE_SEND_IMM		= 0x0b,
	MLX5_OPCODE_TSO			= 0x0e,
	MLX5_OPCODE_RDMA_READ		= 0x10,
	MLX5_OPCODE_ATOMIC_CS		= 0x11,
	MLX5_OPCODE_ATOMIC_FA		= 0x12,
	MLX5_OPCODE_ATOMIC_MASKED_CS	= 0x14,
	MLX5_OPCODE_ATOMIC_MASKED_FA	= 0x15,
	MLX5_OPCODE_FMR			= 0x19,
	MLX5_OPCODE_LOCAL_INVAL		= 0x1b,
	MLX5_OPCODE_CONFIG_CMD		= 0x1f,
	MLX5_OPCODE_UMR			= 0x25,
};

/*
 * CQE related part
 */

enum {
	MLX5_INLINE_SCATTER_32	= 0x4,
	MLX5_INLINE_SCATTER_64	= 0x8,
};

enum {
	MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR		= 0x01,
	MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR		= 0x02,
	MLX5_CQE_SYNDROME_LOCAL_PROT_ERR		= 0x04,
	MLX5_CQE_SYNDROME_WR_FLUSH_ERR			= 0x05,
	MLX5_CQE_SYNDROME_MW_BIND_ERR			= 0x06,
	MLX5_CQE_SYNDROME_BAD_RESP_ERR			= 0x10,
	MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR		= 0x11,
	MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR		= 0x12,
	MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR		= 0x13,
	MLX5_CQE_SYNDROME_REMOTE_OP_ERR			= 0x14,
	MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR	= 0x15,
	MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR		= 0x16,
	MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR		= 0x22,
};

enum {
	MLX5_CQE_L2_OK = 1 << 0,
	MLX5_CQE_L3_OK = 1 << 1,
	MLX5_CQE_L4_OK = 1 << 2,
};

enum {
	MLX5_CQE_L3_HDR_TYPE_NONE = 0x0,
	MLX5_CQE_L3_HDR_TYPE_IPV6 = 0x1,
	MLX5_CQE_L3_HDR_TYPE_IPV4 = 0x2,
};

enum {
	MLX5_CQE_OWNER_MASK	= 1,
	MLX5_CQE_REQ		= 0,
	MLX5_CQE_RESP_WR_IMM	= 1,
	MLX5_CQE_RESP_SEND	= 2,
	MLX5_CQE_RESP_SEND_IMM	= 3,
	MLX5_CQE_RESP_SEND_INV	= 4,
	MLX5_CQE_RESIZE_CQ	= 5,
	MLX5_CQE_REQ_ERR	= 13,
	MLX5_CQE_RESP_ERR	= 14,
	MLX5_CQE_INVALID	= 15,
};

enum {
	MLX5_CQ_DOORBELL			= 0x20
};

enum {
	MLX5_CQ_DB_REQ_NOT_SOL	= 1 << 24,
	MLX5_CQ_DB_REQ_NOT	= 0 << 24,
};

struct mlx5_err_cqe {
	uint8_t		rsvd0[32];
	uint32_t	srqn;
	uint8_t		rsvd1[18];
	uint8_t		vendor_err_synd;
	uint8_t		syndrome;
	uint32_t	s_wqe_opcode_qpn;
	uint16_t	wqe_counter;
	uint8_t		signature;
	uint8_t		op_own;
};

struct mlx5_cqe64 {
	uint8_t		rsvd0[17];
	uint8_t		ml_path;
	uint8_t		rsvd20[4];
	uint16_t	slid;
	uint32_t	flags_rqpn;
	uint8_t		hds_ip_ext;
	uint8_t		l4_hdr_type_etc;
	uint16_t	vlan_info;
	uint32_t	srqn_uidx;
	uint32_t	imm_inval_pkey;
	uint8_t		rsvd40[4];
	uint32_t	byte_cnt;
	__be64		timestamp;
	uint32_t	sop_drop_qpn;
	uint16_t	wqe_counter;
	uint8_t		signature;
	uint8_t		op_own;
};

enum mlx5dv_cqe_comp_res_format {
	MLX5DV_CQE_RES_FORMAT_HASH		= 1 << 0,
	MLX5DV_CQE_RES_FORMAT_CSUM		= 1 << 1,
	MLX5DV_CQE_RES_FORMAT_RESERVED		= 1 << 2,
};

static MLX5DV_ALWAYS_INLINE
uint8_t mlx5dv_get_cqe_owner(struct mlx5_cqe64 *cqe)
{
	return cqe->op_own & 0x1;
}

static MLX5DV_ALWAYS_INLINE
void mlx5dv_set_cqe_owner(struct mlx5_cqe64 *cqe, uint8_t val)
{
	cqe->op_own = (val & 0x1) | (cqe->op_own & ~0x1);
}

/* Solicited event */
static MLX5DV_ALWAYS_INLINE
uint8_t mlx5dv_get_cqe_se(struct mlx5_cqe64 *cqe)
{
	return (cqe->op_own >> 1) & 0x1;
}

static MLX5DV_ALWAYS_INLINE
uint8_t mlx5dv_get_cqe_format(struct mlx5_cqe64 *cqe)
{
	return (cqe->op_own >> 2) & 0x3;
}

static MLX5DV_ALWAYS_INLINE
uint8_t mlx5dv_get_cqe_opcode(struct mlx5_cqe64 *cqe)
{
	return cqe->op_own >> 4;
}
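
/*
 * Illustrative sketch (not part of the API): inspecting a CQE taken from a
 * CQ buffer exposed via mlx5dv_init_obj().  64-byte CQEs are assumed and
 * "ci" is a hypothetical consumer index maintained by the application.
 *
 *	struct mlx5_cqe64 *cqe = (struct mlx5_cqe64 *)
 *		((uint8_t *)dv_cq.buf +
 *		 (ci & (dv_cq.cqe_cnt - 1)) * dv_cq.cqe_size);
 *	uint8_t opcode = mlx5dv_get_cqe_opcode(cqe);
 *
 *	if (opcode == MLX5_CQE_REQ_ERR || opcode == MLX5_CQE_RESP_ERR) {
 *		struct mlx5_err_cqe *ecqe = (struct mlx5_err_cqe *)cqe;
 *		fprintf(stderr, "CQE error, syndrome 0x%x\n", ecqe->syndrome);
 *	}
 *
 * A real polling loop must also validate the CQE before reading it: the
 * opcode must differ from MLX5_CQE_INVALID and mlx5dv_get_cqe_owner() must
 * match the ownership bit expected for the current pass over the CQ buffer.
 */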

/*
 * WQE related part
 */
enum {
	MLX5_INVALID_LKEY	= 0x100,
};

enum {
	MLX5_EXTENDED_UD_AV	= 0x80000000,
};

enum {
	MLX5_WQE_CTRL_CQ_UPDATE	= 2 << 2,
	MLX5_WQE_CTRL_SOLICITED	= 1 << 1,
	MLX5_WQE_CTRL_FENCE	= 4 << 5,
	MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE = 1 << 5,
};

enum {
	MLX5_SEND_WQE_BB	= 64,
	MLX5_SEND_WQE_SHIFT	= 6,
};

enum {
	MLX5_INLINE_SEG	= 0x80000000,
};

enum {
	MLX5_ETH_WQE_L3_CSUM = (1 << 6),
	MLX5_ETH_WQE_L4_CSUM = (1 << 7),
};

struct mlx5_wqe_srq_next_seg {
	uint8_t			rsvd0[2];
	uint16_t		next_wqe_index;
	uint8_t			signature;
	uint8_t			rsvd1[11];
};

struct mlx5_wqe_data_seg {
	uint32_t		byte_count;
	uint32_t		lkey;
	uint64_t		addr;
};

struct mlx5_wqe_ctrl_seg {
	uint32_t	opmod_idx_opcode;
	uint32_t	qpn_ds;
	uint8_t		signature;
	uint8_t		rsvd[2];
	uint8_t		fm_ce_se;
	uint32_t	imm;
};

struct mlx5_wqe_av {
	union {
		struct {
			uint32_t	qkey;
			uint32_t	reserved;
		} qkey;
		uint64_t	dc_key;
	} key;
	uint32_t	dqp_dct;
	uint8_t		stat_rate_sl;
	uint8_t		fl_mlid;
	uint16_t	rlid;
	uint8_t		reserved0[4];
	uint8_t		rmac[6];
	uint8_t		tclass;
	uint8_t		hop_limit;
	uint32_t	grh_gid_fl;
	uint8_t		rgid[16];
};

struct mlx5_wqe_datagram_seg {
	struct mlx5_wqe_av	av;
};

struct mlx5_wqe_raddr_seg {
	uint64_t	raddr;
	uint32_t	rkey;
	uint32_t	reserved;
};

struct mlx5_wqe_atomic_seg {
	uint64_t	swap_add;
	uint64_t	compare;
};

struct mlx5_wqe_inl_data_seg {
	uint32_t	byte_count;
};

struct mlx5_wqe_eth_seg {
	uint32_t	rsvd0;
	uint8_t		cs_flags;
	uint8_t		rsvd1;
	uint16_t	mss;
	uint32_t	rsvd2;
	uint16_t	inline_hdr_sz;
	uint8_t		inline_hdr_start[2];
	uint8_t		inline_hdr[16];
};

/*
 * Control segment - contains some control information for the current WQE.
 *
 * Output:
 *	seg	  - control segment to be filled
 * Input:
 *	pi	  - WQEBB number of the first block of this WQE.
 *		    This number should wrap at 0xffff, regardless of
 *		    size of the WQ.
 *	opcode	  - Opcode of this WQE. Encodes the type of operation
 *		    to be executed on the QP.
 *	opmod	  - Opcode modifier.
 *	qp_num	  - QP/SQ number this WQE is posted to.
 *	fm_ce_se  - FM (fence mode), CE (completion and event mode)
 *		    and SE (solicited event).
 *	ds	  - WQE size in octowords (16-byte units). DS accounts for all
 *		    the segments in the WQE as summarized in WQE construction.
 *	signature - WQE signature.
 *	imm	  - Immediate data/Invalidation key/UMR mkey.
 */
static MLX5DV_ALWAYS_INLINE
void mlx5dv_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi,
			 uint8_t opcode, uint8_t opmod, uint32_t qp_num,
			 uint8_t fm_ce_se, uint8_t ds,
			 uint8_t signature, uint32_t imm)
{
	seg->opmod_idx_opcode	= htobe32(((uint32_t)opmod << 24) | ((uint32_t)pi << 8) | opcode);
	seg->qpn_ds		= htobe32((qp_num << 8) | ds);
	seg->fm_ce_se		= fm_ce_se;
	seg->signature		= signature;
	/*
	 * The caller should prepare "imm" in advance based on the WR opcode.
	 * For IBV_WR_SEND_WITH_IMM and IBV_WR_RDMA_WRITE_WITH_IMM,
	 * "imm" should be assigned as is.
	 * For IBV_WR_SEND_WITH_INV, it should be htobe32(imm).
	 */
	seg->imm		= imm;
}
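
/*
 * Illustrative sketch (not part of the API): filling the control segment of
 * a two-octoword SEND WQE (control segment plus one data segment) that
 * requests a completion.  "ctrl" points into the SQ buffer; "sq_pi" and
 * "qp_num" are hypothetical values maintained by the application.
 *
 *	mlx5dv_set_ctrl_seg(ctrl, sq_pi, MLX5_OPCODE_SEND, 0, qp_num,
 *			    MLX5_WQE_CTRL_CQ_UPDATE, 2, 0, 0);
 */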

/* x86 optimized version of mlx5dv_set_ctrl_seg()
 *
 * This is useful when performing calculations on large data sets
 * in parallel.
 *
 * It is not suitable for serialized algorithms.
 */
#if defined(__SSE3__)
static MLX5DV_ALWAYS_INLINE
void mlx5dv_x86_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi,
			     uint8_t opcode, uint8_t opmod, uint32_t qp_num,
			     uint8_t fm_ce_se, uint8_t ds,
			     uint8_t signature, uint32_t imm)
{
	__m128i val  = _mm_set_epi32(imm, qp_num, (ds << 16) | pi,
				     (signature << 24) | (opcode << 16) | (opmod << 8) | fm_ce_se);
	__m128i mask = _mm_set_epi8(15, 14, 13, 12,	/* immediate */
				     0,			/* signal/fence_mode */
				     0x80, 0x80,	/* reserved */
				     3,			/* signature */
				     6,			/* data size */
				     8, 9, 10,		/* QP num */
				     2,			/* opcode */
				     4, 5,		/* sw_pi in BE */
				     1			/* opmod */
				     );
	*(__m128i *) seg = _mm_shuffle_epi8(val, mask);
}
#endif /* defined(__SSE3__) */

/*
 * Datagram Segment - contains address information required in order
 * to form a datagram message.
 *
 * Output:
 *	seg		- datagram segment to be filled.
 * Input:
 *	key		- Q_key/access key.
 *	dqp_dct		- Destination QP number for UD and DCT for DC.
 *	ext		- Address vector extension.
 *	stat_rate_sl	- Maximum static rate control, SL/ethernet priority.
 *	fl_mlid		- Force loopback and source LID for IB.
 *	rlid		- Remote LID
 *	rmac		- Remote MAC
 *	tclass		- GRH tclass/IPv6 tclass/IPv4 ToS
 *	hop_limit	- GRH hop limit/IPv6 hop limit/IPv4 TTL
 *	grh_gid_fi	- GRH, source GID address and IPv6 flow label.
 *	rgid		- Remote GID/IP address.
 */
static MLX5DV_ALWAYS_INLINE
void mlx5dv_set_dgram_seg(struct mlx5_wqe_datagram_seg *seg,
			  uint64_t key, uint32_t dqp_dct,
			  uint8_t ext, uint8_t stat_rate_sl,
			  uint8_t fl_mlid, uint16_t rlid,
			  uint8_t *rmac, uint8_t tclass,
			  uint8_t hop_limit, uint32_t grh_gid_fi,
			  uint8_t *rgid)
{

	/* Always write all 64 bits; for a q_key the reserved part will be 0 */
	seg->av.key.dc_key	= htobe64(key);
	seg->av.dqp_dct		= htobe32(((uint32_t)ext << 31) | dqp_dct);
	seg->av.stat_rate_sl	= stat_rate_sl;
	seg->av.fl_mlid		= fl_mlid;
	seg->av.rlid		= htobe16(rlid);
	memcpy(seg->av.rmac, rmac, 6);
	seg->av.tclass		= tclass;
	seg->av.hop_limit	= hop_limit;
	seg->av.grh_gid_fl	= htobe32(grh_gid_fi);
	memcpy(seg->av.rgid, rgid, 16);
}
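
/*
 * Illustrative sketch (not part of the API): filling the address information
 * for a UD send over InfiniBand without a GRH.  "qkey", "remote_qpn" and
 * "remote_lid" are hypothetical values; ext, stat_rate_sl and fl_mlid are
 * left at 0 (no extended AV, no static rate limit, no force loopback), and
 * the GRH-related fields are zeroed since no GRH is used.
 *
 *	uint8_t zero_mac[6] = {0};
 *	uint8_t zero_gid[16] = {0};
 *
 *	mlx5dv_set_dgram_seg(dseg, qkey, remote_qpn, 0, 0, 0,
 *			     remote_lid, zero_mac, 0, 0, 0, zero_gid);
 */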

/*
 * Data Segments - contain pointers and a byte count for the scatter/gather list.
 * They can optionally contain data, which will save a memory read access for
 * gather Work Requests.
 */
static MLX5DV_ALWAYS_INLINE
void mlx5dv_set_data_seg(struct mlx5_wqe_data_seg *seg,
			 uint32_t length, uint32_t lkey,
			 uintptr_t address)
{
	seg->byte_count = htobe32(length);
	seg->lkey       = htobe32(lkey);
	seg->addr       = htobe64(address);
}
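
/*
 * Illustrative sketch (not part of the API): pointing a data segment at a
 * registered buffer.  "mr" is assumed to be the ibv_mr returned by
 * ibv_reg_mr() for "buf", and "len" the number of bytes to transfer.
 *
 *	mlx5dv_set_data_seg(dseg, len, mr->lkey, (uintptr_t)buf);
 */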
/*
 * x86 optimized version of mlx5dv_set_data_seg()
 *
 * This is useful when performing calculations on large data sets
 * in parallel.
 *
 * It is not suitable for serialized algorithms.
 */
#if defined(__SSE3__)
static MLX5DV_ALWAYS_INLINE
void mlx5dv_x86_set_data_seg(struct mlx5_wqe_data_seg *seg,
			     uint32_t length, uint32_t lkey,
			     uintptr_t address)
{
	__m128i val  = _mm_set_epi32((uint32_t)address, (uint32_t)(address >> 32), lkey, length);
	__m128i mask = _mm_set_epi8(12, 13, 14, 15,	/* local address low */
				     8, 9, 10, 11,	/* local address high */
				     4, 5, 6, 7,	/* l_key */
				     0, 1, 2, 3		/* byte count */
				     );
	*(__m128i *) seg = _mm_shuffle_epi8(val, mask);
}
#endif /* defined(__SSE3__) */

/*
 * Eth Segment - contains packet headers and information for stateless L2, L3, L4 offloading.
 *
 * Output:
 *	seg		 - Eth segment to be filled.
 * Input:
 *	cs_flags	 - l3cs/l3cs_inner/l4cs/l4cs_inner.
 *	mss		 - Maximum segment size. For TSO WQEs, the number of bytes
 *			   in the TCP payload to be transmitted in each packet. Must
 *			   be 0 on non-TSO WQEs.
 *	inline_hdr_sz	 - Length of the inlined packet headers.
 *	inline_hdr_start - Inlined packet header.
 */
static MLX5DV_ALWAYS_INLINE
void mlx5dv_set_eth_seg(struct mlx5_wqe_eth_seg *seg, uint8_t cs_flags,
			uint16_t mss, uint16_t inline_hdr_sz,
			uint8_t *inline_hdr_start)
{
	seg->cs_flags		= cs_flags;
	seg->mss		= htobe16(mss);
	seg->inline_hdr_sz	= htobe16(inline_hdr_sz);
	memcpy(seg->inline_hdr_start, inline_hdr_start, inline_hdr_sz);
}
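
/*
 * Illustrative sketch (not part of the API): requesting L3/L4 checksum
 * offload for a raw Ethernet WQE and inlining the first 16 bytes of the
 * frame.  "pkt" is a hypothetical pointer to the start of the packet
 * headers; mss is 0 because this is not a TSO WQE.
 *
 *	mlx5dv_set_eth_seg(eseg, MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM,
 *			   0, 16, pkt);
 */
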
#endif /* _MLX5DV_H_ */