1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include "gaudiP.h"
9#include "../include/hw_ip/mmu/mmu_general.h"
10#include "../include/hw_ip/mmu/mmu_v1_1.h"
11#include "../include/gaudi/gaudi_masks.h"
12#include "../include/gaudi/gaudi_fw_if.h"
13#include "../include/gaudi/gaudi_reg_map.h"
14#include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16#include <linux/module.h>
17#include <linux/pci.h>
18#include <linux/firmware.h>
19#include <linux/hwmon.h>
20#include <linux/iommu.h>
21#include <linux/seq_file.h>
22
23/*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 *        - Range registers
28 *        - MMU
29 *
30 * 2. DDR is protected by:
31 *        - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 *        - Range registers
35 *        - Protection bits
36 *
37 * MMU is always enabled.
38 *
39 * QMAN DMA channels 0,1 (PCI DMAN):
40 *     - DMA is not secured.
41 *     - PQ and CQ are secured.
42 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43 *                      because of TDMA (tensor DMA). Hence, WREG is always not
44 *                      secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 *     - Clear SRAM on context switch (happens on context switch when device is
53 *       idle)
54 *     - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
61
62#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65
66MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69
70#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
71
72#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
73#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
74#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
75#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
76
77#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
78#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
79#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
80#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
81#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
82#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
83#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
84#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
85#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
86
87#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
88
89#define GAUDI_MAX_STRING_LEN		20
90
91#define GAUDI_CB_POOL_CB_CNT		512
92#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
93
94#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
95
96#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
97
98#define GAUDI_NUM_OF_QM_ERR_CAUSE	16
99
100#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
101
102#define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */
103
104#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
105
106#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
107
108#define MONITOR_SOB_STRING_SIZE		256
109
110static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
111	GAUDI_QUEUE_ID_DMA_0_0,
112	GAUDI_QUEUE_ID_DMA_0_1,
113	GAUDI_QUEUE_ID_DMA_0_2,
114	GAUDI_QUEUE_ID_DMA_0_3,
115	GAUDI_QUEUE_ID_DMA_1_0,
116	GAUDI_QUEUE_ID_DMA_1_1,
117	GAUDI_QUEUE_ID_DMA_1_2,
118	GAUDI_QUEUE_ID_DMA_1_3
119};
120
121static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
122	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
123	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
124	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
125	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
126	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
127	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
128	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
129	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
130};
131
132static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
133	[0] = GAUDI_QUEUE_ID_DMA_0_0,
134	[1] = GAUDI_QUEUE_ID_DMA_0_1,
135	[2] = GAUDI_QUEUE_ID_DMA_0_2,
136	[3] = GAUDI_QUEUE_ID_DMA_0_3,
137	[4] = GAUDI_QUEUE_ID_DMA_1_0,
138	[5] = GAUDI_QUEUE_ID_DMA_1_1,
139	[6] = GAUDI_QUEUE_ID_DMA_1_2,
140	[7] = GAUDI_QUEUE_ID_DMA_1_3,
141};
142
143static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
144	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
145	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
146	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
147	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
148	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
149	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
150	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
151	[PACKET_FENCE]		= sizeof(struct packet_fence),
152	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
153	[PACKET_NOP]		= sizeof(struct packet_nop),
154	[PACKET_STOP]		= sizeof(struct packet_stop),
155	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
156	[PACKET_WAIT]		= sizeof(struct packet_wait),
157	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
158};
159
160static inline bool validate_packet_id(enum packet_id id)
161{
162	switch (id) {
163	case PACKET_WREG_32:
164	case PACKET_WREG_BULK:
165	case PACKET_MSG_LONG:
166	case PACKET_MSG_SHORT:
167	case PACKET_CP_DMA:
168	case PACKET_REPEAT:
169	case PACKET_MSG_PROT:
170	case PACKET_FENCE:
171	case PACKET_LIN_DMA:
172	case PACKET_NOP:
173	case PACKET_STOP:
174	case PACKET_ARB_POINT:
175	case PACKET_WAIT:
176	case PACKET_LOAD_AND_EXE:
177		return true;
178	default:
179		return false;
180	}
181}
182
183static const char * const
184gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
185	"tpc_address_exceed_slm",
186	"tpc_div_by_0",
187	"tpc_spu_mac_overflow",
188	"tpc_spu_addsub_overflow",
189	"tpc_spu_abs_overflow",
190	"tpc_spu_fp_dst_nan_inf",
191	"tpc_spu_fp_dst_denorm",
192	"tpc_vpu_mac_overflow",
193	"tpc_vpu_addsub_overflow",
194	"tpc_vpu_abs_overflow",
195	"tpc_vpu_fp_dst_nan_inf",
196	"tpc_vpu_fp_dst_denorm",
197	"tpc_assertions",
198	"tpc_illegal_instruction",
199	"tpc_pc_wrap_around",
200	"tpc_qm_sw_err",
201	"tpc_hbw_rresp_err",
202	"tpc_hbw_bresp_err",
203	"tpc_lbw_rresp_err",
204	"tpc_lbw_bresp_err"
205};
206
207static const char * const
208gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
209	"PQ AXI HBW error",
210	"CQ AXI HBW error",
211	"CP AXI HBW error",
212	"CP error due to undefined OPCODE",
213	"CP encountered STOP OPCODE",
214	"CP AXI LBW error",
215	"CP WRREG32 or WRBULK returned error",
216	"N/A",
217	"FENCE 0 inc over max value and clipped",
218	"FENCE 1 inc over max value and clipped",
219	"FENCE 2 inc over max value and clipped",
220	"FENCE 3 inc over max value and clipped",
221	"FENCE 0 dec under min value and clipped",
222	"FENCE 1 dec under min value and clipped",
223	"FENCE 2 dec under min value and clipped",
224	"FENCE 3 dec under min value and clipped"
225};
226
227static const char * const
228gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
229	"Choice push while full error",
230	"Choice Q watchdog error",
231	"MSG AXI LBW returned with error"
232};
233
234static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
235	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
236	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
237	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
238	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
239	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
240	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
241	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
242	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
243	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
244	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
245	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
246	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
247	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
248	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
249	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
250	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
251	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
252	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
253	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
254	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
255	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
256	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
257	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
258	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
259	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
260	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
261	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
262	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
263	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
264	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
265	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
266	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
267	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
268	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
269	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
270	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
271	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
272	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
273	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
274	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
275	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
276	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
277	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
278	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
279	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
280	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
281	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
282	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
283	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
284	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
285	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
286	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
287	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
288	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
289	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
290	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
291	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
292	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
293	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
294	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
295	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
296	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
297	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
298	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
299	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
300	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
301	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
302	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
303	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
304	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
305	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
306	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
307	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
308	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
309	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
310	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
311	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
312	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
313	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
314	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
315	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
316	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
317	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
318	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
319	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
320	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
321	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
322	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
323	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
324	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
325	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
326	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
327	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
328	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
329	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
330	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
331	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
332	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
333	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
334	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
335	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
336	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
337	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
338	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
339	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
340	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
341	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
342	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
343	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
344	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
345	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
346	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
347	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
348};
349
350static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
351	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
352	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
353	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
354	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
355	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
356	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
357	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
358	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
359	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
360	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
361	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
362	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
363	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
364	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
365	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
366	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
367	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
368	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
369	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
370	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
371	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
372	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
373	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
374	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
375	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
376	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
377	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
378};
379
380static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
381	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
382	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
383	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
384	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
385	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
386	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
387	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
388	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
389	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
390	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
391	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
392};
393
394static s64 gaudi_state_dump_specs_props[] = {
395	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
396	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
397	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
398	[SP_MON_OBJ_WR_ADDR_LOW] =
399		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
400	[SP_MON_OBJ_WR_ADDR_HIGH] =
401		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
402	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
403	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
404	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
405	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
406	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
407	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
408	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
409	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
410	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
411	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
412	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
413	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
414	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
415	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
416	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
417	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
418	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
419	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
420	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
421	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
422	[SP_FENCE0_CNT_OFFSET] =
423		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
424	[SP_FENCE0_RDATA_OFFSET] =
425		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
426	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
427	[SP_NUM_CORES] = 1,
428};
429
430static const int gaudi_queue_id_to_engine_id[] = {
431	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
432	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
433	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
434	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
435	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
436	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
437	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
438	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
439	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
440	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
441	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
442	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
443	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
444	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
445	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
446	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
447	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
448	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
449	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
450	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
451	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
452	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
453	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
454	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
455	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
456	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
457	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
458	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
459	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
460};
461
462/* The order here is opposite to the order of the indexing in the h/w.
463 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
464 */
465static const char * const gaudi_sync_manager_names[] = {
466	"SYNC_MGR_E_N",
467	"SYNC_MGR_W_N",
468	"SYNC_MGR_E_S",
469	"SYNC_MGR_W_S",
470	NULL
471};
472
473struct ecc_info_extract_params {
474	u64 block_address;
475	u32 num_memories;
476	bool derr;
477};
478
479static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
480								u64 phys_addr);
481static int gaudi_send_job_on_qman0(struct hl_device *hdev,
482					struct hl_cs_job *job);
483static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
484					u32 size, u64 val);
485static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
486					u32 num_regs, u32 val);
487static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
488				u32 tpc_id);
489static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
490static int gaudi_cpucp_info_get(struct hl_device *hdev);
491static void gaudi_disable_clock_gating(struct hl_device *hdev);
492static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
493static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
494				u32 size, bool eb);
495static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
496				struct hl_gen_wait_properties *prop);
497static inline enum hl_collective_mode
498get_collective_mode(struct hl_device *hdev, u32 queue_id)
499{
500	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501		return HL_COLLECTIVE_MASTER;
502
503	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505		return HL_COLLECTIVE_SLAVE;
506
507	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509		return HL_COLLECTIVE_SLAVE;
510
511	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513		return HL_COLLECTIVE_SLAVE;
514
515	return HL_COLLECTIVE_NOT_SUPPORTED;
516}
517
518static inline void set_default_power_values(struct hl_device *hdev)
519{
520	struct asic_fixed_properties *prop = &hdev->asic_prop;
521
522	if (hdev->card_type == cpucp_card_type_pmc) {
523		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524
525		if (prop->fw_security_enabled)
526			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527		else
528			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529	} else {
530		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532	}
533}
534
535static int gaudi_set_fixed_properties(struct hl_device *hdev)
536{
537	struct asic_fixed_properties *prop = &hdev->asic_prop;
538	u32 num_sync_stream_queues = 0;
539	int i;
540
541	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
542	prop->hw_queues_props = kcalloc(prop->max_queues,
543			sizeof(struct hw_queue_properties),
544			GFP_KERNEL);
545
546	if (!prop->hw_queues_props)
547		return -ENOMEM;
548
549	for (i = 0 ; i < prop->max_queues ; i++) {
550		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
551			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
552			prop->hw_queues_props[i].driver_only = 0;
553			prop->hw_queues_props[i].supports_sync_stream = 1;
554			prop->hw_queues_props[i].cb_alloc_flags =
555				CB_ALLOC_KERNEL;
556			num_sync_stream_queues++;
557		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
558			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
559			prop->hw_queues_props[i].driver_only = 1;
560			prop->hw_queues_props[i].supports_sync_stream = 0;
561			prop->hw_queues_props[i].cb_alloc_flags =
562				CB_ALLOC_KERNEL;
563		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
564			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
565			prop->hw_queues_props[i].driver_only = 0;
566			prop->hw_queues_props[i].supports_sync_stream = 0;
567			prop->hw_queues_props[i].cb_alloc_flags =
568				CB_ALLOC_USER;
569
570		}
571		prop->hw_queues_props[i].collective_mode =
572						get_collective_mode(hdev, i);
573	}
574
575	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
576	prop->cfg_base_address = CFG_BASE;
577	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
578	prop->host_base_address = HOST_PHYS_BASE;
579	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
580	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
581	prop->completion_mode = HL_COMPLETION_MODE_JOB;
582	prop->collective_first_sob = 0;
583	prop->collective_first_mon = 0;
584
585	/* 2 SOBs per internal queue stream are reserved for collective */
586	prop->sync_stream_first_sob =
587			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
588			* QMAN_STREAMS * HL_RSVD_SOBS;
589
590	/* 1 monitor per internal queue stream are reserved for collective
591	 * 2 monitors per external queue stream are reserved for collective
592	 */
593	prop->sync_stream_first_mon =
594			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
595			(NUMBER_OF_EXT_HW_QUEUES * 2);
596
597	prop->dram_base_address = DRAM_PHYS_BASE;
598	prop->dram_size = GAUDI_HBM_SIZE_32GB;
599	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
600	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
601
602	prop->sram_base_address = SRAM_BASE_ADDR;
603	prop->sram_size = SRAM_SIZE;
604	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
605	prop->sram_user_base_address =
606			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
607
608	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
609	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
610
611	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
612	if (hdev->pldm)
613		prop->mmu_pgt_size = 0x800000; /* 8MB */
614	else
615		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
616	prop->mmu_pte_size = HL_PTE_SIZE;
617	prop->dram_page_size = PAGE_SIZE_2MB;
618	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
619	prop->dram_supports_virtual_memory = false;
620
621	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
622	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
623	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
624	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
625	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
626	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
627	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
628	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
629	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
630	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
631	prop->pmmu.start_addr = VA_HOST_SPACE_START;
632	prop->pmmu.end_addr =
633			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
634	prop->pmmu.page_size = PAGE_SIZE_4KB;
635	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
636	prop->pmmu.last_mask = LAST_MASK;
637	/* TODO: will be duplicated until implementing per-MMU props */
638	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
639	prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
640
641	/* PMMU and HPMMU are the same except of page size */
642	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
643	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
644
645	/* shifts and masks are the same in PMMU and DMMU */
646	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
647	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
648	prop->dmmu.end_addr = VA_HOST_SPACE_END;
649	prop->dmmu.page_size = PAGE_SIZE_2MB;
650	prop->dmmu.pgt_size = prop->mmu_pgt_size;
651
652	prop->cfg_size = CFG_SIZE;
653	prop->max_asid = MAX_ASID;
654	prop->num_of_events = GAUDI_EVENT_SIZE;
655	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
656	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
657
658	set_default_power_values(hdev);
659
660	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
661	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
662
663	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
664	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
665
666	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
667					CARD_NAME_MAX_LEN);
668
669	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
670
671	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
672			prop->sync_stream_first_sob +
673			(num_sync_stream_queues * HL_RSVD_SOBS);
674	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
675			prop->sync_stream_first_mon +
676			(num_sync_stream_queues * HL_RSVD_MONS);
677
678	prop->first_available_user_interrupt = USHRT_MAX;
679	prop->tpc_interrupt_id = USHRT_MAX;
680
681	/* single msi */
682	prop->eq_interrupt_id = 0;
683
684	for (i = 0 ; i < HL_MAX_DCORES ; i++)
685		prop->first_available_cq[i] = USHRT_MAX;
686
687	prop->fw_cpu_boot_dev_sts0_valid = false;
688	prop->fw_cpu_boot_dev_sts1_valid = false;
689	prop->hard_reset_done_by_fw = false;
690	prop->gic_interrupts_enable = true;
691
692	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
693
694	prop->clk_pll_index = HL_GAUDI_MME_PLL;
695	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
696
697	prop->use_get_power_for_reset_history = true;
698
699	prop->configurable_stop_on_err = true;
700
701	prop->set_max_power_on_device_init = true;
702
703	prop->dma_mask = 48;
704
705	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
706
707	return 0;
708}
709
710static int gaudi_pci_bars_map(struct hl_device *hdev)
711{
712	static const char * const name[] = {"SRAM", "CFG", "HBM"};
713	bool is_wc[3] = {false, false, true};
714	int rc;
715
716	rc = hl_pci_bars_map(hdev, name, is_wc);
717	if (rc)
718		return rc;
719
720	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
721			(CFG_BASE - SPI_FLASH_BASE_ADDR);
722
723	return 0;
724}
725
726static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
727{
728	struct gaudi_device *gaudi = hdev->asic_specific;
729	struct hl_inbound_pci_region pci_region;
730	u64 old_addr = addr;
731	int rc;
732
733	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
734		return old_addr;
735
736	if (hdev->asic_prop.iatu_done_by_fw)
737		return U64_MAX;
738
739	/* Inbound Region 2 - Bar 4 - Point to HBM */
740	pci_region.mode = PCI_BAR_MATCH_MODE;
741	pci_region.bar = HBM_BAR_ID;
742	pci_region.addr = addr;
743	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
744	if (rc)
745		return U64_MAX;
746
747	if (gaudi) {
748		old_addr = gaudi->hbm_bar_cur_addr;
749		gaudi->hbm_bar_cur_addr = addr;
750	}
751
752	return old_addr;
753}
754
755static int gaudi_init_iatu(struct hl_device *hdev)
756{
757	struct hl_inbound_pci_region inbound_region;
758	struct hl_outbound_pci_region outbound_region;
759	int rc;
760
761	if (hdev->asic_prop.iatu_done_by_fw)
762		return 0;
763
764	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
765	inbound_region.mode = PCI_BAR_MATCH_MODE;
766	inbound_region.bar = SRAM_BAR_ID;
767	inbound_region.addr = SRAM_BASE_ADDR;
768	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
769	if (rc)
770		goto done;
771
772	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
773	inbound_region.mode = PCI_BAR_MATCH_MODE;
774	inbound_region.bar = CFG_BAR_ID;
775	inbound_region.addr = SPI_FLASH_BASE_ADDR;
776	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
777	if (rc)
778		goto done;
779
780	/* Inbound Region 2 - Bar 4 - Point to HBM */
781	inbound_region.mode = PCI_BAR_MATCH_MODE;
782	inbound_region.bar = HBM_BAR_ID;
783	inbound_region.addr = DRAM_PHYS_BASE;
784	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
785	if (rc)
786		goto done;
787
788	/* Outbound Region 0 - Point to Host */
789	outbound_region.addr = HOST_PHYS_BASE;
790	outbound_region.size = HOST_PHYS_SIZE;
791	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
792
793done:
794	return rc;
795}
796
797static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
798{
799	return RREG32(mmHW_STATE);
800}
801
802static int gaudi_early_init(struct hl_device *hdev)
803{
804	struct asic_fixed_properties *prop = &hdev->asic_prop;
805	struct pci_dev *pdev = hdev->pdev;
806	resource_size_t pci_bar_size;
807	u32 fw_boot_status;
808	int rc;
809
810	rc = gaudi_set_fixed_properties(hdev);
811	if (rc) {
812		dev_err(hdev->dev, "Failed setting fixed properties\n");
813		return rc;
814	}
815
816	/* Check BAR sizes */
817	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
818
819	if (pci_bar_size != SRAM_BAR_SIZE) {
820		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
821			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
822		rc = -ENODEV;
823		goto free_queue_props;
824	}
825
826	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
827
828	if (pci_bar_size != CFG_BAR_SIZE) {
829		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
830			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
831		rc = -ENODEV;
832		goto free_queue_props;
833	}
834
835	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
836	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
837
838	/* If FW security is enabled at this point it means no access to ELBI */
839	if (hdev->asic_prop.fw_security_enabled) {
840		hdev->asic_prop.iatu_done_by_fw = true;
841
842		/*
843		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
844		 * decision can only be taken based on PCI ID security.
845		 */
846		hdev->asic_prop.gic_interrupts_enable = false;
847		goto pci_init;
848	}
849
850	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
851				&fw_boot_status);
852	if (rc)
853		goto free_queue_props;
854
855	/* Check whether FW is configuring iATU */
856	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
857			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
858		hdev->asic_prop.iatu_done_by_fw = true;
859
860pci_init:
861	rc = hl_pci_init(hdev);
862	if (rc)
863		goto free_queue_props;
864
865	/* Before continuing in the initialization, we need to read the preboot
866	 * version to determine whether we run with a security-enabled firmware
867	 */
868	rc = hl_fw_read_preboot_status(hdev);
869	if (rc) {
870		if (hdev->reset_on_preboot_fail)
871			/* we are already on failure flow, so don't check if hw_fini fails. */
872			hdev->asic_funcs->hw_fini(hdev, true, false);
873		goto pci_fini;
874	}
875
876	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
877		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
878		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
879		if (rc) {
880			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
881			goto pci_fini;
882		}
883	}
884
885	return 0;
886
887pci_fini:
888	hl_pci_fini(hdev);
889free_queue_props:
890	kfree(hdev->asic_prop.hw_queues_props);
891	return rc;
892}
893
894static int gaudi_early_fini(struct hl_device *hdev)
895{
896	kfree(hdev->asic_prop.hw_queues_props);
897	hl_pci_fini(hdev);
898
899	return 0;
900}
901
902/**
903 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
904 *
905 * @hdev: pointer to hl_device structure
906 *
907 */
908static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
909{
910	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
911	struct asic_fixed_properties *prop = &hdev->asic_prop;
912	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
913	int rc;
914
915	if ((hdev->fw_components & FW_TYPE_LINUX) &&
916			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
917		struct gaudi_device *gaudi = hdev->asic_specific;
918
919		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
920			return 0;
921
922		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
923
924		if (rc)
925			return rc;
926
927		freq = pll_freq_arr[2];
928	} else {
929		/* Backward compatibility */
930		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
931		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
932		nr = RREG32(mmPSOC_CPU_PLL_NR);
933		nf = RREG32(mmPSOC_CPU_PLL_NF);
934		od = RREG32(mmPSOC_CPU_PLL_OD);
935
936		if (div_sel == DIV_SEL_REF_CLK ||
937				div_sel == DIV_SEL_DIVIDED_REF) {
938			if (div_sel == DIV_SEL_REF_CLK)
939				freq = PLL_REF_CLK;
940			else
941				freq = PLL_REF_CLK / (div_fctr + 1);
942		} else if (div_sel == DIV_SEL_PLL_CLK ||
943			div_sel == DIV_SEL_DIVIDED_PLL) {
944			pll_clk = PLL_REF_CLK * (nf + 1) /
945					((nr + 1) * (od + 1));
946			if (div_sel == DIV_SEL_PLL_CLK)
947				freq = pll_clk;
948			else
949				freq = pll_clk / (div_fctr + 1);
950		} else {
951			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
952			freq = 0;
953		}
954	}
955
956	prop->psoc_timestamp_frequency = freq;
957	prop->psoc_pci_pll_nr = nr;
958	prop->psoc_pci_pll_nf = nf;
959	prop->psoc_pci_pll_od = od;
960	prop->psoc_pci_pll_div_factor = div_fctr;
961
962	return 0;
963}
964
965static int _gaudi_init_tpc_mem(struct hl_device *hdev,
966		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
967{
968	struct asic_fixed_properties *prop = &hdev->asic_prop;
969	struct packet_lin_dma *init_tpc_mem_pkt;
970	struct hl_cs_job *job;
971	struct hl_cb *cb;
972	u64 dst_addr;
973	u32 cb_size, ctl;
974	u8 tpc_id;
975	int rc;
976
977	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
978	if (!cb)
979		return -EFAULT;
980
981	init_tpc_mem_pkt = cb->kernel_address;
982	cb_size = sizeof(*init_tpc_mem_pkt);
983	memset(init_tpc_mem_pkt, 0, cb_size);
984
985	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
986
987	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
988	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
989	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
990	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
991
992	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
993
994	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
995
996	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
997	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
998				round_up(prop->sram_user_base_address, SZ_8K));
999	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
1000
1001	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
1002	if (!job) {
1003		dev_err(hdev->dev, "Failed to allocate a new job\n");
1004		rc = -ENOMEM;
1005		goto release_cb;
1006	}
1007
1008	job->id = 0;
1009	job->user_cb = cb;
1010	atomic_inc(&job->user_cb->cs_cnt);
1011	job->user_cb_size = cb_size;
1012	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1013	job->patched_cb = job->user_cb;
1014	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1015
1016	hl_debugfs_add_job(hdev, job);
1017
1018	rc = gaudi_send_job_on_qman0(hdev, job);
1019
1020	if (rc)
1021		goto free_job;
1022
1023	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1024		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1025		if (rc)
1026			break;
1027	}
1028
1029free_job:
1030	hl_userptr_delete_list(hdev, &job->userptr_list);
1031	hl_debugfs_remove_job(hdev, job);
1032	kfree(job);
1033	atomic_dec(&cb->cs_cnt);
1034
1035release_cb:
1036	hl_cb_put(cb);
1037	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1038
1039	return rc;
1040}
1041
1042/*
1043 * gaudi_init_tpc_mem() - Initialize TPC memories.
1044 * @hdev: Pointer to hl_device structure.
1045 *
1046 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1047 *
1048 * Return: 0 for success, negative value for error.
1049 */
1050static int gaudi_init_tpc_mem(struct hl_device *hdev)
1051{
1052	const struct firmware *fw;
1053	size_t fw_size;
1054	void *cpu_addr;
1055	dma_addr_t dma_handle;
1056	int rc, count = 5;
1057
1058again:
1059	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1060	if (rc == -EINTR && count-- > 0) {
1061		msleep(50);
1062		goto again;
1063	}
1064
1065	if (rc) {
1066		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1067				GAUDI_TPC_FW_FILE);
1068		goto out;
1069	}
1070
1071	fw_size = fw->size;
1072	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1073	if (!cpu_addr) {
1074		dev_err(hdev->dev,
1075			"Failed to allocate %zu of dma memory for TPC kernel\n",
1076			fw_size);
1077		rc = -ENOMEM;
1078		goto out;
1079	}
1080
1081	memcpy(cpu_addr, fw->data, fw_size);
1082
1083	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1084
1085	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1086
1087out:
1088	release_firmware(fw);
1089	return rc;
1090}
1091
1092static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1093{
1094	struct gaudi_device *gaudi = hdev->asic_specific;
1095	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1096	struct hl_hw_queue *q;
1097	u32 i, sob_id, sob_group_id, queue_id;
1098
1099	/* Iterate through SOB groups and assign a SOB for each slave queue */
1100	sob_group_id =
1101		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1102	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1103
1104	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1105	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1106		q = &hdev->kernel_queues[queue_id + (4 * i)];
1107		q->sync_stream_prop.collective_sob_id = sob_id + i;
1108	}
1109
1110	/* Both DMA5 and TPC7 use the same resources since only a single
1111	 * engine need to participate in the reduction process
1112	 */
1113	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1114	q = &hdev->kernel_queues[queue_id];
1115	q->sync_stream_prop.collective_sob_id =
1116			sob_id + NIC_NUMBER_OF_ENGINES;
1117
1118	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1119	q = &hdev->kernel_queues[queue_id];
1120	q->sync_stream_prop.collective_sob_id =
1121			sob_id + NIC_NUMBER_OF_ENGINES;
1122}
1123
1124static void gaudi_sob_group_hw_reset(struct kref *ref)
1125{
1126	struct gaudi_hw_sob_group *hw_sob_group =
1127		container_of(ref, struct gaudi_hw_sob_group, kref);
1128	struct hl_device *hdev = hw_sob_group->hdev;
1129	int i;
1130
1131	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1132		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1133			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1134
1135	kref_init(&hw_sob_group->kref);
1136}
1137
1138static void gaudi_sob_group_reset_error(struct kref *ref)
1139{
1140	struct gaudi_hw_sob_group *hw_sob_group =
1141		container_of(ref, struct gaudi_hw_sob_group, kref);
1142	struct hl_device *hdev = hw_sob_group->hdev;
1143
1144	dev_crit(hdev->dev,
1145		"SOB release shouldn't be called here, base_sob_id: %d\n",
1146		hw_sob_group->base_sob_id);
1147}
1148
1149static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1150{
1151	struct gaudi_collective_properties *prop;
1152	int i;
1153
1154	prop = &gaudi->collective_props;
1155
1156	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1157
1158	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1159		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1160			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1161					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1162	/* Set collective engine bit */
1163	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1164				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1165}
1166
1167static int gaudi_collective_init(struct hl_device *hdev)
1168{
1169	u32 i, sob_id, reserved_sobs_per_group;
1170	struct gaudi_collective_properties *prop;
1171	struct gaudi_device *gaudi;
1172
1173	gaudi = hdev->asic_specific;
1174	prop = &gaudi->collective_props;
1175	sob_id = hdev->asic_prop.collective_first_sob;
1176
1177	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1178	reserved_sobs_per_group =
1179		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1180
1181	/* Init SOB groups */
1182	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1183		prop->hw_sob_group[i].hdev = hdev;
1184		prop->hw_sob_group[i].base_sob_id = sob_id;
1185		sob_id += reserved_sobs_per_group;
1186		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1187	}
1188
1189	for (i = 0 ; i < QMAN_STREAMS; i++) {
1190		prop->next_sob_group_val[i] = 1;
1191		prop->curr_sob_group_idx[i] = 0;
1192		gaudi_collective_map_sobs(hdev, i);
1193	}
1194
1195	gaudi_collective_mstr_sob_mask_set(gaudi);
1196
1197	return 0;
1198}
1199
1200static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1201{
1202	struct gaudi_device *gaudi = hdev->asic_specific;
1203	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1204
1205	kref_put(&cprop->hw_sob_group[sob_group].kref,
1206					gaudi_sob_group_hw_reset);
1207}
1208
1209static void gaudi_collective_master_init_job(struct hl_device *hdev,
1210		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1211{
1212	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1213	struct gaudi_collective_properties *cprop;
1214	struct hl_gen_wait_properties wait_prop;
1215	struct hl_sync_stream_properties *prop;
1216	struct gaudi_device *gaudi;
1217
1218	gaudi = hdev->asic_specific;
1219	cprop = &gaudi->collective_props;
1220	queue_id = job->hw_queue_id;
1221	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1222
1223	master_sob_base =
1224		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1225	master_monitor = prop->collective_mstr_mon_id[0];
1226
1227	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1228
1229	dev_dbg(hdev->dev,
1230		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1231		master_sob_base, cprop->mstr_sob_mask[0],
1232		cprop->next_sob_group_val[stream],
1233		master_monitor, queue_id);
1234
1235	wait_prop.data = (void *) job->patched_cb;
1236	wait_prop.sob_base = master_sob_base;
1237	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1238	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1239	wait_prop.mon_id = master_monitor;
1240	wait_prop.q_idx = queue_id;
1241	wait_prop.size = cb_size;
1242	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1243
1244	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1245	master_monitor = prop->collective_mstr_mon_id[1];
1246
1247	dev_dbg(hdev->dev,
1248		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1249		master_sob_base, cprop->mstr_sob_mask[1],
1250		cprop->next_sob_group_val[stream],
1251		master_monitor, queue_id);
1252
1253	wait_prop.sob_base = master_sob_base;
1254	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1255	wait_prop.mon_id = master_monitor;
1256	wait_prop.size = cb_size;
1257	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1258}
1259
1260static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1261		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1262{
1263	struct hl_gen_wait_properties wait_prop;
1264	struct hl_sync_stream_properties *prop;
1265	u32 queue_id, cb_size = 0;
1266
1267	queue_id = job->hw_queue_id;
1268	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1269
1270	if (job->cs->encaps_signals) {
1271		/* use the encaps signal handle store earlier in the flow
1272		 * and set the SOB information from the encaps
1273		 * signals handle
1274		 */
1275		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1276						cs_cmpl);
1277
1278		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1279				job->cs->sequence,
1280				cs_cmpl->hw_sob->sob_id,
1281				cs_cmpl->sob_val);
1282	}
1283
1284	/* Add to wait CBs using slave monitor */
1285	wait_prop.data = (void *) job->user_cb;
1286	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1287	wait_prop.sob_mask = 0x1;
1288	wait_prop.sob_val = cs_cmpl->sob_val;
1289	wait_prop.mon_id = prop->collective_slave_mon_id;
1290	wait_prop.q_idx = queue_id;
1291	wait_prop.size = cb_size;
1292
1293	dev_dbg(hdev->dev,
1294		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1295		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1296		prop->collective_slave_mon_id, queue_id);
1297
1298	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1299
1300	dev_dbg(hdev->dev,
1301		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1302		prop->collective_sob_id, queue_id);
1303
1304	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1305			prop->collective_sob_id, cb_size, false);
1306}
1307
1308static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1309{
1310	struct hl_cs_compl *signal_cs_cmpl =
1311		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1312	struct hl_cs_compl *cs_cmpl =
1313		container_of(cs->fence, struct hl_cs_compl, base_fence);
1314	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1315	struct gaudi_collective_properties *cprop;
1316	u32 stream, queue_id, sob_group_offset;
1317	struct gaudi_device *gaudi;
1318	struct hl_device *hdev;
1319	struct hl_cs_job *job;
1320	struct hl_ctx *ctx;
1321
1322	ctx = cs->ctx;
1323	hdev = ctx->hdev;
1324	gaudi = hdev->asic_specific;
1325	cprop = &gaudi->collective_props;
1326
1327	if (cs->encaps_signals) {
1328		cs_cmpl->hw_sob = handle->hw_sob;
1329		/* at this checkpoint we only need the hw_sob pointer
1330		 * for the completion check before start going over the jobs
1331		 * of the master/slaves, the sob_value will be taken later on
1332		 * in gaudi_collective_slave_init_job depends on each
1333		 * job wait offset value.
1334		 */
1335		cs_cmpl->sob_val = 0;
1336	} else {
1337		/* copy the SOB id and value of the signal CS */
1338		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1339		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1340	}
1341
1342	/* check again if the signal cs already completed.
1343	 * if yes then don't send any wait cs since the hw_sob
1344	 * could be in reset already. if signal is not completed
1345	 * then get refcount to hw_sob to prevent resetting the sob
1346	 * while wait cs is not submitted.
1347	 * note that this check is protected by two locks,
1348	 * hw queue lock and completion object lock,
1349	 * and the same completion object lock also protects
1350	 * the hw_sob reset handler function.
1351	 * The hw_queue lock prevent out of sync of hw_sob
1352	 * refcount value, changed by signal/wait flows.
1353	 */
1354	spin_lock(&signal_cs_cmpl->lock);
1355
1356	if (completion_done(&cs->signal_fence->completion)) {
1357		spin_unlock(&signal_cs_cmpl->lock);
1358		return -EINVAL;
1359	}
1360	/* Increment kref since all slave queues are now waiting on it */
1361	kref_get(&cs_cmpl->hw_sob->kref);
1362
1363	spin_unlock(&signal_cs_cmpl->lock);
1364
1365	/* Calculate the stream from collective master queue (1st job) */
1366	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1367	stream = job->hw_queue_id % 4;
1368	sob_group_offset =
1369		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1370
1371	list_for_each_entry(job, &cs->job_list, cs_node) {
1372		queue_id = job->hw_queue_id;
1373
1374		if (hdev->kernel_queues[queue_id].collective_mode ==
1375				HL_COLLECTIVE_MASTER)
1376			gaudi_collective_master_init_job(hdev, job, stream,
1377						sob_group_offset);
1378		else
1379			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1380	}
1381
1382	cs_cmpl->sob_group = sob_group_offset;
1383
1384	/* Handle sob group kref and wraparound */
1385	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1386	cprop->next_sob_group_val[stream]++;
1387
1388	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1389		/*
1390		 * Decrement as we reached the max value.
1391		 * The release function won't be called here as we've
1392		 * just incremented the refcount.
1393		 */
1394		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1395				gaudi_sob_group_reset_error);
1396		cprop->next_sob_group_val[stream] = 1;
1397		/* only two SOBs are currently in use */
1398		cprop->curr_sob_group_idx[stream] =
1399			(cprop->curr_sob_group_idx[stream] + 1) &
1400							(HL_RSVD_SOBS - 1);
1401
1402		gaudi_collective_map_sobs(hdev, stream);
1403
1404		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1405				cprop->curr_sob_group_idx[stream], stream);
1406	}
1407
1408	mb();
1409	hl_fence_put(cs->signal_fence);
1410	cs->signal_fence = NULL;
1411
1412	return 0;
1413}
1414
1415static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1416{
1417	u32 cacheline_end, additional_commands;
1418
1419	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1420	additional_commands = sizeof(struct packet_msg_prot) * 2;
1421
1422	if (user_cb_size + additional_commands > cacheline_end)
1423		return cacheline_end - user_cb_size + additional_commands;
1424	else
1425		return additional_commands;
1426}
1427
1428static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1429		struct hl_ctx *ctx, struct hl_cs *cs,
1430		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1431		u32 encaps_signal_offset)
1432{
1433	struct hw_queue_properties *hw_queue_prop;
1434	struct hl_cs_counters_atomic *cntr;
1435	struct hl_cs_job *job;
1436	struct hl_cb *cb;
1437	u32 cb_size;
1438	bool patched_cb;
1439
1440	cntr = &hdev->aggregated_cs_counters;
1441
1442	if (mode == HL_COLLECTIVE_MASTER) {
1443		/* CB size of collective master queue contains
1444		 * 4 msg short packets for monitor 1 configuration
1445		 * 1 fence packet
1446		 * 4 msg short packets for monitor 2 configuration
1447		 * 1 fence packet
1448		 * 2 msg prot packets for completion and MSI
1449		 */
1450		cb_size = sizeof(struct packet_msg_short) * 8 +
1451				sizeof(struct packet_fence) * 2 +
1452				sizeof(struct packet_msg_prot) * 2;
1453		patched_cb = true;
1454	} else {
1455		/* CB size of collective slave queues contains
1456		 * 4 msg short packets for monitor configuration
1457		 * 1 fence packet
1458		 * 1 additional msg short packet for sob signal
1459		 */
1460		cb_size = sizeof(struct packet_msg_short) * 5 +
1461				sizeof(struct packet_fence);
1462		patched_cb = false;
1463	}
1464
1465	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1466	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1467	if (!job) {
1468		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1469		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1470		dev_err(hdev->dev, "Failed to allocate a new job\n");
1471		return -ENOMEM;
1472	}
1473
1474	/* Allocate internal mapped CB for non patched CBs */
1475	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
1476	if (!cb) {
1477		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1478		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1479		kfree(job);
1480		return -EFAULT;
1481	}
1482
1483	job->id = 0;
1484	job->cs = cs;
1485	job->user_cb = cb;
1486	atomic_inc(&job->user_cb->cs_cnt);
1487	job->user_cb_size = cb_size;
1488	job->hw_queue_id = queue_id;
1489
1490	/* since its guaranteed to have only one chunk in the collective wait
1491	 * cs, we can use this chunk to set the encapsulated signal offset
1492	 * in the jobs.
1493	 */
1494	if (cs->encaps_signals)
1495		job->encaps_sig_wait_offset = encaps_signal_offset;
1496
1497	/*
1498	 * No need in parsing, user CB is the patched CB.
1499	 * We call hl_cb_destroy() out of two reasons - we don't need
1500	 * the CB in the CB idr anymore and to decrement its refcount as
1501	 * it was incremented inside hl_cb_kernel_create().
1502	 */
1503	if (patched_cb)
1504		job->patched_cb = job->user_cb;
1505	else
1506		job->patched_cb = NULL;
1507
1508	job->job_cb_size = job->user_cb_size;
1509	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1510
1511	/* increment refcount as for external queues we get completion */
1512	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1513		cs_get(cs);
1514
1515	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1516
1517	list_add_tail(&job->cs_node, &cs->job_list);
1518
1519	hl_debugfs_add_job(hdev, job);
1520
1521	return 0;
1522}
1523
1524static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1525		struct hl_ctx *ctx, struct hl_cs *cs,
1526		u32 wait_queue_id, u32 collective_engine_id,
1527		u32 encaps_signal_offset)
1528{
1529	struct gaudi_device *gaudi = hdev->asic_specific;
1530	struct hw_queue_properties *hw_queue_prop;
1531	u32 queue_id, collective_queue, num_jobs;
1532	u32 stream, nic_queue, nic_idx = 0;
1533	bool skip;
1534	int i, rc = 0;
1535
1536	/* Verify wait queue id is configured as master */
1537	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1538	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1539		dev_err(hdev->dev,
1540			"Queue %d is not configured as collective master\n",
1541			wait_queue_id);
1542		return -EINVAL;
1543	}
1544
1545	/* Verify engine id is supported */
1546	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1547			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1548		dev_err(hdev->dev,
1549			"Collective wait does not support engine %u\n",
1550			collective_engine_id);
1551		return -EINVAL;
1552	}
1553
1554	stream = wait_queue_id % 4;
1555
1556	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1557		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1558	else
1559		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1560
1561	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1562	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1563
1564	/* First job goes to the collective master queue, it will wait for
1565	 * the collective slave queues to finish execution.
1566	 * The synchronization is done using two monitors:
1567	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1568	 * reduction engine (DMA5/TPC7).
1569	 *
1570	 * Rest of the jobs goes to the collective slave queues which will
1571	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1572	 */
1573	for (i = 0 ; i < num_jobs ; i++) {
1574		if (i == 0) {
1575			queue_id = wait_queue_id;
1576			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1577				HL_COLLECTIVE_MASTER, queue_id,
1578				wait_queue_id, encaps_signal_offset);
1579		} else {
1580			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1581				if (gaudi->hw_cap_initialized &
1582					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1583					skip = false;
1584				else
1585					skip = true;
1586
1587				queue_id = nic_queue;
1588				nic_queue += 4;
1589				nic_idx++;
1590
1591				if (skip)
1592					continue;
1593			} else {
1594				queue_id = collective_queue;
1595			}
1596
1597			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1598				HL_COLLECTIVE_SLAVE, queue_id,
1599				wait_queue_id, encaps_signal_offset);
1600		}
1601
1602		if (rc)
1603			return rc;
1604	}
1605
1606	return rc;
1607}
1608
1609static int gaudi_late_init(struct hl_device *hdev)
1610{
1611	struct gaudi_device *gaudi = hdev->asic_specific;
1612	int rc;
1613
1614	rc = gaudi->cpucp_info_get(hdev);
1615	if (rc) {
1616		dev_err(hdev->dev, "Failed to get cpucp info\n");
1617		return rc;
1618	}
1619
1620	if ((hdev->card_type == cpucp_card_type_pci) &&
1621			(hdev->nic_ports_mask & 0x3)) {
1622		dev_info(hdev->dev,
1623			"PCI card detected, only 8 ports are enabled\n");
1624		hdev->nic_ports_mask &= ~0x3;
1625
1626		/* Stop and disable unused NIC QMANs */
1627		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1628					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1629					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1630
1631		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1632					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1633					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1634
1635		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1636		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1637
1638		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1639	}
1640
1641	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1642	if (rc) {
1643		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1644		return rc;
1645	}
1646
1647	/* Scrub both SRAM and DRAM */
1648	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1649	if (rc)
1650		goto disable_pci_access;
1651
1652	rc = gaudi_fetch_psoc_frequency(hdev);
1653	if (rc) {
1654		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1655		goto disable_pci_access;
1656	}
1657
1658	rc = gaudi_mmu_clear_pgt_range(hdev);
1659	if (rc) {
1660		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1661		goto disable_pci_access;
1662	}
1663
1664	rc = gaudi_init_tpc_mem(hdev);
1665	if (rc) {
1666		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1667		goto disable_pci_access;
1668	}
1669
1670	rc = gaudi_collective_init(hdev);
1671	if (rc) {
1672		dev_err(hdev->dev, "Failed to init collective\n");
1673		goto disable_pci_access;
1674	}
1675
1676	/* We only support a single ASID for the user, so for the sake of optimization, just
1677	 * initialize the ASID one time during device initialization with the fixed value of 1
1678	 */
1679	gaudi_mmu_prepare(hdev, 1);
1680
1681	hl_fw_set_pll_profile(hdev);
1682
1683	return 0;
1684
1685disable_pci_access:
1686	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1687
1688	return rc;
1689}
1690
1691static void gaudi_late_fini(struct hl_device *hdev)
1692{
1693	hl_hwmon_release_resources(hdev);
1694}
1695
1696static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1697{
1698	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1699	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1700	int i, j, rc = 0;
1701
1702	/*
1703	 * The device CPU works with 40-bits addresses, while bit 39 must be set
1704	 * to '1' when accessing the host.
1705	 * Bits 49:39 of the full host address are saved for a later
1706	 * configuration of the HW to perform extension to 50 bits.
1707	 * Because there is a single HW register that holds the extension bits,
1708	 * these bits must be identical in all allocated range.
1709	 */
1710
1711	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1712		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1713								&dma_addr_arr[i],
1714								GFP_KERNEL | __GFP_ZERO);
1715		if (!virt_addr_arr[i]) {
1716			rc = -ENOMEM;
1717			goto free_dma_mem_arr;
1718		}
1719
1720		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1721		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1722				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1723			break;
1724	}
1725
1726	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1727		dev_err(hdev->dev,
1728			"MSB of CPU accessible DMA memory are not identical in all range\n");
1729		rc = -EFAULT;
1730		goto free_dma_mem_arr;
1731	}
1732
1733	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1734	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1735	hdev->cpu_pci_msb_addr =
1736		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1737
1738	if (!hdev->asic_prop.fw_security_enabled)
1739		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1740
1741free_dma_mem_arr:
1742	for (j = 0 ; j < i ; j++)
1743		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1744						dma_addr_arr[j]);
1745
1746	return rc;
1747}
1748
1749static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1750{
1751	struct gaudi_device *gaudi = hdev->asic_specific;
1752	struct gaudi_internal_qman_info *q;
1753	u32 i;
1754
1755	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1756		q = &gaudi->internal_qmans[i];
1757		if (!q->pq_kernel_addr)
1758			continue;
1759		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1760	}
1761}
1762
1763static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1764{
1765	struct gaudi_device *gaudi = hdev->asic_specific;
1766	struct gaudi_internal_qman_info *q;
1767	int rc, i;
1768
1769	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1770		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1771			continue;
1772
1773		q = &gaudi->internal_qmans[i];
1774
1775		switch (i) {
1776		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1777			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1778			break;
1779		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1780			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1781			break;
1782		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1783			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1784			break;
1785		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1786			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1787			break;
1788		default:
1789			dev_err(hdev->dev, "Bad internal queue index %d", i);
1790			rc = -EINVAL;
1791			goto free_internal_qmans_pq_mem;
1792		}
1793
1794		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1795								GFP_KERNEL | __GFP_ZERO);
1796		if (!q->pq_kernel_addr) {
1797			rc = -ENOMEM;
1798			goto free_internal_qmans_pq_mem;
1799		}
1800	}
1801
1802	return 0;
1803
1804free_internal_qmans_pq_mem:
1805	gaudi_free_internal_qmans_pq_mem(hdev);
1806	return rc;
1807}
1808
1809static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1810{
1811	struct asic_fixed_properties *prop = &hdev->asic_prop;
1812	struct pci_mem_region *region;
1813
1814	/* CFG */
1815	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1816	region->region_base = CFG_BASE;
1817	region->region_size = CFG_SIZE;
1818	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1819	region->bar_size = CFG_BAR_SIZE;
1820	region->bar_id = CFG_BAR_ID;
1821	region->used = 1;
1822
1823	/* SRAM */
1824	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1825	region->region_base = SRAM_BASE_ADDR;
1826	region->region_size = SRAM_SIZE;
1827	region->offset_in_bar = 0;
1828	region->bar_size = SRAM_BAR_SIZE;
1829	region->bar_id = SRAM_BAR_ID;
1830	region->used = 1;
1831
1832	/* DRAM */
1833	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1834	region->region_base = DRAM_PHYS_BASE;
1835	region->region_size = hdev->asic_prop.dram_size;
1836	region->offset_in_bar = 0;
1837	region->bar_size = prop->dram_pci_bar_size;
1838	region->bar_id = HBM_BAR_ID;
1839	region->used = 1;
1840
1841	/* SP SRAM */
1842	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1843	region->region_base = PSOC_SCRATCHPAD_ADDR;
1844	region->region_size = PSOC_SCRATCHPAD_SIZE;
1845	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1846	region->bar_size = CFG_BAR_SIZE;
1847	region->bar_id = CFG_BAR_ID;
1848	region->used = 1;
1849}
1850
1851static int gaudi_sw_init(struct hl_device *hdev)
1852{
1853	struct gaudi_device *gaudi;
1854	u32 i, event_id = 0;
1855	int rc;
1856
1857	/* Allocate device structure */
1858	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1859	if (!gaudi)
1860		return -ENOMEM;
1861
1862	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1863		if (gaudi_irq_map_table[i].valid) {
1864			if (event_id == GAUDI_EVENT_SIZE) {
1865				dev_err(hdev->dev,
1866					"Event array exceeds the limit of %u events\n",
1867					GAUDI_EVENT_SIZE);
1868				rc = -EINVAL;
1869				goto free_gaudi_device;
1870			}
1871
1872			gaudi->events[event_id++] =
1873					gaudi_irq_map_table[i].fc_id;
1874		}
1875	}
1876
1877	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1878
1879	hdev->asic_specific = gaudi;
1880
1881	/* Create DMA pool for small allocations */
1882	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1883			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1884	if (!hdev->dma_pool) {
1885		dev_err(hdev->dev, "failed to create DMA pool\n");
1886		rc = -ENOMEM;
1887		goto free_gaudi_device;
1888	}
1889
1890	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1891	if (rc)
1892		goto free_dma_pool;
1893
1894	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1895	if (!hdev->cpu_accessible_dma_pool) {
1896		dev_err(hdev->dev,
1897			"Failed to create CPU accessible DMA pool\n");
1898		rc = -ENOMEM;
1899		goto free_cpu_dma_mem;
1900	}
1901
1902	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1903				(uintptr_t) hdev->cpu_accessible_dma_mem,
1904				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1905	if (rc) {
1906		dev_err(hdev->dev,
1907			"Failed to add memory to CPU accessible DMA pool\n");
1908		rc = -EFAULT;
1909		goto free_cpu_accessible_dma_pool;
1910	}
1911
1912	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1913	if (rc)
1914		goto free_cpu_accessible_dma_pool;
1915
1916	spin_lock_init(&gaudi->hw_queues_lock);
1917
1918	hdev->supports_sync_stream = true;
1919	hdev->supports_coresight = true;
1920	hdev->supports_staged_submission = true;
1921	hdev->supports_wait_for_multi_cs = true;
1922
1923	hdev->asic_funcs->set_pci_memory_regions(hdev);
1924	hdev->stream_master_qid_arr =
1925				hdev->asic_funcs->get_stream_master_qid_arr();
1926	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1927
1928	return 0;
1929
1930free_cpu_accessible_dma_pool:
1931	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1932free_cpu_dma_mem:
1933	if (!hdev->asic_prop.fw_security_enabled)
1934		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1935					hdev->cpu_pci_msb_addr);
1936	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1937					hdev->cpu_accessible_dma_address);
1938free_dma_pool:
1939	dma_pool_destroy(hdev->dma_pool);
1940free_gaudi_device:
1941	kfree(gaudi);
1942	return rc;
1943}
1944
1945static int gaudi_sw_fini(struct hl_device *hdev)
1946{
1947	struct gaudi_device *gaudi = hdev->asic_specific;
1948
1949	gaudi_free_internal_qmans_pq_mem(hdev);
1950
1951	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1952
1953	if (!hdev->asic_prop.fw_security_enabled)
1954		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1955					hdev->cpu_pci_msb_addr);
1956
1957	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1958					hdev->cpu_accessible_dma_address);
1959
1960	dma_pool_destroy(hdev->dma_pool);
1961
1962	kfree(gaudi);
1963
1964	return 0;
1965}
1966
1967static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1968{
1969	struct hl_device *hdev = arg;
1970	int i;
1971
1972	if (hdev->disabled)
1973		return IRQ_HANDLED;
1974
1975	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1976		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1977
1978	hl_irq_handler_eq(irq, &hdev->event_queue);
1979
1980	return IRQ_HANDLED;
1981}
1982
1983/*
1984 * For backward compatibility, new MSI interrupts should be set after the
1985 * existing CPU and NIC interrupts.
1986 */
1987static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1988				bool cpu_eq)
1989{
1990	int msi_vec;
1991
1992	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1993		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1994				GAUDI_EVENT_QUEUE_MSI_IDX);
1995
1996	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1997			(nr + NIC_NUMBER_OF_ENGINES + 1);
1998
1999	return pci_irq_vector(hdev->pdev, msi_vec);
2000}
2001
2002static int gaudi_enable_msi_single(struct hl_device *hdev)
2003{
2004	int rc, irq;
2005
2006	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2007
2008	irq = gaudi_pci_irq_vector(hdev, 0, false);
2009	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2010			"gaudi single msi", hdev);
2011	if (rc)
2012		dev_err(hdev->dev,
2013			"Failed to request single MSI IRQ\n");
2014
2015	return rc;
2016}
2017
2018static int gaudi_enable_msi(struct hl_device *hdev)
2019{
2020	struct gaudi_device *gaudi = hdev->asic_specific;
2021	int rc;
2022
2023	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2024		return 0;
2025
2026	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2027	if (rc < 0) {
2028		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2029		return rc;
2030	}
2031
2032	rc = gaudi_enable_msi_single(hdev);
2033	if (rc)
2034		goto free_pci_irq_vectors;
2035
2036	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2037
2038	return 0;
2039
2040free_pci_irq_vectors:
2041	pci_free_irq_vectors(hdev->pdev);
2042	return rc;
2043}
2044
2045static void gaudi_sync_irqs(struct hl_device *hdev)
2046{
2047	struct gaudi_device *gaudi = hdev->asic_specific;
2048
2049	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2050		return;
2051
2052	/* Wait for all pending IRQs to be finished */
2053	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2054}
2055
2056static void gaudi_disable_msi(struct hl_device *hdev)
2057{
2058	struct gaudi_device *gaudi = hdev->asic_specific;
2059
2060	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2061		return;
2062
2063	gaudi_sync_irqs(hdev);
2064	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2065	pci_free_irq_vectors(hdev->pdev);
2066
2067	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2068}
2069
2070static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2071{
2072	struct gaudi_device *gaudi = hdev->asic_specific;
2073
2074	if (hdev->asic_prop.fw_security_enabled)
2075		return;
2076
2077	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2078						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2079		return;
2080
2081	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2082		return;
2083
2084	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2085			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2086	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2087			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2088	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2089			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2090	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2091			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2092	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2093			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2094	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2095			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2096	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2097			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2098	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2099			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2100
2101	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2102			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2103	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2104			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2105	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2106			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2107	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2108			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2109	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2110			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2111	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2112			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2113	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2114			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2115	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2116			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2117
2118	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2119			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2120	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2121			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2122	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2123			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2124	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2125			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2126	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2127			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2128	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2129			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2130	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2131			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2132	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2133			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2134
2135	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2136}
2137
2138static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2139{
2140	struct gaudi_device *gaudi = hdev->asic_specific;
2141
2142	if (hdev->asic_prop.fw_security_enabled)
2143		return;
2144
2145	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2146					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2147		return;
2148
2149	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2150		return;
2151
2152	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2153			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2154	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2155			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2156	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2157			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2158	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2159			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2160	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2161			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2162	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2163			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2164	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2165			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2166	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2167			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2168
2169	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2170			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2171	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2172			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2173	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2174			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2175	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2176			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2177	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2178			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2179	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2180			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2181	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2182			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2183	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2184			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2185
2186	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2187			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2188	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2189			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2190	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2191			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2192	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2193			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2194	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2195			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2196	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2197			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2198	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2199			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2200	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2201			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2202
2203	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2204}
2205
2206static void gaudi_init_e2e(struct hl_device *hdev)
2207{
2208	if (hdev->asic_prop.fw_security_enabled)
2209		return;
2210
2211	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2212					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2213		return;
2214
2215	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2216	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2217	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2218	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2219
2220	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2221	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2222	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2223	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2224
2225	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2226	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2227	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2228	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2229
2230	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2231	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2232	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2233	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2234
2235	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2236	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2237	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2238	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2239
2240	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2241	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2242	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2243	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2244
2245	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2246	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2247	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2248	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2249
2250	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2251	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2252	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2253	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2254
2255	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2256	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2257	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2258	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2259
2260	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2261	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2262	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2263	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2264
2265	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2266	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2267	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2268	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2269
2270	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2271	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2272	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2273	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2274
2275	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2276	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2277	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2278	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2279
2280	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2281	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2282	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2283	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2284
2285	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2286	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2287	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2288	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2289
2290	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2291	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2292	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2293	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2294
2295	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2296	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2297	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2298	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2299
2300	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2301	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2302	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2303	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2304
2305	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2306	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2307	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2308	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2309
2310	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2311	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2312	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2313	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2314
2315	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2316	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2317	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2318	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2319
2320	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2321	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2322	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2323	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2324
2325	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2326	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2327	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2328	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2329
2330	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2331	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2332	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2333	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2334
2335	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2336			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2337	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2338			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2339
2340	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2341			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2342	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2343			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2344
2345	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2346			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2347	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2348			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2349
2350	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2351			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2352	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2353			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2354
2355	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2356			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2357	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2358			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2359
2360	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2361			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2362	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2363			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2364
2365	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2366			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2367	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2368			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2369
2370	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2371			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2372	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2373			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2374
2375	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2376			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2377	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2378			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2379
2380	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2381			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2382	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2383			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2384
2385	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2386			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2387	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2388			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2389
2390	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2391			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2392	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2393			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2394
2395	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2396			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2397	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2398			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2399
2400	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2401			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2402	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2403			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2404
2405	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2406			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2407	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2408			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2409
2410	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2411			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2412	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2413			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2414
2415	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2416			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2417	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2418			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2419
2420	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2421			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2422	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2423			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2424
2425	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2426			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2427	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2428			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2429
2430	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2431			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2432	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2433			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2434
2435	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2436			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2437	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2438			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2439
2440	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2441			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2442	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2443			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2444
2445	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2446			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2447	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2448			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2449
2450	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2451			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2452	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2453			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2454}
2455
2456static void gaudi_init_hbm_cred(struct hl_device *hdev)
2457{
2458	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2459
2460	if (hdev->asic_prop.fw_security_enabled)
2461		return;
2462
2463	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2464						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2465		return;
2466
2467	hbm0_wr = 0x33333333;
2468	hbm0_rd = 0x77777777;
2469	hbm1_wr = 0x55555555;
2470	hbm1_rd = 0xDDDDDDDD;
2471
2472	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2473	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2474	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2475	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2476
2477	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2478	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2479	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2480	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2481
2482	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2483	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2484	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2485	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2486
2487	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2488	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2489	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2490	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2491
2492	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2493			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2494			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2495	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2496			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2497			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2498	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2499			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2500			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2501	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2502			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2503			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2504
2505	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2506			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2507			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2508	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2509			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2510			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2511	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2512			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2513			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2514	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2515			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2516			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2517}
2518
2519static void gaudi_init_golden_registers(struct hl_device *hdev)
2520{
2521	u32 tpc_offset;
2522	int tpc_id, i;
2523
2524	gaudi_init_e2e(hdev);
2525	gaudi_init_hbm_cred(hdev);
2526
2527	for (tpc_id = 0, tpc_offset = 0;
2528				tpc_id < TPC_NUMBER_OF_ENGINES;
2529				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2530		/* Mask all arithmetic interrupts from TPC */
2531		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2532		/* Set 16 cache lines */
2533		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2534				ICACHE_FETCH_LINE_NUM, 2);
2535	}
2536
2537	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2538	for (i = 0 ; i < 128 ; i += 8)
2539		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2540
2541	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2543	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2544	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2545}
2546
2547static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2548					int qman_id, dma_addr_t qman_pq_addr)
2549{
2550	struct cpu_dyn_regs *dyn_regs =
2551			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2552	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2553	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2554	u32 q_off, dma_qm_offset;
2555	u32 dma_qm_err_cfg, irq_handler_offset;
2556
2557	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2558
2559	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2560				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2561	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2562				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2563	so_base_en_lo = lower_32_bits(CFG_BASE +
2564				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2565	so_base_en_hi = upper_32_bits(CFG_BASE +
2566				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2567	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2568				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2569	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2570				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2571	so_base_ws_lo = lower_32_bits(CFG_BASE +
2572				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2573	so_base_ws_hi = upper_32_bits(CFG_BASE +
2574				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2575
2576	q_off = dma_qm_offset + qman_id * 4;
2577
2578	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2579	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2580
2581	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2582	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2583	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2584
2585	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2586	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2587							QMAN_LDMA_SRC_OFFSET);
2588	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2589							QMAN_LDMA_DST_OFFSET);
2590
2591	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2592	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2593	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2594	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2595	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2596	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2597	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2598	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2599
2600	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2601
2602	/* The following configuration is needed only once per QMAN */
2603	if (qman_id == 0) {
2604		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2605				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2606				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2607
2608		/* Configure RAZWI IRQ */
2609		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2610		if (hdev->stop_on_err)
2611			dma_qm_err_cfg |=
2612				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2613
2614		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2615
2616		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2617			lower_32_bits(CFG_BASE + irq_handler_offset));
2618		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2619			upper_32_bits(CFG_BASE + irq_handler_offset));
2620
2621		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2622			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2623									dma_id);
2624
2625		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2626				QM_ARB_ERR_MSG_EN_MASK);
2627
2628		/* Set timeout to maximum */
2629		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2630
2631		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2632				QMAN_EXTERNAL_MAKE_TRUSTED);
2633
2634		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2635	}
2636}
2637
2638static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2639{
2640	struct cpu_dyn_regs *dyn_regs =
2641			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2642	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2643	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2644	u32 irq_handler_offset;
2645
2646	/* Set to maximum possible according to physical size */
2647	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2648	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2649
2650	/* WA for H/W bug H3-2116 */
2651	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2652
2653	/* STOP_ON bit implies no completion to operation in case of RAZWI */
2654	if (hdev->stop_on_err)
2655		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2656
2657	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2658
2659	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2660			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2661			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2662
2663	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2664		lower_32_bits(CFG_BASE + irq_handler_offset));
2665	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2666		upper_32_bits(CFG_BASE + irq_handler_offset));
2667
2668	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2669		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2670	WREG32(mmDMA0_CORE_PROT + dma_offset,
2671			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2672	/* If the channel is secured, it should be in MMU bypass mode */
2673	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2674			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2675	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2676}
2677
2678static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2679				u32 enable_mask)
2680{
2681	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2682
2683	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2684}
2685
2686static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2687{
2688	struct gaudi_device *gaudi = hdev->asic_specific;
2689	struct hl_hw_queue *q;
2690	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2691
2692	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2693		return;
2694
2695	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2696		dma_id = gaudi_dma_assignment[i];
2697		/*
2698		 * For queues after the CPU Q need to add 1 to get the correct
2699		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
2700		 * order to get the correct MSI register.
2701		 */
2702		if (dma_id > 1) {
2703			cpu_skip = 1;
2704			nic_skip = NIC_NUMBER_OF_ENGINES;
2705		} else {
2706			cpu_skip = 0;
2707			nic_skip = 0;
2708		}
2709
2710		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2711			q_idx = 4 * dma_id + j + cpu_skip;
2712			q = &hdev->kernel_queues[q_idx];
2713			q->cq_id = cq_id++;
2714			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2715			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2716						q->bus_address);
2717		}
2718
2719		gaudi_init_dma_core(hdev, dma_id);
2720
2721		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2722	}
2723
2724	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2725}
2726
2727static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2728					int qman_id, u64 qman_base_addr)
2729{
2730	struct cpu_dyn_regs *dyn_regs =
2731			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2732	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2733	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2734	u32 dma_qm_err_cfg, irq_handler_offset;
2735	u32 q_off, dma_qm_offset;
2736
2737	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2738
2739	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2740			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2741	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2742				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2743	so_base_en_lo = lower_32_bits(CFG_BASE +
2744				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2745	so_base_en_hi = upper_32_bits(CFG_BASE +
2746				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2747	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2748				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2749	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2750				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2751	so_base_ws_lo = lower_32_bits(CFG_BASE +
2752				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2753	so_base_ws_hi = upper_32_bits(CFG_BASE +
2754				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2755
2756	q_off = dma_qm_offset + qman_id * 4;
2757
2758	if (qman_id < 4) {
2759		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2760					lower_32_bits(qman_base_addr));
2761		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2762					upper_32_bits(qman_base_addr));
2763
2764		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2765		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2766		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2767
2768		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2769							QMAN_CPDMA_SIZE_OFFSET);
2770		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2771							QMAN_CPDMA_SRC_OFFSET);
2772		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2773							QMAN_CPDMA_DST_OFFSET);
2774	} else {
2775		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2776				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2777				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2778
2779		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2780							QMAN_LDMA_SIZE_OFFSET);
2781		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2782							QMAN_LDMA_SRC_OFFSET);
2783		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2784							QMAN_LDMA_DST_OFFSET);
2785
2786		/* Configure RAZWI IRQ */
2787		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2788		if (hdev->stop_on_err)
2789			dma_qm_err_cfg |=
2790				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2791
2792		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2793
2794		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2795			lower_32_bits(CFG_BASE + irq_handler_offset));
2796		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2797			upper_32_bits(CFG_BASE + irq_handler_offset));
2798
2799		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2800			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2801									dma_id);
2802
2803		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2804				QM_ARB_ERR_MSG_EN_MASK);
2805
2806		/* Set timeout to maximum */
2807		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2808
2809		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2810		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2811				QMAN_INTERNAL_MAKE_TRUSTED);
2812	}
2813
2814	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2815	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2816	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2817	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2818
2819	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2820	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2821		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2822				mtr_base_ws_lo);
2823		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2824				mtr_base_ws_hi);
2825		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2826				so_base_ws_lo);
2827		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2828				so_base_ws_hi);
2829	}
2830}
2831
2832static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2833{
2834	struct gaudi_device *gaudi = hdev->asic_specific;
2835	struct gaudi_internal_qman_info *q;
2836	u64 qman_base_addr;
2837	int i, j, dma_id, internal_q_index;
2838
2839	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2840		return;
2841
2842	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2843		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2844
2845		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2846			 /*
2847			  * Add the CPU queue in order to get the correct queue
2848			  * number as all internal queue are placed after it
2849			  */
2850			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2851
2852			q = &gaudi->internal_qmans[internal_q_index];
2853			qman_base_addr = (u64) q->pq_dma_addr;
2854			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2855						qman_base_addr);
2856		}
2857
2858		/* Initializing lower CP for HBM DMA QMAN */
2859		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2860
2861		gaudi_init_dma_core(hdev, dma_id);
2862
2863		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2864	}
2865
2866	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2867}
2868
2869static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2870					int qman_id, u64 qman_base_addr)
2871{
2872	struct cpu_dyn_regs *dyn_regs =
2873			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2874	u32 mtr_base_lo, mtr_base_hi;
2875	u32 so_base_lo, so_base_hi;
2876	u32 irq_handler_offset;
2877	u32 q_off, mme_id;
2878	u32 mme_qm_err_cfg;
2879
2880	mtr_base_lo = lower_32_bits(CFG_BASE +
2881				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2882	mtr_base_hi = upper_32_bits(CFG_BASE +
2883				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2884	so_base_lo = lower_32_bits(CFG_BASE +
2885				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2886	so_base_hi = upper_32_bits(CFG_BASE +
2887				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2888
2889	q_off = mme_offset + qman_id * 4;
2890
2891	if (qman_id < 4) {
2892		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2893					lower_32_bits(qman_base_addr));
2894		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2895					upper_32_bits(qman_base_addr));
2896
2897		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2898		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2899		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2900
2901		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2902							QMAN_CPDMA_SIZE_OFFSET);
2903		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2904							QMAN_CPDMA_SRC_OFFSET);
2905		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2906							QMAN_CPDMA_DST_OFFSET);
2907	} else {
2908		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2909				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2910				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2911
2912		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2913							QMAN_LDMA_SIZE_OFFSET);
2914		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2915							QMAN_LDMA_SRC_OFFSET);
2916		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2917							QMAN_LDMA_DST_OFFSET);
2918
2919		/* Configure RAZWI IRQ */
2920		mme_id = mme_offset /
2921				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2922
2923		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2924		if (hdev->stop_on_err)
2925			mme_qm_err_cfg |=
2926				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2927
2928		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2929
2930		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2931			lower_32_bits(CFG_BASE + irq_handler_offset));
2932		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2933			upper_32_bits(CFG_BASE + irq_handler_offset));
2934
2935		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2936			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2937									mme_id);
2938
2939		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2940				QM_ARB_ERR_MSG_EN_MASK);
2941
2942		/* Set timeout to maximum */
2943		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2944
2945		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2946		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2947				QMAN_INTERNAL_MAKE_TRUSTED);
2948	}
2949
2950	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2951	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2952	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2953	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2954}
2955
2956static void gaudi_init_mme_qmans(struct hl_device *hdev)
2957{
2958	struct gaudi_device *gaudi = hdev->asic_specific;
2959	struct gaudi_internal_qman_info *q;
2960	u64 qman_base_addr;
2961	u32 mme_offset;
2962	int i, internal_q_index;
2963
2964	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2965		return;
2966
2967	/*
2968	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2969	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2970	 */
2971
2972	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2973
2974	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2975		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2976		q = &gaudi->internal_qmans[internal_q_index];
2977		qman_base_addr = (u64) q->pq_dma_addr;
2978		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2979					qman_base_addr);
2980		if (i == 3)
2981			mme_offset = 0;
2982	}
2983
2984	/* Initializing lower CP for MME QMANs */
2985	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2986	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2987	gaudi_init_mme_qman(hdev, 0, 4, 0);
2988
2989	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2990	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2991
2992	gaudi->hw_cap_initialized |= HW_CAP_MME;
2993}
2994
2995static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2996				int qman_id, u64 qman_base_addr)
2997{
2998	struct cpu_dyn_regs *dyn_regs =
2999			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3000	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3001	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3002	u32 tpc_qm_err_cfg, irq_handler_offset;
3003	u32 q_off, tpc_id;
3004
3005	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3006			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3008				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3009	so_base_en_lo = lower_32_bits(CFG_BASE +
3010				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011	so_base_en_hi = upper_32_bits(CFG_BASE +
3012				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3013	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3014				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3016				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3017	so_base_ws_lo = lower_32_bits(CFG_BASE +
3018				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019	so_base_ws_hi = upper_32_bits(CFG_BASE +
3020				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3021
3022	q_off = tpc_offset + qman_id * 4;
3023
3024	tpc_id = tpc_offset /
3025			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3026
3027	if (qman_id < 4) {
3028		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3029					lower_32_bits(qman_base_addr));
3030		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3031					upper_32_bits(qman_base_addr));
3032
3033		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3034		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3035		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3036
3037		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3038							QMAN_CPDMA_SIZE_OFFSET);
3039		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3040							QMAN_CPDMA_SRC_OFFSET);
3041		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3042							QMAN_CPDMA_DST_OFFSET);
3043	} else {
3044		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3045				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3046				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3047
3048		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3049							QMAN_LDMA_SIZE_OFFSET);
3050		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3051							QMAN_LDMA_SRC_OFFSET);
3052		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3053							QMAN_LDMA_DST_OFFSET);
3054
3055		/* Configure RAZWI IRQ */
3056		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3057		if (hdev->stop_on_err)
3058			tpc_qm_err_cfg |=
3059				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3060
3061		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3062
3063		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3064			lower_32_bits(CFG_BASE + irq_handler_offset));
3065		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3066			upper_32_bits(CFG_BASE + irq_handler_offset));
3067
3068		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3069			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3070									tpc_id);
3071
3072		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3073				QM_ARB_ERR_MSG_EN_MASK);
3074
3075		/* Set timeout to maximum */
3076		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3077
3078		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3079		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3080				QMAN_INTERNAL_MAKE_TRUSTED);
3081	}
3082
3083	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3084	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3085	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3086	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3087
3088	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3089	if (tpc_id == 6) {
3090		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3091				mtr_base_ws_lo);
3092		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3093				mtr_base_ws_hi);
3094		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3095				so_base_ws_lo);
3096		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3097				so_base_ws_hi);
3098	}
3099}
3100
3101static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3102{
3103	struct gaudi_device *gaudi = hdev->asic_specific;
3104	struct gaudi_internal_qman_info *q;
3105	u64 qman_base_addr;
3106	u32 so_base_hi, tpc_offset = 0;
3107	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3108			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3109	int i, tpc_id, internal_q_index;
3110
3111	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3112		return;
3113
3114	so_base_hi = upper_32_bits(CFG_BASE +
3115				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3116
3117	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3118		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3119			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3120						tpc_id * QMAN_STREAMS + i;
3121			q = &gaudi->internal_qmans[internal_q_index];
3122			qman_base_addr = (u64) q->pq_dma_addr;
3123			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3124						qman_base_addr);
3125
3126			if (i == 3) {
3127				/* Initializing lower CP for TPC QMAN */
3128				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3129
3130				/* Enable the QMAN and TPC channel */
3131				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3132						QMAN_TPC_ENABLE);
3133			}
3134		}
3135
3136		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3137				so_base_hi);
3138
3139		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3140
3141		gaudi->hw_cap_initialized |=
3142				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3143	}
3144}
3145
3146static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3147				int qman_id, u64 qman_base_addr, int nic_id)
3148{
3149	struct cpu_dyn_regs *dyn_regs =
3150			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3151	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3152	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3153	u32 nic_qm_err_cfg, irq_handler_offset;
3154	u32 q_off;
3155
3156	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3157			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3158	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3159				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3160	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3161				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3162	so_base_en_hi = upper_32_bits(CFG_BASE +
3163				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3164	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3165				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3166	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3167				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3168	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3169				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3170	so_base_ws_hi = upper_32_bits(CFG_BASE +
3171				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3172
3173	q_off = nic_offset + qman_id * 4;
3174
3175	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3176	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3177
3178	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3179	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3180	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3181
3182	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3183							QMAN_LDMA_SIZE_OFFSET);
3184	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3185							QMAN_LDMA_SRC_OFFSET);
3186	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3187							QMAN_LDMA_DST_OFFSET);
3188
3189	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3190	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3191	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3192	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3193
3194	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3195	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3196	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3197	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3198	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3199
3200	if (qman_id == 0) {
3201		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3202				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3203				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3204
3205		/* Configure RAZWI IRQ */
3206		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3207		if (hdev->stop_on_err)
3208			nic_qm_err_cfg |=
3209				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3210
3211		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3212
3213		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3214			lower_32_bits(CFG_BASE + irq_handler_offset));
3215		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3216			upper_32_bits(CFG_BASE + irq_handler_offset));
3217
3218		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3219			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3220									nic_id);
3221
3222		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3223				QM_ARB_ERR_MSG_EN_MASK);
3224
3225		/* Set timeout to maximum */
3226		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3227
3228		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3229		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3230				QMAN_INTERNAL_MAKE_TRUSTED);
3231	}
3232}
3233
3234static void gaudi_init_nic_qmans(struct hl_device *hdev)
3235{
3236	struct gaudi_device *gaudi = hdev->asic_specific;
3237	struct gaudi_internal_qman_info *q;
3238	u64 qman_base_addr;
3239	u32 nic_offset = 0;
3240	u32 nic_delta_between_qmans =
3241			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3242	u32 nic_delta_between_nics =
3243			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3244	int i, nic_id, internal_q_index;
3245
3246	if (!hdev->nic_ports_mask)
3247		return;
3248
3249	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3250		return;
3251
3252	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3253
3254	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3255		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3256			nic_offset += nic_delta_between_qmans;
3257			if (nic_id & 1) {
3258				nic_offset -= (nic_delta_between_qmans * 2);
3259				nic_offset += nic_delta_between_nics;
3260			}
3261			continue;
3262		}
3263
3264		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3265			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3266						nic_id * QMAN_STREAMS + i;
3267			q = &gaudi->internal_qmans[internal_q_index];
3268			qman_base_addr = (u64) q->pq_dma_addr;
3269			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3270						qman_base_addr, nic_id);
3271		}
3272
3273		/* Enable the QMAN */
3274		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3275
3276		nic_offset += nic_delta_between_qmans;
3277		if (nic_id & 1) {
3278			nic_offset -= (nic_delta_between_qmans * 2);
3279			nic_offset += nic_delta_between_nics;
3280		}
3281
3282		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3283	}
3284}
3285
3286static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3287{
3288	struct gaudi_device *gaudi = hdev->asic_specific;
3289
3290	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3291		return;
3292
3293	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3294	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3295	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3296}
3297
3298static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3299{
3300	struct gaudi_device *gaudi = hdev->asic_specific;
3301
3302	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3303		return;
3304
3305	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3306	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3307	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3308	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3309	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3310}
3311
3312static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3313{
3314	struct gaudi_device *gaudi = hdev->asic_specific;
3315
3316	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3317		return;
3318
3319	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3320	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3321}
3322
3323static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3324{
3325	struct gaudi_device *gaudi = hdev->asic_specific;
3326	u32 tpc_offset = 0;
3327	int tpc_id;
3328
3329	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3330		return;
3331
3332	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3333		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3334		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3335	}
3336}
3337
3338static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3339{
3340	struct gaudi_device *gaudi = hdev->asic_specific;
3341	u32 nic_mask, nic_offset = 0;
3342	u32 nic_delta_between_qmans =
3343			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3344	u32 nic_delta_between_nics =
3345			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3346	int nic_id;
3347
3348	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3349		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3350
3351		if (gaudi->hw_cap_initialized & nic_mask)
3352			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3353
3354		nic_offset += nic_delta_between_qmans;
3355		if (nic_id & 1) {
3356			nic_offset -= (nic_delta_between_qmans * 2);
3357			nic_offset += nic_delta_between_nics;
3358		}
3359	}
3360}
3361
3362static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3363{
3364	struct gaudi_device *gaudi = hdev->asic_specific;
3365
3366	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3367		return;
3368
3369	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3370	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3372	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3373}
3374
3375static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3376{
3377	struct gaudi_device *gaudi = hdev->asic_specific;
3378
3379	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3380		return;
3381
3382	/* Stop CPs of HBM DMA QMANs */
3383
3384	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3389}
3390
3391static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3392{
3393	struct gaudi_device *gaudi = hdev->asic_specific;
3394
3395	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3396		return;
3397
3398	/* Stop CPs of MME QMANs */
3399	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3400	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3401}
3402
3403static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3404{
3405	struct gaudi_device *gaudi = hdev->asic_specific;
3406
3407	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3408		return;
3409
3410	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418}
3419
3420static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3421{
3422	struct gaudi_device *gaudi = hdev->asic_specific;
3423
3424	/* Stop upper CPs of QMANs */
3425
3426	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3427		WREG32(mmNIC0_QM0_GLBL_CFG1,
3428				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3429				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3430				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3431
3432	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3433		WREG32(mmNIC0_QM1_GLBL_CFG1,
3434				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3435				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3436				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3437
3438	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3439		WREG32(mmNIC1_QM0_GLBL_CFG1,
3440				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3441				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3442				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3443
3444	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3445		WREG32(mmNIC1_QM1_GLBL_CFG1,
3446				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3447				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3448				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3449
3450	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3451		WREG32(mmNIC2_QM0_GLBL_CFG1,
3452				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3453				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3454				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3455
3456	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3457		WREG32(mmNIC2_QM1_GLBL_CFG1,
3458				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3459				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3460				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3461
3462	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3463		WREG32(mmNIC3_QM0_GLBL_CFG1,
3464				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3465				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3466				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3467
3468	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3469		WREG32(mmNIC3_QM1_GLBL_CFG1,
3470				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3471				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3472				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3473
3474	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3475		WREG32(mmNIC4_QM0_GLBL_CFG1,
3476				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3477				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3478				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3479
3480	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3481		WREG32(mmNIC4_QM1_GLBL_CFG1,
3482				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3483				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3484				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3485}
3486
3487static void gaudi_pci_dma_stall(struct hl_device *hdev)
3488{
3489	struct gaudi_device *gaudi = hdev->asic_specific;
3490
3491	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3492		return;
3493
3494	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3496	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3497}
3498
3499static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3500{
3501	struct gaudi_device *gaudi = hdev->asic_specific;
3502
3503	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3504		return;
3505
3506	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3511}
3512
3513static void gaudi_mme_stall(struct hl_device *hdev)
3514{
3515	struct gaudi_device *gaudi = hdev->asic_specific;
3516
3517	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3518		return;
3519
3520	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3521	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3522	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3524	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3526	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3528	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3530	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3532	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3534	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3535	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3536	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3537}
3538
3539static void gaudi_tpc_stall(struct hl_device *hdev)
3540{
3541	struct gaudi_device *gaudi = hdev->asic_specific;
3542
3543	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3544		return;
3545
3546	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554}
3555
3556static void gaudi_disable_clock_gating(struct hl_device *hdev)
3557{
3558	u32 qman_offset;
3559	int i;
3560
3561	if (hdev->asic_prop.fw_security_enabled)
3562		return;
3563
3564	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3565		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3566		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3567
3568		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3569	}
3570
3571	WREG32(mmMME0_QM_CGM_CFG, 0);
3572	WREG32(mmMME0_QM_CGM_CFG1, 0);
3573	WREG32(mmMME2_QM_CGM_CFG, 0);
3574	WREG32(mmMME2_QM_CGM_CFG1, 0);
3575
3576	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3577		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3578		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3579
3580		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3581	}
3582}
3583
3584static void gaudi_enable_timestamp(struct hl_device *hdev)
3585{
3586	/* Disable the timestamp counter */
3587	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3588
3589	/* Zero the lower/upper parts of the 64-bit counter */
3590	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3591	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3592
3593	/* Enable the counter */
3594	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3595}
3596
3597static void gaudi_disable_timestamp(struct hl_device *hdev)
3598{
3599	/* Disable the timestamp counter */
3600	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3601}
3602
3603static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3604{
3605	u32 wait_timeout_ms;
3606
3607	if (hdev->pldm)
3608		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3609	else
3610		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3611
3612	if (fw_reset)
3613		goto skip_engines;
3614
3615	gaudi_stop_nic_qmans(hdev);
3616	gaudi_stop_mme_qmans(hdev);
3617	gaudi_stop_tpc_qmans(hdev);
3618	gaudi_stop_hbm_dma_qmans(hdev);
3619	gaudi_stop_pci_dma_qmans(hdev);
3620
3621	msleep(wait_timeout_ms);
3622
3623	gaudi_pci_dma_stall(hdev);
3624	gaudi_hbm_dma_stall(hdev);
3625	gaudi_tpc_stall(hdev);
3626	gaudi_mme_stall(hdev);
3627
3628	msleep(wait_timeout_ms);
3629
3630	gaudi_disable_nic_qmans(hdev);
3631	gaudi_disable_mme_qmans(hdev);
3632	gaudi_disable_tpc_qmans(hdev);
3633	gaudi_disable_hbm_dma_qmans(hdev);
3634	gaudi_disable_pci_dma_qmans(hdev);
3635
3636	gaudi_disable_timestamp(hdev);
3637
3638skip_engines:
3639	gaudi_disable_msi(hdev);
3640}
3641
3642static int gaudi_mmu_init(struct hl_device *hdev)
3643{
3644	struct asic_fixed_properties *prop = &hdev->asic_prop;
3645	struct gaudi_device *gaudi = hdev->asic_specific;
3646	u64 hop0_addr;
3647	int rc, i;
3648
3649	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3650		return 0;
3651
3652	for (i = 0 ; i < prop->max_asid ; i++) {
3653		hop0_addr = prop->mmu_pgt_addr +
3654				(i * prop->dmmu.hop_table_size);
3655
3656		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3657		if (rc) {
3658			dev_err(hdev->dev,
3659				"failed to set hop0 addr for asid %d\n", i);
3660			return rc;
3661		}
3662	}
3663
3664	/* init MMU cache manage page */
3665	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3666	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3667
3668	/* mem cache invalidation */
3669	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3670
3671	rc = hl_mmu_invalidate_cache(hdev, true, 0);
3672	if (rc)
3673		return rc;
3674
3675	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3676	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3677
3678	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3679
3680	/*
3681	 * The H/W expects the first PI after init to be 1. After wraparound
3682	 * we'll write 0.
3683	 */
3684	gaudi->mmu_cache_inv_pi = 1;
3685
3686	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3687
3688	return 0;
3689}
3690
3691static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3692{
3693	void __iomem *dst;
3694
3695	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3696
3697	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3698}
3699
3700static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3701{
3702	void __iomem *dst;
3703
3704	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3705
3706	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3707}
3708
3709static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3710{
3711	struct dynamic_fw_load_mgr *dynamic_loader;
3712	struct cpu_dyn_regs *dyn_regs;
3713
3714	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3715
3716	/*
3717	 * here we update initial values for few specific dynamic regs (as
3718	 * before reading the first descriptor from FW those value has to be
3719	 * hard-coded) in later stages of the protocol those values will be
3720	 * updated automatically by reading the FW descriptor so data there
3721	 * will always be up-to-date
3722	 */
3723	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3724	dyn_regs->kmd_msg_to_cpu =
3725				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3726	dyn_regs->cpu_cmd_status_to_host =
3727				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3728
3729	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3730}
3731
3732static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3733{
3734	struct static_fw_load_mgr *static_loader;
3735
3736	static_loader = &hdev->fw_loader.static_loader;
3737
3738	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3739	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3740	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3741	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3742	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3743	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3744	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3745	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3746	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3747	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3748	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3749	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3750	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3751			GAUDI_PLDM_RESET_WAIT_MSEC :
3752			GAUDI_CPU_RESET_WAIT_MSEC;
3753}
3754
3755static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3756{
3757	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3758
3759	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3760	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3761	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3762	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3763	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3764	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3765}
3766
3767static void gaudi_init_firmware_loader(struct hl_device *hdev)
3768{
3769	struct asic_fixed_properties *prop = &hdev->asic_prop;
3770	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3771
3772	/* fill common fields */
3773	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3774	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3775	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3776	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3777	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3778	fw_loader->skip_bmc = !hdev->bmc_enable;
3779	fw_loader->sram_bar_id = SRAM_BAR_ID;
3780	fw_loader->dram_bar_id = HBM_BAR_ID;
3781
3782	if (prop->dynamic_fw_load)
3783		gaudi_init_dynamic_firmware_loader(hdev);
3784	else
3785		gaudi_init_static_firmware_loader(hdev);
3786}
3787
3788static int gaudi_init_cpu(struct hl_device *hdev)
3789{
3790	struct gaudi_device *gaudi = hdev->asic_specific;
3791	int rc;
3792
3793	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3794		return 0;
3795
3796	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3797		return 0;
3798
3799	/*
3800	 * The device CPU works with 40 bits addresses.
3801	 * This register sets the extension to 50 bits.
3802	 */
3803	if (!hdev->asic_prop.fw_security_enabled)
3804		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3805
3806	rc = hl_fw_init_cpu(hdev);
3807
3808	if (rc)
3809		return rc;
3810
3811	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3812
3813	return 0;
3814}
3815
3816static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3817{
3818	struct cpu_dyn_regs *dyn_regs =
3819			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3820	struct asic_fixed_properties *prop = &hdev->asic_prop;
3821	struct gaudi_device *gaudi = hdev->asic_specific;
3822	u32 status, irq_handler_offset;
3823	struct hl_eq *eq;
3824	struct hl_hw_queue *cpu_pq =
3825			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3826	int err;
3827
3828	if (!hdev->cpu_queues_enable)
3829		return 0;
3830
3831	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3832		return 0;
3833
3834	eq = &hdev->event_queue;
3835
3836	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3837	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3838
3839	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3840	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3841
3842	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3843			lower_32_bits(hdev->cpu_accessible_dma_address));
3844	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3845			upper_32_bits(hdev->cpu_accessible_dma_address));
3846
3847	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3848	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3849	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3850
3851	/* Used for EQ CI */
3852	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3853
3854	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3855
3856	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3857
3858	irq_handler_offset = prop->gic_interrupts_enable ?
3859			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3860			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3861
3862	WREG32(irq_handler_offset,
3863		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3864
3865	err = hl_poll_timeout(
3866		hdev,
3867		mmCPU_IF_QUEUE_INIT,
3868		status,
3869		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3870		1000,
3871		cpu_timeout);
3872
3873	if (err) {
3874		dev_err(hdev->dev,
3875			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3876		return -EIO;
3877	}
3878
3879	/* update FW application security bits */
3880	if (prop->fw_cpu_boot_dev_sts0_valid)
3881		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3882	if (prop->fw_cpu_boot_dev_sts1_valid)
3883		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3884
3885	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3886	return 0;
3887}
3888
3889static void gaudi_pre_hw_init(struct hl_device *hdev)
3890{
3891	/* Perform read from the device to make sure device is up */
3892	RREG32(mmHW_STATE);
3893
3894	if (!hdev->asic_prop.fw_security_enabled) {
3895		/* Set the access through PCI bars (Linux driver only) as
3896		 * secured
3897		 */
3898		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3899				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3900				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3901
3902		/* Perform read to flush the waiting writes to ensure
3903		 * configuration was set in the device
3904		 */
3905		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3906	}
3907
3908	/*
3909	 * Let's mark in the H/W that we have reached this point. We check
3910	 * this value in the reset_before_init function to understand whether
3911	 * we need to reset the chip before doing H/W init. This register is
3912	 * cleared by the H/W upon H/W reset
3913	 */
3914	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3915}
3916
3917static int gaudi_hw_init(struct hl_device *hdev)
3918{
3919	struct gaudi_device *gaudi = hdev->asic_specific;
3920	int rc;
3921
3922	gaudi_pre_hw_init(hdev);
3923
3924	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3925	 * So we set it here and if anyone tries to move it later to
3926	 * a different address, there will be an error
3927	 */
3928	if (hdev->asic_prop.iatu_done_by_fw)
3929		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3930
3931	/*
3932	 * Before pushing u-boot/linux to device, need to set the hbm bar to
3933	 * base address of dram
3934	 */
3935	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3936		dev_err(hdev->dev,
3937			"failed to map HBM bar to DRAM base address\n");
3938		return -EIO;
3939	}
3940
3941	rc = gaudi_init_cpu(hdev);
3942	if (rc) {
3943		dev_err(hdev->dev, "failed to initialize CPU\n");
3944		return rc;
3945	}
3946
3947	/* In case the clock gating was enabled in preboot we need to disable
3948	 * it here before touching the MME/TPC registers.
3949	 */
3950	gaudi_disable_clock_gating(hdev);
3951
3952	/* SRAM scrambler must be initialized after CPU is running from HBM */
3953	gaudi_init_scrambler_sram(hdev);
3954
3955	/* This is here just in case we are working without CPU */
3956	gaudi_init_scrambler_hbm(hdev);
3957
3958	gaudi_init_golden_registers(hdev);
3959
3960	rc = gaudi_mmu_init(hdev);
3961	if (rc)
3962		return rc;
3963
3964	gaudi_init_security(hdev);
3965
3966	gaudi_init_pci_dma_qmans(hdev);
3967
3968	gaudi_init_hbm_dma_qmans(hdev);
3969
3970	gaudi_init_mme_qmans(hdev);
3971
3972	gaudi_init_tpc_qmans(hdev);
3973
3974	gaudi_init_nic_qmans(hdev);
3975
3976	gaudi_enable_timestamp(hdev);
3977
3978	/* MSI must be enabled before CPU queues and NIC are initialized */
3979	rc = gaudi_enable_msi(hdev);
3980	if (rc)
3981		goto disable_queues;
3982
3983	/* must be called after MSI was enabled */
3984	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3985	if (rc) {
3986		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3987			rc);
3988		goto disable_msi;
3989	}
3990
3991	/* Perform read from the device to flush all configuration */
3992	RREG32(mmHW_STATE);
3993
3994	return 0;
3995
3996disable_msi:
3997	gaudi_disable_msi(hdev);
3998disable_queues:
3999	gaudi_disable_mme_qmans(hdev);
4000	gaudi_disable_pci_dma_qmans(hdev);
4001
4002	return rc;
4003}
4004
4005static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4006{
4007	struct cpu_dyn_regs *dyn_regs =
4008			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4009	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4010	struct gaudi_device *gaudi = hdev->asic_specific;
4011	bool driver_performs_reset;
4012
4013	if (!hard_reset) {
4014		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4015		return 0;
4016	}
4017
4018	if (hdev->pldm) {
4019		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4020		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4021	} else {
4022		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4023		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4024	}
4025
4026	if (fw_reset) {
4027		dev_dbg(hdev->dev,
4028			"Firmware performs HARD reset, going to wait %dms\n",
4029			reset_timeout_ms);
4030
4031		goto skip_reset;
4032	}
4033
4034	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4035					!hdev->asic_prop.hard_reset_done_by_fw);
4036
4037	/* Set device to handle FLR by H/W as we will put the device CPU to
4038	 * halt mode
4039	 */
4040	if (driver_performs_reset)
4041		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4042					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4043
4044	/* If linux is loaded in the device CPU we need to communicate with it
4045	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4046	 * registers in case of old F/Ws
4047	 */
4048	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4049		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4050				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4051				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4052
4053		WREG32(irq_handler_offset,
4054			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4055
4056		/* This is a hail-mary attempt to revive the card in the small chance that the
4057		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4058		 * In that case, triggering reset through GIC won't help. We need to trigger the
4059		 * reset as if Linux wasn't loaded.
4060		 *
4061		 * We do it only if the reset cause was HB, because that would be the indication
4062		 * of such an event.
4063		 *
4064		 * In case watchdog hasn't expired but we still got HB, then this won't do any
4065		 * damage.
4066		 */
4067		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4068			if (hdev->asic_prop.hard_reset_done_by_fw)
4069				hl_fw_ask_hard_reset_without_linux(hdev);
4070			else
4071				hl_fw_ask_halt_machine_without_linux(hdev);
4072		}
4073	} else {
4074		if (hdev->asic_prop.hard_reset_done_by_fw)
4075			hl_fw_ask_hard_reset_without_linux(hdev);
4076		else
4077			hl_fw_ask_halt_machine_without_linux(hdev);
4078	}
4079
4080	if (driver_performs_reset) {
4081
4082		/* Configure the reset registers. Must be done as early as
4083		 * possible in case we fail during H/W initialization
4084		 */
4085		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4086						(CFG_RST_H_DMA_MASK |
4087						CFG_RST_H_MME_MASK |
4088						CFG_RST_H_SM_MASK |
4089						CFG_RST_H_TPC_7_MASK));
4090
4091		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4092
4093		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4094						(CFG_RST_H_HBM_MASK |
4095						CFG_RST_H_TPC_7_MASK |
4096						CFG_RST_H_NIC_MASK |
4097						CFG_RST_H_SM_MASK |
4098						CFG_RST_H_DMA_MASK |
4099						CFG_RST_H_MME_MASK |
4100						CFG_RST_H_CPU_MASK |
4101						CFG_RST_H_MMU_MASK));
4102
4103		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4104						(CFG_RST_L_IF_MASK |
4105						CFG_RST_L_PSOC_MASK |
4106						CFG_RST_L_TPC_MASK));
4107
4108		msleep(cpu_timeout_ms);
4109
4110		/* Tell ASIC not to re-initialize PCIe */
4111		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4112
4113		/* Restart BTL/BLR upon hard-reset */
4114		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4115
4116		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4117			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4118
4119		dev_dbg(hdev->dev,
4120			"Issued HARD reset command, going to wait %dms\n",
4121			reset_timeout_ms);
4122	} else {
4123		dev_dbg(hdev->dev,
4124			"Firmware performs HARD reset, going to wait %dms\n",
4125			reset_timeout_ms);
4126	}
4127
4128skip_reset:
4129	/*
4130	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4131	 * itself is in reset. Need to wait until the reset is deasserted
4132	 */
4133	msleep(reset_timeout_ms);
4134
4135	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4136	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4137		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4138		return -ETIMEDOUT;
4139	}
4140
4141	if (gaudi) {
4142		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4143						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4144						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4145						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4146						HW_CAP_HBM_SCRAMBLER);
4147
4148		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4149
4150		hdev->device_cpu_is_halted = false;
4151	}
4152	return 0;
4153}
4154
4155static int gaudi_suspend(struct hl_device *hdev)
4156{
4157	int rc;
4158
4159	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4160	if (rc)
4161		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4162
4163	return rc;
4164}
4165
4166static int gaudi_resume(struct hl_device *hdev)
4167{
4168	return gaudi_init_iatu(hdev);
4169}
4170
4171static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4172			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4173{
4174	int rc;
4175
4176	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4177			VM_DONTCOPY | VM_NORESERVE);
4178
4179	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4180				(dma_addr - HOST_PHYS_BASE), size);
4181	if (rc)
4182		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4183
4184	return rc;
4185}
4186
4187static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4188{
4189	struct cpu_dyn_regs *dyn_regs =
4190			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4191	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4192	struct gaudi_device *gaudi = hdev->asic_specific;
4193	bool invalid_queue = false;
4194	int dma_id;
4195
4196	switch (hw_queue_id) {
4197	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4198		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4199		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4200		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4201		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4202		break;
4203
4204	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4205		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4206		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4207		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4208		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4209		break;
4210
4211	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4212		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4213		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4214		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4215		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4216		break;
4217
4218	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4219		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4220		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4221		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4222		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4223		break;
4224
4225	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4226		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4227		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4228		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4229		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4230		break;
4231
4232	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4233		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4234		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4235		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4236		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4237		break;
4238
4239	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4240		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4241		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4242		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4243		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4244		break;
4245
4246	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4247		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4248		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4249		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4250		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4251		break;
4252
4253	case GAUDI_QUEUE_ID_CPU_PQ:
4254		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4255			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4256		else
4257			invalid_queue = true;
4258		break;
4259
4260	case GAUDI_QUEUE_ID_MME_0_0:
4261		db_reg_offset = mmMME2_QM_PQ_PI_0;
4262		break;
4263
4264	case GAUDI_QUEUE_ID_MME_0_1:
4265		db_reg_offset = mmMME2_QM_PQ_PI_1;
4266		break;
4267
4268	case GAUDI_QUEUE_ID_MME_0_2:
4269		db_reg_offset = mmMME2_QM_PQ_PI_2;
4270		break;
4271
4272	case GAUDI_QUEUE_ID_MME_0_3:
4273		db_reg_offset = mmMME2_QM_PQ_PI_3;
4274		break;
4275
4276	case GAUDI_QUEUE_ID_MME_1_0:
4277		db_reg_offset = mmMME0_QM_PQ_PI_0;
4278		break;
4279
4280	case GAUDI_QUEUE_ID_MME_1_1:
4281		db_reg_offset = mmMME0_QM_PQ_PI_1;
4282		break;
4283
4284	case GAUDI_QUEUE_ID_MME_1_2:
4285		db_reg_offset = mmMME0_QM_PQ_PI_2;
4286		break;
4287
4288	case GAUDI_QUEUE_ID_MME_1_3:
4289		db_reg_offset = mmMME0_QM_PQ_PI_3;
4290		break;
4291
4292	case GAUDI_QUEUE_ID_TPC_0_0:
4293		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4294		break;
4295
4296	case GAUDI_QUEUE_ID_TPC_0_1:
4297		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4298		break;
4299
4300	case GAUDI_QUEUE_ID_TPC_0_2:
4301		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4302		break;
4303
4304	case GAUDI_QUEUE_ID_TPC_0_3:
4305		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4306		break;
4307
4308	case GAUDI_QUEUE_ID_TPC_1_0:
4309		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4310		break;
4311
4312	case GAUDI_QUEUE_ID_TPC_1_1:
4313		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4314		break;
4315
4316	case GAUDI_QUEUE_ID_TPC_1_2:
4317		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4318		break;
4319
4320	case GAUDI_QUEUE_ID_TPC_1_3:
4321		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4322		break;
4323
4324	case GAUDI_QUEUE_ID_TPC_2_0:
4325		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4326		break;
4327
4328	case GAUDI_QUEUE_ID_TPC_2_1:
4329		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4330		break;
4331
4332	case GAUDI_QUEUE_ID_TPC_2_2:
4333		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4334		break;
4335
4336	case GAUDI_QUEUE_ID_TPC_2_3:
4337		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4338		break;
4339
4340	case GAUDI_QUEUE_ID_TPC_3_0:
4341		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4342		break;
4343
4344	case GAUDI_QUEUE_ID_TPC_3_1:
4345		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4346		break;
4347
4348	case GAUDI_QUEUE_ID_TPC_3_2:
4349		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4350		break;
4351
4352	case GAUDI_QUEUE_ID_TPC_3_3:
4353		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4354		break;
4355
4356	case GAUDI_QUEUE_ID_TPC_4_0:
4357		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4358		break;
4359
4360	case GAUDI_QUEUE_ID_TPC_4_1:
4361		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4362		break;
4363
4364	case GAUDI_QUEUE_ID_TPC_4_2:
4365		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4366		break;
4367
4368	case GAUDI_QUEUE_ID_TPC_4_3:
4369		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4370		break;
4371
4372	case GAUDI_QUEUE_ID_TPC_5_0:
4373		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4374		break;
4375
4376	case GAUDI_QUEUE_ID_TPC_5_1:
4377		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4378		break;
4379
4380	case GAUDI_QUEUE_ID_TPC_5_2:
4381		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4382		break;
4383
4384	case GAUDI_QUEUE_ID_TPC_5_3:
4385		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4386		break;
4387
4388	case GAUDI_QUEUE_ID_TPC_6_0:
4389		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4390		break;
4391
4392	case GAUDI_QUEUE_ID_TPC_6_1:
4393		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4394		break;
4395
4396	case GAUDI_QUEUE_ID_TPC_6_2:
4397		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4398		break;
4399
4400	case GAUDI_QUEUE_ID_TPC_6_3:
4401		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4402		break;
4403
4404	case GAUDI_QUEUE_ID_TPC_7_0:
4405		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4406		break;
4407
4408	case GAUDI_QUEUE_ID_TPC_7_1:
4409		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4410		break;
4411
4412	case GAUDI_QUEUE_ID_TPC_7_2:
4413		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4414		break;
4415
4416	case GAUDI_QUEUE_ID_TPC_7_3:
4417		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4418		break;
4419
4420	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4421		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4422			invalid_queue = true;
4423
4424		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4425		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4426		break;
4427
4428	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4429		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4430			invalid_queue = true;
4431
4432		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4433		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4434		break;
4435
4436	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4437		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4438			invalid_queue = true;
4439
4440		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4441		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4442		break;
4443
4444	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4445		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4446			invalid_queue = true;
4447
4448		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4449		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4450		break;
4451
4452	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4453		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4454			invalid_queue = true;
4455
4456		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4457		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4458		break;
4459
4460	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4461		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4462			invalid_queue = true;
4463
4464		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4465		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4466		break;
4467
4468	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4469		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4470			invalid_queue = true;
4471
4472		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4473		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4474		break;
4475
4476	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4477		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4478			invalid_queue = true;
4479
4480		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4481		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4482		break;
4483
4484	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4485		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4486			invalid_queue = true;
4487
4488		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4489		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4490		break;
4491
4492	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4493		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4494			invalid_queue = true;
4495
4496		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4497		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4498		break;
4499
4500	default:
4501		invalid_queue = true;
4502	}
4503
4504	if (invalid_queue) {
4505		/* Should never get here */
4506		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4507			hw_queue_id);
4508		return;
4509	}
4510
4511	db_value = pi;
4512
4513	/* ring the doorbell */
4514	WREG32(db_reg_offset, db_value);
4515
4516	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4517		/* make sure device CPU will read latest data from host */
4518		mb();
4519
4520		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4521				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4522				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4523
4524		WREG32(irq_handler_offset,
4525			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4526	}
4527}
4528
4529static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4530				struct hl_bd *bd)
4531{
4532	__le64 *pbd = (__le64 *) bd;
4533
4534	/* The QMANs are on the host memory so a simple copy suffice */
4535	pqe[0] = pbd[0];
4536	pqe[1] = pbd[1];
4537}
4538
4539static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4540					dma_addr_t *dma_handle, gfp_t flags)
4541{
4542	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4543						dma_handle, flags);
4544
4545	/* Shift to the device's base physical address of host memory */
4546	if (kernel_addr)
4547		*dma_handle += HOST_PHYS_BASE;
4548
4549	return kernel_addr;
4550}
4551
4552static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4553		void *cpu_addr, dma_addr_t dma_handle)
4554{
4555	/* Cancel the device's base physical address of host memory */
4556	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4557
4558	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4559}
4560
4561static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4562{
4563	struct asic_fixed_properties *prop = &hdev->asic_prop;
4564	u64 cur_addr = prop->dram_user_base_address;
4565	u32 chunk_size, busy;
4566	int rc, dma_id;
4567
4568	while (cur_addr < prop->dram_end_address) {
4569		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4570			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4571
4572			chunk_size =
4573			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4574
4575			dev_dbg(hdev->dev,
4576				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4577				cur_addr, cur_addr + chunk_size);
4578
4579			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4580					lower_32_bits(val));
4581			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4582					upper_32_bits(val));
4583			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4584						lower_32_bits(cur_addr));
4585			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4586						upper_32_bits(cur_addr));
4587			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4588					chunk_size);
4589			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4590					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4591					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4592
4593			cur_addr += chunk_size;
4594
4595			if (cur_addr == prop->dram_end_address)
4596				break;
4597		}
4598
4599		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4600			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4601
4602			rc = hl_poll_timeout(
4603				hdev,
4604				mmDMA0_CORE_STS0 + dma_offset,
4605				busy,
4606				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4607				1000,
4608				HBM_SCRUBBING_TIMEOUT_US);
4609
4610			if (rc) {
4611				dev_err(hdev->dev,
4612					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4613					dma_id);
4614				return -EIO;
4615			}
4616		}
4617	}
4618
4619	return 0;
4620}
4621
4622static int gaudi_scrub_device_mem(struct hl_device *hdev)
4623{
4624	struct asic_fixed_properties *prop = &hdev->asic_prop;
4625	u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
4626	u64 addr, size, val = hdev->memory_scrub_val;
4627	ktime_t timeout;
4628	int rc = 0;
4629
4630	if (!hdev->memory_scrub)
4631		return 0;
4632
4633	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4634	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4635		if (ktime_compare(ktime_get(), timeout) > 0) {
4636			dev_err(hdev->dev, "waiting for idle timeout\n");
4637			return -ETIMEDOUT;
4638		}
4639		usleep_range((1000 >> 2) + 1, 1000);
4640	}
4641
4642	/* Scrub SRAM */
4643	addr = prop->sram_user_base_address;
4644	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4645
4646	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4647			addr, addr + size, val);
4648	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4649	if (rc) {
4650		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4651		return rc;
4652	}
4653
4654	/* Scrub HBM using all DMA channels in parallel */
4655	rc = gaudi_scrub_device_dram(hdev, val);
4656	if (rc) {
4657		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4658		return rc;
4659	}
4660
4661	return 0;
4662}
4663
4664static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4665				u32 queue_id, dma_addr_t *dma_handle,
4666				u16 *queue_len)
4667{
4668	struct gaudi_device *gaudi = hdev->asic_specific;
4669	struct gaudi_internal_qman_info *q;
4670
4671	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4672			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4673		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4674		return NULL;
4675	}
4676
4677	q = &gaudi->internal_qmans[queue_id];
4678	*dma_handle = q->pq_dma_addr;
4679	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4680
4681	return q->pq_kernel_addr;
4682}
4683
4684static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4685				u16 len, u32 timeout, u64 *result)
4686{
4687	struct gaudi_device *gaudi = hdev->asic_specific;
4688
4689	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4690		if (result)
4691			*result = 0;
4692		return 0;
4693	}
4694
4695	if (!timeout)
4696		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4697
4698	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4699						timeout, result);
4700}
4701
4702static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4703{
4704	struct packet_msg_prot *fence_pkt;
4705	dma_addr_t pkt_dma_addr;
4706	u32 fence_val, tmp, timeout_usec;
4707	dma_addr_t fence_dma_addr;
4708	u32 *fence_ptr;
4709	int rc;
4710
4711	if (hdev->pldm)
4712		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4713	else
4714		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4715
4716	fence_val = GAUDI_QMAN0_FENCE_VAL;
4717
4718	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4719	if (!fence_ptr) {
4720		dev_err(hdev->dev,
4721			"Failed to allocate memory for H/W queue %d testing\n",
4722			hw_queue_id);
4723		return -ENOMEM;
4724	}
4725
4726	*fence_ptr = 0;
4727
4728	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4729						&pkt_dma_addr);
4730	if (!fence_pkt) {
4731		dev_err(hdev->dev,
4732			"Failed to allocate packet for H/W queue %d testing\n",
4733			hw_queue_id);
4734		rc = -ENOMEM;
4735		goto free_fence_ptr;
4736	}
4737
4738	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4739	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4740	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4741
4742	fence_pkt->ctl = cpu_to_le32(tmp);
4743	fence_pkt->value = cpu_to_le32(fence_val);
4744	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4745
4746	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4747					sizeof(struct packet_msg_prot),
4748					pkt_dma_addr);
4749	if (rc) {
4750		dev_err(hdev->dev,
4751			"Failed to send fence packet to H/W queue %d\n",
4752			hw_queue_id);
4753		goto free_pkt;
4754	}
4755
4756	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4757					1000, timeout_usec, true);
4758
4759	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4760
4761	if (rc == -ETIMEDOUT) {
4762		dev_err(hdev->dev,
4763			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4764			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4765		rc = -EIO;
4766	}
4767
4768free_pkt:
4769	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4770free_fence_ptr:
4771	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4772	return rc;
4773}
4774
4775static int gaudi_test_cpu_queue(struct hl_device *hdev)
4776{
4777	struct gaudi_device *gaudi = hdev->asic_specific;
4778
4779	/*
4780	 * check capability here as send_cpu_message() won't update the result
4781	 * value if no capability
4782	 */
4783	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4784		return 0;
4785
4786	return hl_fw_test_cpu_queue(hdev);
4787}
4788
4789static int gaudi_test_queues(struct hl_device *hdev)
4790{
4791	int i, rc, ret_val = 0;
4792
4793	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4794		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4795			rc = gaudi_test_queue(hdev, i);
4796			if (rc)
4797				ret_val = -EINVAL;
4798		}
4799	}
4800
4801	rc = gaudi_test_cpu_queue(hdev);
4802	if (rc)
4803		ret_val = -EINVAL;
4804
4805	return ret_val;
4806}
4807
4808static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4809		gfp_t mem_flags, dma_addr_t *dma_handle)
4810{
4811	void *kernel_addr;
4812
4813	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4814		return NULL;
4815
4816	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4817
4818	/* Shift to the device's base physical address of host memory */
4819	if (kernel_addr)
4820		*dma_handle += HOST_PHYS_BASE;
4821
4822	return kernel_addr;
4823}
4824
4825static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4826			dma_addr_t dma_addr)
4827{
4828	/* Cancel the device's base physical address of host memory */
4829	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4830
4831	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4832}
4833
4834static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4835					size_t size, dma_addr_t *dma_handle)
4836{
4837	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4838}
4839
4840static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4841						size_t size, void *vaddr)
4842{
4843	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4844}
4845
4846static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4847{
4848	struct scatterlist *sg, *sg_next_iter;
4849	u32 count, dma_desc_cnt;
4850	u64 len, len_next;
4851	dma_addr_t addr, addr_next;
4852
4853	dma_desc_cnt = 0;
4854
4855	for_each_sgtable_dma_sg(sgt, sg, count) {
4856		len = sg_dma_len(sg);
4857		addr = sg_dma_address(sg);
4858
4859		if (len == 0)
4860			break;
4861
4862		while ((count + 1) < sgt->nents) {
4863			sg_next_iter = sg_next(sg);
4864			len_next = sg_dma_len(sg_next_iter);
4865			addr_next = sg_dma_address(sg_next_iter);
4866
4867			if (len_next == 0)
4868				break;
4869
4870			if ((addr + len == addr_next) &&
4871				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4872				len += len_next;
4873				count++;
4874				sg = sg_next_iter;
4875			} else {
4876				break;
4877			}
4878		}
4879
4880		dma_desc_cnt++;
4881	}
4882
4883	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4884}
4885
4886static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4887				struct hl_cs_parser *parser,
4888				struct packet_lin_dma *user_dma_pkt,
4889				u64 addr, enum dma_data_direction dir)
4890{
4891	struct hl_userptr *userptr;
4892	int rc;
4893
4894	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4895			parser->job_userptr_list, &userptr))
4896		goto already_pinned;
4897
4898	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4899	if (!userptr)
4900		return -ENOMEM;
4901
4902	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4903				userptr);
4904	if (rc)
4905		goto free_userptr;
4906
4907	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4908
4909	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
4910	if (rc) {
4911		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4912		goto unpin_memory;
4913	}
4914
4915	userptr->dma_mapped = true;
4916	userptr->dir = dir;
4917
4918already_pinned:
4919	parser->patched_cb_size +=
4920			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4921
4922	return 0;
4923
4924unpin_memory:
4925	list_del(&userptr->job_node);
4926	hl_unpin_host_memory(hdev, userptr);
4927free_userptr:
4928	kfree(userptr);
4929	return rc;
4930}
4931
4932static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4933				struct hl_cs_parser *parser,
4934				struct packet_lin_dma *user_dma_pkt,
4935				bool src_in_host)
4936{
4937	enum dma_data_direction dir;
4938	bool skip_host_mem_pin = false, user_memset;
4939	u64 addr;
4940	int rc = 0;
4941
4942	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4943			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4944			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4945
4946	if (src_in_host) {
4947		if (user_memset)
4948			skip_host_mem_pin = true;
4949
4950		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4951		dir = DMA_TO_DEVICE;
4952		addr = le64_to_cpu(user_dma_pkt->src_addr);
4953	} else {
4954		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4955		dir = DMA_FROM_DEVICE;
4956		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4957				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4958				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4959	}
4960
4961	if (skip_host_mem_pin)
4962		parser->patched_cb_size += sizeof(*user_dma_pkt);
4963	else
4964		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4965						addr, dir);
4966
4967	return rc;
4968}
4969
4970static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4971				struct hl_cs_parser *parser,
4972				struct packet_lin_dma *user_dma_pkt)
4973{
4974	bool src_in_host = false;
4975	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4976			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4977			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4978
4979	dev_dbg(hdev->dev, "DMA packet details:\n");
4980	dev_dbg(hdev->dev, "source == 0x%llx\n",
4981				le64_to_cpu(user_dma_pkt->src_addr));
4982	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4983	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4984
4985	/*
4986	 * Special handling for DMA with size 0. Bypass all validations
4987	 * because no transactions will be done except for WR_COMP, which
4988	 * is not a security issue
4989	 */
4990	if (!le32_to_cpu(user_dma_pkt->tsize)) {
4991		parser->patched_cb_size += sizeof(*user_dma_pkt);
4992		return 0;
4993	}
4994
4995	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4996		src_in_host = true;
4997
4998	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4999						src_in_host);
5000}
5001
5002static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5003					struct hl_cs_parser *parser,
5004					struct packet_load_and_exe *user_pkt)
5005{
5006	u32 cfg;
5007
5008	cfg = le32_to_cpu(user_pkt->cfg);
5009
5010	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5011		dev_err(hdev->dev,
5012			"User not allowed to use Load and Execute\n");
5013		return -EPERM;
5014	}
5015
5016	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5017
5018	return 0;
5019}
5020
5021static int gaudi_validate_cb(struct hl_device *hdev,
5022			struct hl_cs_parser *parser, bool is_mmu)
5023{
5024	u32 cb_parsed_length = 0;
5025	int rc = 0;
5026
5027	parser->patched_cb_size = 0;
5028
5029	/* cb_user_size is more than 0 so loop will always be executed */
5030	while (cb_parsed_length < parser->user_cb_size) {
5031		enum packet_id pkt_id;
5032		u16 pkt_size;
5033		struct gaudi_packet *user_pkt;
5034
5035		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5036
5037		pkt_id = (enum packet_id) (
5038				(le64_to_cpu(user_pkt->header) &
5039				PACKET_HEADER_PACKET_ID_MASK) >>
5040					PACKET_HEADER_PACKET_ID_SHIFT);
5041
5042		if (!validate_packet_id(pkt_id)) {
5043			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5044			rc = -EINVAL;
5045			break;
5046		}
5047
5048		pkt_size = gaudi_packet_sizes[pkt_id];
5049		cb_parsed_length += pkt_size;
5050		if (cb_parsed_length > parser->user_cb_size) {
5051			dev_err(hdev->dev,
5052				"packet 0x%x is out of CB boundary\n", pkt_id);
5053			rc = -EINVAL;
5054			break;
5055		}
5056
5057		switch (pkt_id) {
5058		case PACKET_MSG_PROT:
5059			dev_err(hdev->dev,
5060				"User not allowed to use MSG_PROT\n");
5061			rc = -EPERM;
5062			break;
5063
5064		case PACKET_CP_DMA:
5065			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5066			rc = -EPERM;
5067			break;
5068
5069		case PACKET_STOP:
5070			dev_err(hdev->dev, "User not allowed to use STOP\n");
5071			rc = -EPERM;
5072			break;
5073
5074		case PACKET_WREG_BULK:
5075			dev_err(hdev->dev,
5076				"User not allowed to use WREG_BULK\n");
5077			rc = -EPERM;
5078			break;
5079
5080		case PACKET_LOAD_AND_EXE:
5081			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5082				(struct packet_load_and_exe *) user_pkt);
5083			break;
5084
5085		case PACKET_LIN_DMA:
5086			parser->contains_dma_pkt = true;
5087			if (is_mmu)
5088				parser->patched_cb_size += pkt_size;
5089			else
5090				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5091					(struct packet_lin_dma *) user_pkt);
5092			break;
5093
5094		case PACKET_WREG_32:
5095		case PACKET_MSG_LONG:
5096		case PACKET_MSG_SHORT:
5097		case PACKET_REPEAT:
5098		case PACKET_FENCE:
5099		case PACKET_NOP:
5100		case PACKET_ARB_POINT:
5101			parser->patched_cb_size += pkt_size;
5102			break;
5103
5104		default:
5105			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5106				pkt_id);
5107			rc = -EINVAL;
5108			break;
5109		}
5110
5111		if (rc)
5112			break;
5113	}
5114
5115	/*
5116	 * The new CB should have space at the end for two MSG_PROT packets:
5117	 * 1. Optional NOP padding for cacheline alignment
5118	 * 2. A packet that will act as a completion packet
5119	 * 3. A packet that will generate MSI interrupt
5120	 */
5121	if (parser->completion)
5122		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5123			parser->patched_cb_size);
5124
5125	return rc;
5126}
5127
5128static int gaudi_patch_dma_packet(struct hl_device *hdev,
5129				struct hl_cs_parser *parser,
5130				struct packet_lin_dma *user_dma_pkt,
5131				struct packet_lin_dma *new_dma_pkt,
5132				u32 *new_dma_pkt_size)
5133{
5134	struct hl_userptr *userptr;
5135	struct scatterlist *sg, *sg_next_iter;
5136	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5137	u64 len, len_next;
5138	dma_addr_t dma_addr, dma_addr_next;
5139	u64 device_memory_addr, addr;
5140	enum dma_data_direction dir;
5141	struct sg_table *sgt;
5142	bool src_in_host = false;
5143	bool skip_host_mem_pin = false;
5144	bool user_memset;
5145
5146	ctl = le32_to_cpu(user_dma_pkt->ctl);
5147
5148	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5149		src_in_host = true;
5150
5151	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5152			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5153
5154	if (src_in_host) {
5155		addr = le64_to_cpu(user_dma_pkt->src_addr);
5156		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5157		dir = DMA_TO_DEVICE;
5158		if (user_memset)
5159			skip_host_mem_pin = true;
5160	} else {
5161		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5162		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5163		dir = DMA_FROM_DEVICE;
5164	}
5165
5166	if ((!skip_host_mem_pin) &&
5167		(!hl_userptr_is_pinned(hdev, addr,
5168					le32_to_cpu(user_dma_pkt->tsize),
5169					parser->job_userptr_list, &userptr))) {
5170		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5171				addr, user_dma_pkt->tsize);
5172		return -EFAULT;
5173	}
5174
5175	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5176		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5177		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5178		return 0;
5179	}
5180
5181	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5182
5183	sgt = userptr->sgt;
5184	dma_desc_cnt = 0;
5185
5186	for_each_sgtable_dma_sg(sgt, sg, count) {
5187		len = sg_dma_len(sg);
5188		dma_addr = sg_dma_address(sg);
5189
5190		if (len == 0)
5191			break;
5192
5193		while ((count + 1) < sgt->nents) {
5194			sg_next_iter = sg_next(sg);
5195			len_next = sg_dma_len(sg_next_iter);
5196			dma_addr_next = sg_dma_address(sg_next_iter);
5197
5198			if (len_next == 0)
5199				break;
5200
5201			if ((dma_addr + len == dma_addr_next) &&
5202				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5203				len += len_next;
5204				count++;
5205				sg = sg_next_iter;
5206			} else {
5207				break;
5208			}
5209		}
5210
5211		ctl = le32_to_cpu(user_dma_pkt->ctl);
5212		if (likely(dma_desc_cnt))
5213			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5214		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5215		new_dma_pkt->ctl = cpu_to_le32(ctl);
5216		new_dma_pkt->tsize = cpu_to_le32(len);
5217
5218		if (dir == DMA_TO_DEVICE) {
5219			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5220			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5221		} else {
5222			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5223			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5224		}
5225
5226		if (!user_memset)
5227			device_memory_addr += len;
5228		dma_desc_cnt++;
5229		new_dma_pkt++;
5230	}
5231
5232	if (!dma_desc_cnt) {
5233		dev_err(hdev->dev,
5234			"Error of 0 SG entries when patching DMA packet\n");
5235		return -EFAULT;
5236	}
5237
5238	/* Fix the last dma packet - wrcomp must be as user set it */
5239	new_dma_pkt--;
5240	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5241
5242	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5243
5244	return 0;
5245}
5246
5247static int gaudi_patch_cb(struct hl_device *hdev,
5248				struct hl_cs_parser *parser)
5249{
5250	u32 cb_parsed_length = 0;
5251	u32 cb_patched_cur_length = 0;
5252	int rc = 0;
5253
5254	/* cb_user_size is more than 0 so loop will always be executed */
5255	while (cb_parsed_length < parser->user_cb_size) {
5256		enum packet_id pkt_id;
5257		u16 pkt_size;
5258		u32 new_pkt_size = 0;
5259		struct gaudi_packet *user_pkt, *kernel_pkt;
5260
5261		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5262		kernel_pkt = parser->patched_cb->kernel_address +
5263					cb_patched_cur_length;
5264
5265		pkt_id = (enum packet_id) (
5266				(le64_to_cpu(user_pkt->header) &
5267				PACKET_HEADER_PACKET_ID_MASK) >>
5268					PACKET_HEADER_PACKET_ID_SHIFT);
5269
5270		if (!validate_packet_id(pkt_id)) {
5271			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5272			rc = -EINVAL;
5273			break;
5274		}
5275
5276		pkt_size = gaudi_packet_sizes[pkt_id];
5277		cb_parsed_length += pkt_size;
5278		if (cb_parsed_length > parser->user_cb_size) {
5279			dev_err(hdev->dev,
5280				"packet 0x%x is out of CB boundary\n", pkt_id);
5281			rc = -EINVAL;
5282			break;
5283		}
5284
5285		switch (pkt_id) {
5286		case PACKET_LIN_DMA:
5287			rc = gaudi_patch_dma_packet(hdev, parser,
5288					(struct packet_lin_dma *) user_pkt,
5289					(struct packet_lin_dma *) kernel_pkt,
5290					&new_pkt_size);
5291			cb_patched_cur_length += new_pkt_size;
5292			break;
5293
5294		case PACKET_MSG_PROT:
5295			dev_err(hdev->dev,
5296				"User not allowed to use MSG_PROT\n");
5297			rc = -EPERM;
5298			break;
5299
5300		case PACKET_CP_DMA:
5301			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5302			rc = -EPERM;
5303			break;
5304
5305		case PACKET_STOP:
5306			dev_err(hdev->dev, "User not allowed to use STOP\n");
5307			rc = -EPERM;
5308			break;
5309
5310		case PACKET_WREG_32:
5311		case PACKET_WREG_BULK:
5312		case PACKET_MSG_LONG:
5313		case PACKET_MSG_SHORT:
5314		case PACKET_REPEAT:
5315		case PACKET_FENCE:
5316		case PACKET_NOP:
5317		case PACKET_ARB_POINT:
5318		case PACKET_LOAD_AND_EXE:
5319			memcpy(kernel_pkt, user_pkt, pkt_size);
5320			cb_patched_cur_length += pkt_size;
5321			break;
5322
5323		default:
5324			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5325				pkt_id);
5326			rc = -EINVAL;
5327			break;
5328		}
5329
5330		if (rc)
5331			break;
5332	}
5333
5334	return rc;
5335}
5336
5337static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5338		struct hl_cs_parser *parser)
5339{
5340	u64 handle;
5341	u32 patched_cb_size;
5342	struct hl_cb *user_cb;
5343	int rc;
5344
5345	/*
5346	 * The new CB should have space at the end for two MSG_PROT packets:
5347	 * 1. Optional NOP padding for cacheline alignment
5348	 * 2. A packet that will act as a completion packet
5349	 * 3. A packet that will generate MSI interrupt
5350	 */
5351	if (parser->completion)
5352		parser->patched_cb_size = parser->user_cb_size +
5353				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5354	else
5355		parser->patched_cb_size = parser->user_cb_size;
5356
5357	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5358				parser->patched_cb_size, false, false,
5359				&handle);
5360
5361	if (rc) {
5362		dev_err(hdev->dev,
5363			"Failed to allocate patched CB for DMA CS %d\n",
5364			rc);
5365		return rc;
5366	}
5367
5368	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5369	/* hl_cb_get should never fail */
5370	if (!parser->patched_cb) {
5371		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5372		rc = -EFAULT;
5373		goto out;
5374	}
5375
5376	/*
5377	 * We are protected from overflow because the check
5378	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5379	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5380	 *
5381	 * There is no option to reach here without going through that check because:
5382	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5383	 *    an external queue.
5384	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5385	 */
5386	memcpy(parser->patched_cb->kernel_address,
5387		parser->user_cb->kernel_address,
5388		parser->user_cb_size);
5389
5390	patched_cb_size = parser->patched_cb_size;
5391
5392	/* Validate patched CB instead of user CB */
5393	user_cb = parser->user_cb;
5394	parser->user_cb = parser->patched_cb;
5395	rc = gaudi_validate_cb(hdev, parser, true);
5396	parser->user_cb = user_cb;
5397
5398	if (rc) {
5399		hl_cb_put(parser->patched_cb);
5400		goto out;
5401	}
5402
5403	if (patched_cb_size != parser->patched_cb_size) {
5404		dev_err(hdev->dev, "user CB size mismatch\n");
5405		hl_cb_put(parser->patched_cb);
5406		rc = -EINVAL;
5407		goto out;
5408	}
5409
5410out:
5411	/*
5412	 * Always call cb destroy here because we still have 1 reference
5413	 * to it by calling cb_get earlier. After the job will be completed,
5414	 * cb_put will release it, but here we want to remove it from the
5415	 * idr
5416	 */
5417	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5418
5419	return rc;
5420}
5421
5422static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5423		struct hl_cs_parser *parser)
5424{
5425	u64 handle;
5426	int rc;
5427
5428	rc = gaudi_validate_cb(hdev, parser, false);
5429
5430	if (rc)
5431		goto free_userptr;
5432
5433	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5434				parser->patched_cb_size, false, false,
5435				&handle);
5436	if (rc) {
5437		dev_err(hdev->dev,
5438			"Failed to allocate patched CB for DMA CS %d\n", rc);
5439		goto free_userptr;
5440	}
5441
5442	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5443	/* hl_cb_get should never fail here */
5444	if (!parser->patched_cb) {
5445		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5446		rc = -EFAULT;
5447		goto out;
5448	}
5449
5450	rc = gaudi_patch_cb(hdev, parser);
5451
5452	if (rc)
5453		hl_cb_put(parser->patched_cb);
5454
5455out:
5456	/*
5457	 * Always call cb destroy here because we still have 1 reference
5458	 * to it by calling cb_get earlier. After the job will be completed,
5459	 * cb_put will release it, but here we want to remove it from the
5460	 * idr
5461	 */
5462	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5463
5464free_userptr:
5465	if (rc)
5466		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5467	return rc;
5468}
5469
5470static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5471					struct hl_cs_parser *parser)
5472{
5473	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5474	struct gaudi_device *gaudi = hdev->asic_specific;
5475	u32 nic_queue_offset, nic_mask_q_id;
5476
5477	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5478			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5479		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5480		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5481
5482		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5483			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5484			return -EINVAL;
5485		}
5486	}
5487
5488	/* For internal queue jobs just check if CB address is valid */
5489	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5490					parser->user_cb_size,
5491					asic_prop->sram_user_base_address,
5492					asic_prop->sram_end_address))
5493		return 0;
5494
5495	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5496					parser->user_cb_size,
5497					asic_prop->dram_user_base_address,
5498					asic_prop->dram_end_address))
5499		return 0;
5500
5501	/* PMMU and HPMMU addresses are equal, check only one of them */
5502	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5503					parser->user_cb_size,
5504					asic_prop->pmmu.start_addr,
5505					asic_prop->pmmu.end_addr))
5506		return 0;
5507
5508	dev_err(hdev->dev,
5509		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5510		parser->user_cb, parser->user_cb_size);
5511
5512	return -EFAULT;
5513}
5514
5515static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5516{
5517	struct gaudi_device *gaudi = hdev->asic_specific;
5518
5519	if (parser->queue_type == QUEUE_TYPE_INT)
5520		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5521
5522	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5523		return gaudi_parse_cb_mmu(hdev, parser);
5524	else
5525		return gaudi_parse_cb_no_mmu(hdev, parser);
5526}
5527
5528static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5529				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5530				u32 msi_vec, bool eb)
5531{
5532	struct packet_msg_prot *cq_pkt;
5533	struct packet_nop *cq_padding;
5534	u64 msi_addr;
5535	u32 tmp;
5536
5537	cq_padding = kernel_address + original_len;
5538	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5539
5540	while ((void *)cq_padding < (void *)cq_pkt) {
5541		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5542		cq_padding++;
5543	}
5544
5545	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5546	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5547
5548	if (eb)
5549		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5550
5551	cq_pkt->ctl = cpu_to_le32(tmp);
5552	cq_pkt->value = cpu_to_le32(cq_val);
5553	cq_pkt->addr = cpu_to_le64(cq_addr);
5554
5555	cq_pkt++;
5556
5557	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5558	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5559	cq_pkt->ctl = cpu_to_le32(tmp);
5560	cq_pkt->value = cpu_to_le32(1);
5561	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5562	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5563}
5564
5565static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5566{
5567	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5568}
5569
5570static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5571					u32 size, u64 val)
5572{
5573	struct packet_lin_dma *lin_dma_pkt;
5574	struct hl_cs_job *job;
5575	u32 cb_size, ctl, err_cause;
5576	struct hl_cb *cb;
5577	int rc;
5578
5579	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5580	if (!cb)
5581		return -EFAULT;
5582
5583	lin_dma_pkt = cb->kernel_address;
5584	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5585	cb_size = sizeof(*lin_dma_pkt);
5586
5587	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5588	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5589	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5590	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5591	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5592
5593	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5594	lin_dma_pkt->src_addr = cpu_to_le64(val);
5595	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5596	lin_dma_pkt->tsize = cpu_to_le32(size);
5597
5598	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5599	if (!job) {
5600		dev_err(hdev->dev, "Failed to allocate a new job\n");
5601		rc = -ENOMEM;
5602		goto release_cb;
5603	}
5604
5605	/* Verify DMA is OK */
5606	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5607	if (err_cause && !hdev->init_done) {
5608		dev_dbg(hdev->dev,
5609			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5610			err_cause);
5611		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5612	}
5613
5614	job->id = 0;
5615	job->user_cb = cb;
5616	atomic_inc(&job->user_cb->cs_cnt);
5617	job->user_cb_size = cb_size;
5618	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5619	job->patched_cb = job->user_cb;
5620	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5621
5622	hl_debugfs_add_job(hdev, job);
5623
5624	rc = gaudi_send_job_on_qman0(hdev, job);
5625	hl_debugfs_remove_job(hdev, job);
5626	kfree(job);
5627	atomic_dec(&cb->cs_cnt);
5628
5629	/* Verify DMA is OK */
5630	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5631	if (err_cause) {
5632		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5633		rc = -EIO;
5634		if (!hdev->init_done) {
5635			dev_dbg(hdev->dev,
5636				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5637				err_cause);
5638			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5639		}
5640	}
5641
5642release_cb:
5643	hl_cb_put(cb);
5644	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5645
5646	return rc;
5647}
5648
5649static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5650					u32 num_regs, u32 val)
5651{
5652	struct packet_msg_long *pkt;
5653	struct hl_cs_job *job;
5654	u32 cb_size, ctl;
5655	struct hl_cb *cb;
5656	int i, rc;
5657
5658	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5659
5660	if (cb_size > SZ_2M) {
5661		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5662		return -ENOMEM;
5663	}
5664
5665	cb = hl_cb_kernel_create(hdev, cb_size, false);
5666	if (!cb)
5667		return -EFAULT;
5668
5669	pkt = cb->kernel_address;
5670
5671	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5672	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5673	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5674	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5675	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5676
5677	for (i = 0; i < num_regs ; i++, pkt++) {
5678		pkt->ctl = cpu_to_le32(ctl);
5679		pkt->value = cpu_to_le32(val);
5680		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5681	}
5682
5683	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5684	if (!job) {
5685		dev_err(hdev->dev, "Failed to allocate a new job\n");
5686		rc = -ENOMEM;
5687		goto release_cb;
5688	}
5689
5690	job->id = 0;
5691	job->user_cb = cb;
5692	atomic_inc(&job->user_cb->cs_cnt);
5693	job->user_cb_size = cb_size;
5694	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5695	job->patched_cb = job->user_cb;
5696	job->job_cb_size = cb_size;
5697
5698	hl_debugfs_add_job(hdev, job);
5699
5700	rc = gaudi_send_job_on_qman0(hdev, job);
5701	hl_debugfs_remove_job(hdev, job);
5702	kfree(job);
5703	atomic_dec(&cb->cs_cnt);
5704
5705release_cb:
5706	hl_cb_put(cb);
5707	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5708
5709	return rc;
5710}
5711
5712static int gaudi_restore_sm_registers(struct hl_device *hdev)
5713{
5714	u64 base_addr;
5715	u32 num_regs;
5716	int rc;
5717
5718	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5719	num_regs = NUM_OF_SOB_IN_BLOCK;
5720	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5721	if (rc) {
5722		dev_err(hdev->dev, "failed resetting SM registers");
5723		return -ENOMEM;
5724	}
5725
5726	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5727	num_regs = NUM_OF_SOB_IN_BLOCK;
5728	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5729	if (rc) {
5730		dev_err(hdev->dev, "failed resetting SM registers");
5731		return -ENOMEM;
5732	}
5733
5734	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5735	num_regs = NUM_OF_SOB_IN_BLOCK;
5736	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5737	if (rc) {
5738		dev_err(hdev->dev, "failed resetting SM registers");
5739		return -ENOMEM;
5740	}
5741
5742	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5743	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5744	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5745	if (rc) {
5746		dev_err(hdev->dev, "failed resetting SM registers");
5747		return -ENOMEM;
5748	}
5749
5750	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5751	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5752	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5753	if (rc) {
5754		dev_err(hdev->dev, "failed resetting SM registers");
5755		return -ENOMEM;
5756	}
5757
5758	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5759	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5760	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5761	if (rc) {
5762		dev_err(hdev->dev, "failed resetting SM registers");
5763		return -ENOMEM;
5764	}
5765
5766	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5767			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5768	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5769	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5770	if (rc) {
5771		dev_err(hdev->dev, "failed resetting SM registers");
5772		return -ENOMEM;
5773	}
5774
5775	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5776			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5777	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5778	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5779	if (rc) {
5780		dev_err(hdev->dev, "failed resetting SM registers");
5781		return -ENOMEM;
5782	}
5783
5784	return 0;
5785}
5786
5787static void gaudi_restore_dma_registers(struct hl_device *hdev)
5788{
5789	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5790			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5791	int i;
5792
5793	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5794		u64 sob_addr = CFG_BASE +
5795				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5796				(i * sob_delta);
5797		u32 dma_offset = i * DMA_CORE_OFFSET;
5798
5799		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5800				lower_32_bits(sob_addr));
5801		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5802				upper_32_bits(sob_addr));
5803		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5804
5805		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5806		 * modified by the user for SRAM reduction
5807		 */
5808		if (i > 1)
5809			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5810								0x00000001);
5811	}
5812}
5813
5814static void gaudi_restore_qm_registers(struct hl_device *hdev)
5815{
5816	u32 qman_offset;
5817	int i;
5818
5819	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5820		qman_offset = i * DMA_QMAN_OFFSET;
5821		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5822	}
5823
5824	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5825		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5826		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5827	}
5828
5829	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5830		qman_offset = i * TPC_QMAN_OFFSET;
5831		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5832	}
5833
5834	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5835		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5836				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5837		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5838	}
5839}
5840
5841static int gaudi_restore_user_registers(struct hl_device *hdev)
5842{
5843	int rc;
5844
5845	rc = gaudi_restore_sm_registers(hdev);
5846	if (rc)
5847		return rc;
5848
5849	gaudi_restore_dma_registers(hdev);
5850	gaudi_restore_qm_registers(hdev);
5851
5852	return 0;
5853}
5854
5855static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5856{
5857	return 0;
5858}
5859
5860static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5861{
5862	u32 size = hdev->asic_prop.mmu_pgt_size +
5863			hdev->asic_prop.mmu_cache_mng_size;
5864	struct gaudi_device *gaudi = hdev->asic_specific;
5865	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5866
5867	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5868		return 0;
5869
5870	return gaudi_memset_device_memory(hdev, addr, size, 0);
5871}
5872
5873static void gaudi_restore_phase_topology(struct hl_device *hdev)
5874{
5875
5876}
5877
5878static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5879					u32 size_to_dma, dma_addr_t dma_addr)
5880{
5881	u32 err_cause, val;
5882	u64 dma_offset;
5883	int rc;
5884
5885	dma_offset = dma_id * DMA_CORE_OFFSET;
5886
5887	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5888	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5889	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5890	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5891	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5892	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5893			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5894
5895	rc = hl_poll_timeout(
5896		hdev,
5897		mmDMA0_CORE_STS0 + dma_offset,
5898		val,
5899		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5900		0,
5901		1000000);
5902
5903	if (rc) {
5904		dev_err(hdev->dev,
5905			"DMA %d timed-out during reading of 0x%llx\n",
5906			dma_id, addr);
5907		return -EIO;
5908	}
5909
5910	/* Verify DMA is OK */
5911	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5912	if (err_cause) {
5913		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5914		dev_dbg(hdev->dev,
5915			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5916			err_cause);
5917		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5918
5919		return -EIO;
5920	}
5921
5922	return 0;
5923}
5924
5925static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5926				void *blob_addr)
5927{
5928	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5929	u32 qm_glbl_sts0, qm_cgm_sts;
5930	u64 dma_offset, qm_offset;
5931	dma_addr_t dma_addr;
5932	void *kernel_addr;
5933	bool is_eng_idle;
5934	int rc = 0, dma_id;
5935
5936	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5937
5938	if (!kernel_addr)
5939		return -ENOMEM;
5940
5941	hdev->asic_funcs->hw_queues_lock(hdev);
5942
5943	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5944	dma_offset = dma_id * DMA_CORE_OFFSET;
5945	qm_offset = dma_id * DMA_QMAN_OFFSET;
5946	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5947	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5948	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5949	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5950		      IS_DMA_IDLE(dma_core_sts0);
5951
5952	if (!is_eng_idle) {
5953		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5954		dma_offset = dma_id * DMA_CORE_OFFSET;
5955		qm_offset = dma_id * DMA_QMAN_OFFSET;
5956		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5957		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5958		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5959		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5960			      IS_DMA_IDLE(dma_core_sts0);
5961
5962		if (!is_eng_idle) {
5963			dev_err_ratelimited(hdev->dev,
5964				"Can't read via DMA because it is BUSY\n");
5965			rc = -EAGAIN;
5966			goto out;
5967		}
5968	}
5969
5970	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5971	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5972			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5973
5974	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
5975	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
5976	 * ASID
5977	 */
5978	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5979
5980	/* Verify DMA is OK */
5981	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5982	if (err_cause) {
5983		dev_dbg(hdev->dev,
5984			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5985			err_cause);
5986		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5987	}
5988
5989	pos = 0;
5990	size_left = size;
5991	size_to_dma = SZ_2M;
5992
5993	while (size_left > 0) {
5994
5995		if (size_left < SZ_2M)
5996			size_to_dma = size_left;
5997
5998		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
5999						dma_addr);
6000		if (rc)
6001			break;
6002
6003		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6004
6005		if (size_left <= SZ_2M)
6006			break;
6007
6008		pos += SZ_2M;
6009		addr += SZ_2M;
6010		size_left -= SZ_2M;
6011	}
6012
6013	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6014	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6015	 * ASID
6016	 */
6017	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6018			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6019
6020	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6021
6022out:
6023	hdev->asic_funcs->hw_queues_unlock(hdev);
6024
6025	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6026
6027	return rc;
6028}
6029
6030static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6031{
6032	struct gaudi_device *gaudi = hdev->asic_specific;
6033
6034	if (hdev->reset_info.hard_reset_pending)
6035		return U64_MAX;
6036
6037	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6038			(addr - gaudi->hbm_bar_cur_addr));
6039}
6040
6041static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6042{
6043	struct gaudi_device *gaudi = hdev->asic_specific;
6044
6045	if (hdev->reset_info.hard_reset_pending)
6046		return;
6047
6048	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6049			(addr - gaudi->hbm_bar_cur_addr));
6050}
6051
6052void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6053{
6054	/* mask to zero the MMBP and ASID bits */
6055	WREG32_AND(reg, ~0x7FF);
6056	WREG32_OR(reg, asid);
6057}
6058
6059static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6060{
6061	struct gaudi_device *gaudi = hdev->asic_specific;
6062
6063	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6064		return;
6065
6066	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6067		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6068		return;
6069	}
6070
6071	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6072	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6073	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6074	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6075	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6076
6077	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6078	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6079	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6080	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6081	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6082
6083	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6084	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6085	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6086	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6087	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6088
6089	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6090	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6091	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6092	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6093	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6094
6095	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6096	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6097	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6098	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6099	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6100
6101	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6102	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6103	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6104	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6105	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6106
6107	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6108	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6109	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6110	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6111	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6112
6113	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6114	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6115	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6116	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6117	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6118
6119	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6120	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6121	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6122	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6123	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6124	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6125	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6126	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6127
6128	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6129	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6130	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6131	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6132	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6133	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6134	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6135
6136	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6137	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6138	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6139	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6140	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6141	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6142	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6143
6144	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6145	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6146	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6147	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6148	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6149	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6150	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6151
6152	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6153	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6154	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6155	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6156	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6157	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6158	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6159
6160	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6161	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6162	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6163	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6164	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6165	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6166	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6167
6168	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6169	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6170	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6171	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6172	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6173	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6174	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6175
6176	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6177	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6178	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6179	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6180	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6181	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6182	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6183
6184	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6185	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6186	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6187	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6188	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6189	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6190	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6191
6192	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6193	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6194	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6195	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6196	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6197	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6198	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6199	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6200	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6201	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6202
6203	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6204	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6205	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6206	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6207	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6208	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6209	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6210	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6211	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6212	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6213	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6214	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6215
6216	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6217		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6218				asid);
6219		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6220				asid);
6221		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6222				asid);
6223		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6224				asid);
6225		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6226				asid);
6227	}
6228
6229	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6230		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6231				asid);
6232		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6233				asid);
6234		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6235				asid);
6236		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6237				asid);
6238		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6239				asid);
6240	}
6241
6242	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6243		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6244				asid);
6245		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6246				asid);
6247		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6248				asid);
6249		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6250				asid);
6251		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6252				asid);
6253	}
6254
6255	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6256		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6257				asid);
6258		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6259				asid);
6260		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6261				asid);
6262		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6263				asid);
6264		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6265				asid);
6266	}
6267
6268	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6269		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6270				asid);
6271		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6272				asid);
6273		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6274				asid);
6275		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6276				asid);
6277		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6278				asid);
6279	}
6280
6281	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6282		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6283				asid);
6284		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6285				asid);
6286		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6287				asid);
6288		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6289				asid);
6290		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6291				asid);
6292	}
6293
6294	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6295		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6296				asid);
6297		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6298				asid);
6299		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6300				asid);
6301		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6302				asid);
6303		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6304				asid);
6305	}
6306
6307	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6308		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6309				asid);
6310		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6311				asid);
6312		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6313				asid);
6314		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6315				asid);
6316		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6317				asid);
6318	}
6319
6320	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6321		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6322				asid);
6323		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6324				asid);
6325		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6326				asid);
6327		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6328				asid);
6329		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6330				asid);
6331	}
6332
6333	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6334		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6335				asid);
6336		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6337				asid);
6338		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6339				asid);
6340		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6341				asid);
6342		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6343				asid);
6344	}
6345
6346	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6347	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6348}
6349
6350static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6351		struct hl_cs_job *job)
6352{
6353	struct packet_msg_prot *fence_pkt;
6354	u32 *fence_ptr;
6355	dma_addr_t fence_dma_addr;
6356	struct hl_cb *cb;
6357	u32 tmp, timeout, dma_offset;
6358	int rc;
6359
6360	if (hdev->pldm)
6361		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6362	else
6363		timeout = HL_DEVICE_TIMEOUT_USEC;
6364
6365	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6366	if (!fence_ptr) {
6367		dev_err(hdev->dev,
6368			"Failed to allocate fence memory for QMAN0\n");
6369		return -ENOMEM;
6370	}
6371
6372	cb = job->patched_cb;
6373
6374	fence_pkt = cb->kernel_address +
6375			job->job_cb_size - sizeof(struct packet_msg_prot);
6376
6377	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6378	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6379	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6380
6381	fence_pkt->ctl = cpu_to_le32(tmp);
6382	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6383	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6384
6385	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6386
6387	WREG32(mmDMA0_CORE_PROT + dma_offset,
6388			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6389
6390	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6391					job->job_cb_size, cb->bus_address);
6392	if (rc) {
6393		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6394		goto free_fence_ptr;
6395	}
6396
6397	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6398				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6399				timeout, true);
6400
6401	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6402
6403	if (rc == -ETIMEDOUT) {
6404		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6405		goto free_fence_ptr;
6406	}
6407
6408free_fence_ptr:
6409	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6410
6411	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6412	return rc;
6413}
6414
6415static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6416{
6417	if (event_type >= GAUDI_EVENT_SIZE)
6418		goto event_not_supported;
6419
6420	if (!gaudi_irq_map_table[event_type].valid)
6421		goto event_not_supported;
6422
6423	snprintf(desc, size, gaudi_irq_map_table[event_type].name);
6424
6425	return;
6426
6427event_not_supported:
6428	snprintf(desc, size, "N/A");
6429}
6430
6431static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6432							bool is_write, u16 *engine_id_1,
6433							u16 *engine_id_2)
6434{
6435	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6436
6437	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6438				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6439
6440	switch (x_y) {
6441	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6442	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6443		dma_id[0] = 0;
6444		dma_id[1] = 2;
6445		break;
6446	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6447	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6448		dma_id[0] = 1;
6449		dma_id[1] = 3;
6450		break;
6451	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6452	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6453		dma_id[0] = 4;
6454		dma_id[1] = 6;
6455		break;
6456	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6457	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6458		dma_id[0] = 5;
6459		dma_id[1] = 7;
6460		break;
6461	default:
6462		goto unknown_initiator;
6463	}
6464
6465	for (i = 0 ; i < 2 ; i++) {
6466		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6467		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6468	}
6469
6470	switch (x_y) {
6471	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6472	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6473		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6474			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6475			return "DMA0";
6476		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6477			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6478			return "DMA2";
6479		} else {
6480			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6481			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6482			return "DMA0 or DMA2";
6483		}
6484	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6485	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6486		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6487			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6488			return "DMA1";
6489		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6490			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6491			return "DMA3";
6492		} else {
6493			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6494			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6495			return "DMA1 or DMA3";
6496		}
6497	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6498	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6499		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6500			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6501			return "DMA4";
6502		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6503			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6504			return "DMA6";
6505		} else {
6506			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6507			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6508			return "DMA4 or DMA6";
6509		}
6510	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6511	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6512		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6513			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6514			return "DMA5";
6515		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6516			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6517			return "DMA7";
6518		} else {
6519			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6520			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6521			return "DMA5 or DMA7";
6522		}
6523	}
6524
6525unknown_initiator:
6526	return "unknown initiator";
6527}
6528
6529static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6530							u16 *engine_id_1, u16 *engine_id_2)
6531{
6532	u32 val, x_y, axi_id;
6533
6534	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6535				RREG32(mmMMU_UP_RAZWI_READ_ID);
6536	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6537			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6538	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6539			RAZWI_INITIATOR_AXI_ID_SHIFT);
6540
6541	switch (x_y) {
6542	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6543		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6544			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6545			return "TPC0";
6546		}
6547		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6548			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6549			return "NIC0";
6550		}
6551		break;
6552	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6553		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6554		return "TPC1";
6555	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6556	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6557		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6558		return "MME0";
6559	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6560	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6561		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6562		return "MME1";
6563	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6564		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6565		return "TPC2";
6566	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6567		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6568			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6569			return "TPC3";
6570		}
6571		/* PCI, CPU or PSOC does not have engine id*/
6572		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6573			return "PCI";
6574		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6575			return "CPU";
6576		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6577			return "PSOC";
6578		break;
6579	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6580	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6581	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6582	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6583	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6584	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6585	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6586	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6587		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6588				engine_id_1, engine_id_2);
6589	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6590		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6591			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6592			return "TPC4";
6593		}
6594		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6595			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6596			return "NIC1";
6597		}
6598		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6599			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6600			return "NIC2";
6601		}
6602		break;
6603	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6604		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6605		return "TPC5";
6606	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6607	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6608		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6609		return "MME2";
6610	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6611	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6612		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6613		return "MME3";
6614	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6615		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6616		return "TPC6";
6617	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6618		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6619			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6620			return "TPC7";
6621		}
6622		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6623			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6624			return "NIC4";
6625		}
6626		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6627			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6628			return "NIC5";
6629		}
6630		break;
6631	default:
6632		break;
6633	}
6634
6635	dev_err(hdev->dev,
6636		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6637		val,
6638		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6639		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6640		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6641			RAZWI_INITIATOR_AXI_ID_MASK);
6642
6643	return "unknown initiator";
6644}
6645
6646static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6647						u16 *engine_id_2, bool *is_read, bool *is_write)
6648{
6649
6650	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6651		dev_err_ratelimited(hdev->dev,
6652			"RAZWI event caused by illegal write of %s\n",
6653			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6654		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6655		*is_write = true;
6656	}
6657
6658	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6659		dev_err_ratelimited(hdev->dev,
6660			"RAZWI event caused by illegal read of %s\n",
6661			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6662		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6663		*is_read = true;
6664	}
6665}
6666
6667static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6668{
6669	struct gaudi_device *gaudi = hdev->asic_specific;
6670	u32 val;
6671
6672	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6673		return;
6674
6675	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6676	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6677		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6678		*addr <<= 32;
6679		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6680
6681		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6682		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6683
6684		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6685	}
6686
6687	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6688	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6689		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6690		*addr <<= 32;
6691		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6692
6693		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6694
6695		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6696	}
6697}
6698
6699/*
6700 *  +-------------------+------------------------------------------------------+
6701 *  | Configuration Reg |                     Description                      |
6702 *  |      Address      |                                                      |
6703 *  +-------------------+------------------------------------------------------+
6704 *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6705 *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6706 *  |                   |0xF34 memory wrappers 63:32                           |
6707 *  |                   |0xF38 memory wrappers 95:64                           |
6708 *  |                   |0xF3C memory wrappers 127:96                          |
6709 *  +-------------------+------------------------------------------------------+
6710 *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6711 *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6712 *  |                   |0xF44 memory wrappers 63:32                           |
6713 *  |                   |0xF48 memory wrappers 95:64                           |
6714 *  |                   |0xF4C memory wrappers 127:96                          |
6715 *  +-------------------+------------------------------------------------------+
6716 */
6717static int gaudi_extract_ecc_info(struct hl_device *hdev,
6718		struct ecc_info_extract_params *params, u64 *ecc_address,
6719		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6720{
6721	u32 i, num_mem_regs, reg, err_bit;
6722	u64 err_addr, err_word = 0;
6723
6724	num_mem_regs = params->num_memories / 32 +
6725			((params->num_memories % 32) ? 1 : 0);
6726
6727	if (params->block_address >= CFG_BASE)
6728		params->block_address -= CFG_BASE;
6729
6730	if (params->derr)
6731		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6732	else
6733		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6734
6735	/* Set invalid wrapper index */
6736	*memory_wrapper_idx = 0xFF;
6737
6738	/* Iterate through memory wrappers, a single bit must be set */
6739	for (i = 0 ; i < num_mem_regs ; i++) {
6740		err_addr += i * 4;
6741		err_word = RREG32(err_addr);
6742		if (err_word) {
6743			err_bit = __ffs(err_word);
6744			*memory_wrapper_idx = err_bit + (32 * i);
6745			break;
6746		}
6747	}
6748
6749	if (*memory_wrapper_idx == 0xFF) {
6750		dev_err(hdev->dev, "ECC error information cannot be found\n");
6751		return -EINVAL;
6752	}
6753
6754	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6755			*memory_wrapper_idx);
6756
6757	*ecc_address =
6758		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6759	*ecc_syndrom =
6760		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6761
6762	/* Clear error indication */
6763	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6764	if (params->derr)
6765		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6766	else
6767		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6768
6769	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6770
6771	return 0;
6772}
6773
6774/*
6775 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6776 *
6777 * @idx: the current pi/ci value
6778 * @q_len: the queue length (power of 2)
6779 *
6780 * @return the cyclically decremented index
6781 */
6782static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6783{
6784	u32 mask = q_len - 1;
6785
6786	/*
6787	 * modular decrement is equivalent to adding (queue_size -1)
6788	 * later we take LSBs to make sure the value is in the
6789	 * range [0, queue_len - 1]
6790	 */
6791	return (idx + q_len - 1) & mask;
6792}
6793
6794/**
6795 * gaudi_handle_sw_config_stream_data - print SW config stream data
6796 *
6797 * @hdev: pointer to the habanalabs device structure
6798 * @stream: the QMAN's stream
6799 * @qman_base: base address of QMAN registers block
6800 * @event_mask: mask of the last events occurred
6801 */
6802static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6803						u64 qman_base, u64 event_mask)
6804{
6805	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6806	u32 cq_ptr_lo_off, size;
6807
6808	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6809
6810	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6811						stream * cq_ptr_lo_off;
6812	cq_ptr_hi = cq_ptr_lo +
6813				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6814	cq_tsize = cq_ptr_lo +
6815				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6816
6817	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6818	size = RREG32(cq_tsize);
6819	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6820							stream, cq_ptr, size);
6821
6822	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6823		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6824		hdev->captured_err_info.undef_opcode.cq_size = size;
6825		hdev->captured_err_info.undef_opcode.stream_id = stream;
6826	}
6827}
6828
6829/**
6830 * gaudi_handle_last_pqes_on_err - print last PQEs on error
6831 *
6832 * @hdev: pointer to the habanalabs device structure
6833 * @qid_base: first QID of the QMAN (out of 4 streams)
6834 * @stream: the QMAN's stream
6835 * @qman_base: base address of QMAN registers block
6836 * @event_mask: mask of the last events occurred
6837 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6838 */
6839static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6840						u32 stream, u64 qman_base,
6841						u64 event_mask,
6842						bool pr_sw_conf)
6843{
6844	u32 ci, qm_ci_stream_off, queue_len;
6845	struct hl_hw_queue *q;
6846	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6847	int i;
6848
6849	q = &hdev->kernel_queues[qid_base + stream];
6850
6851	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6852	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6853						stream * qm_ci_stream_off;
6854
6855	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6856					q->int_queue_len : HL_QUEUE_LENGTH;
6857
6858	hdev->asic_funcs->hw_queues_lock(hdev);
6859
6860	if (pr_sw_conf)
6861		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6862
6863	ci = RREG32(pq_ci);
6864
6865	/* we should start printing form ci -1 */
6866	ci = gaudi_queue_idx_dec(ci, queue_len);
6867	memset(addr, 0, sizeof(addr));
6868
6869	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6870		struct hl_bd *bd;
6871		u32 len;
6872
6873		bd = q->kernel_address;
6874		bd += ci;
6875
6876		len = le32_to_cpu(bd->len);
6877		/* len 0 means uninitialized entry- break */
6878		if (!len)
6879			break;
6880
6881		addr[i] = le64_to_cpu(bd->ptr);
6882
6883		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6884							stream, ci, addr[i], len);
6885
6886		/* get previous ci, wrap if needed */
6887		ci = gaudi_queue_idx_dec(ci, queue_len);
6888	}
6889
6890	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6891		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6892		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6893
6894		if (arr_idx == 0) {
6895			undef_opcode->timestamp = ktime_get();
6896			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6897		}
6898
6899		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6900		undef_opcode->cb_addr_streams_len++;
6901	}
6902
6903	hdev->asic_funcs->hw_queues_unlock(hdev);
6904}
6905
6906/**
6907 * handle_qman_data_on_err - extract QMAN data on error
6908 *
6909 * @hdev: pointer to the habanalabs device structure
6910 * @qid_base: first QID of the QMAN (out of 4 streams)
6911 * @stream: the QMAN's stream
6912 * @qman_base: base address of QMAN registers block
6913 * @event_mask: mask of the last events occurred
6914 *
6915 * This function attempt to exatract as much data as possible on QMAN error.
6916 * On upper CP print the SW config stream data and last 8 PQEs.
6917 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6918 */
6919static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6920				   u32 stream, u64 qman_base, u64 event_mask)
6921{
6922	u32 i;
6923
6924	if (stream != QMAN_STREAMS) {
6925		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6926			qman_base, event_mask, true);
6927		return;
6928	}
6929
6930	/* handle Lower-CP */
6931	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6932
6933	for (i = 0; i < QMAN_STREAMS; i++)
6934		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6935			qman_base, event_mask, false);
6936}
6937
6938static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6939					  const char *qm_name,
6940					  u64 qman_base,
6941					  u32 qid_base,
6942					  u64 *event_mask)
6943{
6944	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6945	u64 glbl_sts_addr, arb_err_addr;
6946	char reg_desc[32];
6947
6948	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6949	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6950
6951	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
6952	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6953		glbl_sts_clr_val = 0;
6954		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6955
6956		if (!glbl_sts_val)
6957			continue;
6958
6959		if (i == QMAN_STREAMS)
6960			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6961		else
6962			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6963
6964		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6965			if (glbl_sts_val & BIT(j)) {
6966				dev_err_ratelimited(hdev->dev,
6967						"%s %s. err cause: %s\n",
6968						qm_name, reg_desc,
6969						gaudi_qman_error_cause[j]);
6970				glbl_sts_clr_val |= BIT(j);
6971			}
6972		}
6973		/* check for undefined opcode */
6974		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6975				hdev->captured_err_info.undef_opcode.write_enable) {
6976			memset(&hdev->captured_err_info.undef_opcode, 0,
6977						sizeof(hdev->captured_err_info.undef_opcode));
6978
6979			hdev->captured_err_info.undef_opcode.write_enable = false;
6980			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6981		}
6982
6983		/* Write 1 clear errors */
6984		if (!hdev->stop_on_err)
6985			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6986		else
6987			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6988	}
6989
6990	arb_err_val = RREG32(arb_err_addr);
6991
6992	if (!arb_err_val)
6993		return;
6994
6995	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6996		if (arb_err_val & BIT(j)) {
6997			dev_err_ratelimited(hdev->dev,
6998					"%s ARB_ERR. err cause: %s\n",
6999					qm_name,
7000					gaudi_qman_arb_error_cause[j]);
7001		}
7002	}
7003}
7004
7005static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7006		struct hl_eq_sm_sei_data *sei_data)
7007{
7008	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7009
7010	/* Flip the bits as the enum is ordered in the opposite way */
7011	index = (index ^ 0x3) & 0x3;
7012
7013	switch (sei_data->sei_cause) {
7014	case SM_SEI_SO_OVERFLOW:
7015		dev_err_ratelimited(hdev->dev,
7016			"%s SEI Error: SOB Group %u overflow/underflow",
7017			gaudi_sync_manager_names[index],
7018			le32_to_cpu(sei_data->sei_log));
7019		break;
7020	case SM_SEI_LBW_4B_UNALIGNED:
7021		dev_err_ratelimited(hdev->dev,
7022			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7023			gaudi_sync_manager_names[index],
7024			le32_to_cpu(sei_data->sei_log));
7025		break;
7026	case SM_SEI_AXI_RESPONSE_ERR:
7027		dev_err_ratelimited(hdev->dev,
7028			"%s SEI Error: AXI ID %u response error",
7029			gaudi_sync_manager_names[index],
7030			le32_to_cpu(sei_data->sei_log));
7031		break;
7032	default:
7033		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7034				le32_to_cpu(sei_data->sei_log));
7035		break;
7036	}
7037}
7038
7039static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7040		struct hl_eq_ecc_data *ecc_data)
7041{
7042	struct ecc_info_extract_params params;
7043	u64 ecc_address = 0, ecc_syndrom = 0;
7044	u8 index, memory_wrapper_idx = 0;
7045	bool extract_info_from_fw;
7046	int rc;
7047
7048	if (hdev->asic_prop.fw_security_enabled) {
7049		extract_info_from_fw = true;
7050		goto extract_ecc_info;
7051	}
7052
7053	switch (event_type) {
7054	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7055	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7056		extract_info_from_fw = true;
7057		break;
7058	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7059		index = event_type - GAUDI_EVENT_TPC0_SERR;
7060		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7061		params.num_memories = 90;
7062		params.derr = false;
7063		extract_info_from_fw = false;
7064		break;
7065	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7066		index = event_type - GAUDI_EVENT_TPC0_DERR;
7067		params.block_address =
7068			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7069		params.num_memories = 90;
7070		params.derr = true;
7071		extract_info_from_fw = false;
7072		break;
7073	case GAUDI_EVENT_MME0_ACC_SERR:
7074	case GAUDI_EVENT_MME1_ACC_SERR:
7075	case GAUDI_EVENT_MME2_ACC_SERR:
7076	case GAUDI_EVENT_MME3_ACC_SERR:
7077		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7078		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7079		params.num_memories = 128;
7080		params.derr = false;
7081		extract_info_from_fw = false;
7082		break;
7083	case GAUDI_EVENT_MME0_ACC_DERR:
7084	case GAUDI_EVENT_MME1_ACC_DERR:
7085	case GAUDI_EVENT_MME2_ACC_DERR:
7086	case GAUDI_EVENT_MME3_ACC_DERR:
7087		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7088		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7089		params.num_memories = 128;
7090		params.derr = true;
7091		extract_info_from_fw = false;
7092		break;
7093	case GAUDI_EVENT_MME0_SBAB_SERR:
7094	case GAUDI_EVENT_MME1_SBAB_SERR:
7095	case GAUDI_EVENT_MME2_SBAB_SERR:
7096	case GAUDI_EVENT_MME3_SBAB_SERR:
7097		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7098		params.block_address =
7099			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7100		params.num_memories = 33;
7101		params.derr = false;
7102		extract_info_from_fw = false;
7103		break;
7104	case GAUDI_EVENT_MME0_SBAB_DERR:
7105	case GAUDI_EVENT_MME1_SBAB_DERR:
7106	case GAUDI_EVENT_MME2_SBAB_DERR:
7107	case GAUDI_EVENT_MME3_SBAB_DERR:
7108		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7109		params.block_address =
7110			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7111		params.num_memories = 33;
7112		params.derr = true;
7113		extract_info_from_fw = false;
7114		break;
7115	default:
7116		return;
7117	}
7118
7119extract_ecc_info:
7120	if (extract_info_from_fw) {
7121		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7122		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7123		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7124	} else {
7125		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7126				&ecc_syndrom, &memory_wrapper_idx);
7127		if (rc)
7128			return;
7129	}
7130
7131	dev_err(hdev->dev,
7132		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7133		ecc_address, ecc_syndrom, memory_wrapper_idx);
7134}
7135
7136static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7137{
7138	u64 qman_base;
7139	char desc[32];
7140	u32 qid_base;
7141	u8 index;
7142
7143	switch (event_type) {
7144	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7145		index = event_type - GAUDI_EVENT_TPC0_QM;
7146		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7147		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7148		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7149		break;
7150	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7151		if (event_type == GAUDI_EVENT_MME0_QM) {
7152			index = 0;
7153			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7154		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7155			index = 2;
7156			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7157		}
7158		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7159		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7160		break;
7161	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7162		index = event_type - GAUDI_EVENT_DMA0_QM;
7163		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7164		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7165		if (index > 1)
7166			qid_base++;
7167		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7168		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7169		break;
7170	case GAUDI_EVENT_NIC0_QM0:
7171		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7172		qman_base = mmNIC0_QM0_BASE;
7173		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7174		break;
7175	case GAUDI_EVENT_NIC0_QM1:
7176		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7177		qman_base = mmNIC0_QM1_BASE;
7178		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7179		break;
7180	case GAUDI_EVENT_NIC1_QM0:
7181		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7182		qman_base = mmNIC1_QM0_BASE;
7183		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7184		break;
7185	case GAUDI_EVENT_NIC1_QM1:
7186		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7187		qman_base = mmNIC1_QM1_BASE;
7188		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7189		break;
7190	case GAUDI_EVENT_NIC2_QM0:
7191		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7192		qman_base = mmNIC2_QM0_BASE;
7193		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7194		break;
7195	case GAUDI_EVENT_NIC2_QM1:
7196		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7197		qman_base = mmNIC2_QM1_BASE;
7198		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7199		break;
7200	case GAUDI_EVENT_NIC3_QM0:
7201		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7202		qman_base = mmNIC3_QM0_BASE;
7203		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7204		break;
7205	case GAUDI_EVENT_NIC3_QM1:
7206		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7207		qman_base = mmNIC3_QM1_BASE;
7208		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7209		break;
7210	case GAUDI_EVENT_NIC4_QM0:
7211		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7212		qman_base = mmNIC4_QM0_BASE;
7213		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7214		break;
7215	case GAUDI_EVENT_NIC4_QM1:
7216		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7217		qman_base = mmNIC4_QM1_BASE;
7218		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7219		break;
7220	default:
7221		return;
7222	}
7223
7224	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7225}
7226
7227static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7228					bool check_razwi, u64 *event_mask)
7229{
7230	bool is_read = false, is_write = false;
7231	u16 engine_id[2], num_of_razwi_eng = 0;
7232	char desc[64] = "";
7233	u64 razwi_addr = 0;
7234	u8 razwi_flags = 0;
7235
7236	/*
7237	 * Init engine id by default as not valid and only if razwi initiated from engine with
7238	 * engine id it will get valid value.
7239	 */
7240	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7241	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7242
7243	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7244	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7245		event_type, desc);
7246
7247	if (check_razwi) {
7248		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7249						&is_write);
7250		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7251
7252		if (is_read)
7253			razwi_flags |= HL_RAZWI_READ;
7254		if (is_write)
7255			razwi_flags |= HL_RAZWI_WRITE;
7256
7257		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7258			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7259				num_of_razwi_eng = 2;
7260			else
7261				num_of_razwi_eng = 1;
7262		}
7263
7264		if (razwi_flags)
7265			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7266					razwi_flags, event_mask);
7267	}
7268}
7269
7270static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7271					struct cpucp_pkt_sync_err *sync_err)
7272{
7273	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7274
7275	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7276		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7277}
7278
7279static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7280					struct hl_eq_fw_alive *fw_alive)
7281{
7282	dev_err(hdev->dev,
7283		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7284		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7285		le32_to_cpu(fw_alive->process_id),
7286		le32_to_cpu(fw_alive->thread_id),
7287		le64_to_cpu(fw_alive->uptime_seconds));
7288}
7289
7290static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7291						void *data)
7292{
7293	char desc[64] = "", *type;
7294	struct eq_nic_sei_event *eq_nic_sei = data;
7295	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7296
7297	switch (eq_nic_sei->axi_error_cause) {
7298	case RXB:
7299		type = "RXB";
7300		break;
7301	case RXE:
7302		type = "RXE";
7303		break;
7304	case TXS:
7305		type = "TXS";
7306		break;
7307	case TXE:
7308		type = "TXE";
7309		break;
7310	case QPC_RESP:
7311		type = "QPC_RESP";
7312		break;
7313	case NON_AXI_ERR:
7314		type = "NON_AXI_ERR";
7315		break;
7316	case TMR:
7317		type = "TMR";
7318		break;
7319	default:
7320		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7321			eq_nic_sei->axi_error_cause);
7322		type = "N/A";
7323		break;
7324	}
7325
7326	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7327			eq_nic_sei->id);
7328	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7329		event_type, desc);
7330}
7331
7332static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7333{
7334	/* GAUDI doesn't support any reset except hard-reset */
7335	return -EPERM;
7336}
7337
7338static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7339			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7340{
7341	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7342	int rc = 0;
7343
7344	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7345					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7346		if (!hbm_ecc_data) {
7347			dev_err(hdev->dev, "No FW ECC data");
7348			return 0;
7349		}
7350
7351		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7352				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7353		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7354				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7355		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7356				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7357		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7358				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7359		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7360				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7361		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7362				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7363		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7364				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7365
7366		dev_err(hdev->dev,
7367			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7368			device, ch, wr_par, rd_par, ca_par, serr, derr);
7369		dev_err(hdev->dev,
7370			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7371			device, ch, hbm_ecc_data->first_addr, type,
7372			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7373			hbm_ecc_data->dec_cnt);
7374		return 0;
7375	}
7376
7377	if (hdev->asic_prop.fw_security_enabled) {
7378		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7379		return 0;
7380	}
7381
7382	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7383	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7384		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7385		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7386		if (val) {
7387			rc = -EIO;
7388			dev_err(hdev->dev,
7389				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7390				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7391				(val >> 2) & 0x1, (val >> 3) & 0x1,
7392				(val >> 4) & 0x1);
7393
7394			val2 = RREG32(base + ch * 0x1000 + 0x060);
7395			dev_err(hdev->dev,
7396				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7397				device, ch * 2,
7398				RREG32(base + ch * 0x1000 + 0x064),
7399				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7400				(val2 & 0xFF0000) >> 16,
7401				(val2 & 0xFF000000) >> 24);
7402		}
7403
7404		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7405		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7406		if (val) {
7407			rc = -EIO;
7408			dev_err(hdev->dev,
7409				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7410				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7411				(val >> 2) & 0x1, (val >> 3) & 0x1,
7412				(val >> 4) & 0x1);
7413
7414			val2 = RREG32(base + ch * 0x1000 + 0x070);
7415			dev_err(hdev->dev,
7416				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7417				device, ch * 2 + 1,
7418				RREG32(base + ch * 0x1000 + 0x074),
7419				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7420				(val2 & 0xFF0000) >> 16,
7421				(val2 & 0xFF000000) >> 24);
7422		}
7423
7424		/* Clear interrupts */
7425		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7426		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7427		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7428		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7429		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7430		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7431	}
7432
7433	val  = RREG32(base + 0x8F30);
7434	val2 = RREG32(base + 0x8F34);
7435	if (val | val2) {
7436		rc = -EIO;
7437		dev_err(hdev->dev,
7438			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7439			device, val, val2);
7440	}
7441	val  = RREG32(base + 0x8F40);
7442	val2 = RREG32(base + 0x8F44);
7443	if (val | val2) {
7444		rc = -EIO;
7445		dev_err(hdev->dev,
7446			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7447			device, val, val2);
7448	}
7449
7450	return rc;
7451}
7452
7453static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7454{
7455	switch (hbm_event_type) {
7456	case GAUDI_EVENT_HBM0_SPI_0:
7457	case GAUDI_EVENT_HBM0_SPI_1:
7458		return 0;
7459	case GAUDI_EVENT_HBM1_SPI_0:
7460	case GAUDI_EVENT_HBM1_SPI_1:
7461		return 1;
7462	case GAUDI_EVENT_HBM2_SPI_0:
7463	case GAUDI_EVENT_HBM2_SPI_1:
7464		return 2;
7465	case GAUDI_EVENT_HBM3_SPI_0:
7466	case GAUDI_EVENT_HBM3_SPI_1:
7467		return 3;
7468	default:
7469		break;
7470	}
7471
7472	/* Should never happen */
7473	return 0;
7474}
7475
7476static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7477					char *interrupt_name)
7478{
7479	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7480	bool soft_reset_required = false;
7481
7482	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7483				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7484
7485	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7486		if (tpc_interrupts_cause & BIT(i)) {
7487			dev_err_ratelimited(hdev->dev,
7488					"TPC%d_%s interrupt cause: %s\n",
7489					tpc_id, interrupt_name,
7490					gaudi_tpc_interrupts_cause[i]);
7491			/* If this is QM error, we need to soft-reset */
7492			if (i == 15)
7493				soft_reset_required = true;
7494		}
7495
7496	/* Clear interrupts */
7497	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7498
7499	return soft_reset_required;
7500}
7501
7502static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7503{
7504	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7505}
7506
7507static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7508{
7509	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7510}
7511
7512static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7513{
7514	ktime_t zero_time = ktime_set(0, 0);
7515
7516	mutex_lock(&hdev->clk_throttling.lock);
7517
7518	switch (event_type) {
7519	case GAUDI_EVENT_FIX_POWER_ENV_S:
7520		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7521		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7522		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7523		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7524		dev_info_ratelimited(hdev->dev,
7525			"Clock throttling due to power consumption\n");
7526		break;
7527
7528	case GAUDI_EVENT_FIX_POWER_ENV_E:
7529		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7530		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7531		dev_info_ratelimited(hdev->dev,
7532			"Power envelop is safe, back to optimal clock\n");
7533		break;
7534
7535	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7536		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7537		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7538		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7539		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7540		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7541		dev_info_ratelimited(hdev->dev,
7542			"Clock throttling due to overheating\n");
7543		break;
7544
7545	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7546		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7547		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7548		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7549		dev_info_ratelimited(hdev->dev,
7550			"Thermal envelop is safe, back to optimal clock\n");
7551		break;
7552
7553	default:
7554		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7555			event_type);
7556		break;
7557	}
7558
7559	mutex_unlock(&hdev->clk_throttling.lock);
7560}
7561
7562static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7563{
7564	struct gaudi_device *gaudi = hdev->asic_specific;
7565	struct hl_info_fw_err_info fw_err_info;
7566	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7567	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7568	u32 fw_fatal_err_flag = 0, flags = 0;
7569	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7570			>> EQ_CTL_EVENT_TYPE_SHIFT);
7571	bool reset_required, reset_direct = false;
7572	u8 cause;
7573	int rc;
7574
7575	if (event_type >= GAUDI_EVENT_SIZE) {
7576		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7577				event_type, GAUDI_EVENT_SIZE - 1);
7578		return;
7579	}
7580
7581	gaudi->events_stat[event_type]++;
7582	gaudi->events_stat_aggregate[event_type]++;
7583
7584	switch (event_type) {
7585	case GAUDI_EVENT_PCIE_CORE_DERR:
7586	case GAUDI_EVENT_PCIE_IF_DERR:
7587	case GAUDI_EVENT_PCIE_PHY_DERR:
7588	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7589	case GAUDI_EVENT_MME0_ACC_DERR:
7590	case GAUDI_EVENT_MME0_SBAB_DERR:
7591	case GAUDI_EVENT_MME1_ACC_DERR:
7592	case GAUDI_EVENT_MME1_SBAB_DERR:
7593	case GAUDI_EVENT_MME2_ACC_DERR:
7594	case GAUDI_EVENT_MME2_SBAB_DERR:
7595	case GAUDI_EVENT_MME3_ACC_DERR:
7596	case GAUDI_EVENT_MME3_SBAB_DERR:
7597	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7598		fallthrough;
7599	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7600	case GAUDI_EVENT_PSOC_MEM_DERR:
7601	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7602	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7603	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7604	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7605	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7606	case GAUDI_EVENT_MMU_DERR:
7607	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7608		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7609		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7610		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7611		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7612		goto reset_device;
7613
7614	case GAUDI_EVENT_GIC500:
7615	case GAUDI_EVENT_AXI_ECC:
7616	case GAUDI_EVENT_L2_RAM_ECC:
7617	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7618		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7619		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7620		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7621		goto reset_device;
7622
7623	case GAUDI_EVENT_HBM0_SPI_0:
7624	case GAUDI_EVENT_HBM1_SPI_0:
7625	case GAUDI_EVENT_HBM2_SPI_0:
7626	case GAUDI_EVENT_HBM3_SPI_0:
7627		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7628		gaudi_hbm_read_interrupts(hdev,
7629				gaudi_hbm_event_to_dev(event_type),
7630				&eq_entry->hbm_ecc_data);
7631		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7632		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7633		goto reset_device;
7634
7635	case GAUDI_EVENT_HBM0_SPI_1:
7636	case GAUDI_EVENT_HBM1_SPI_1:
7637	case GAUDI_EVENT_HBM2_SPI_1:
7638	case GAUDI_EVENT_HBM3_SPI_1:
7639		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7640		gaudi_hbm_read_interrupts(hdev,
7641				gaudi_hbm_event_to_dev(event_type),
7642				&eq_entry->hbm_ecc_data);
7643		hl_fw_unmask_irq(hdev, event_type);
7644		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7645		break;
7646
7647	case GAUDI_EVENT_TPC0_DEC:
7648	case GAUDI_EVENT_TPC1_DEC:
7649	case GAUDI_EVENT_TPC2_DEC:
7650	case GAUDI_EVENT_TPC3_DEC:
7651	case GAUDI_EVENT_TPC4_DEC:
7652	case GAUDI_EVENT_TPC5_DEC:
7653	case GAUDI_EVENT_TPC6_DEC:
7654	case GAUDI_EVENT_TPC7_DEC:
7655		/* In TPC DEC event, notify on TPC assertion. While there isn't
7656		 * a specific event for assertion yet, the FW generates TPC DEC event.
7657		 * The SW upper layer will inspect an internal mapped area to indicate
7658		 * if the event is a TPC Assertion or a "real" TPC DEC.
7659		 */
7660		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7661		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7662		reset_required = gaudi_tpc_read_interrupts(hdev,
7663					tpc_dec_event_to_tpc_id(event_type),
7664					"AXI_SLV_DEC_Error");
7665		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7666		if (reset_required) {
7667			dev_err(hdev->dev, "reset required due to %s\n",
7668				gaudi_irq_map_table[event_type].name);
7669
7670			reset_direct = true;
7671			goto reset_device;
7672		} else {
7673			hl_fw_unmask_irq(hdev, event_type);
7674			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7675		}
7676		break;
7677
7678	case GAUDI_EVENT_TPC0_KRN_ERR:
7679	case GAUDI_EVENT_TPC1_KRN_ERR:
7680	case GAUDI_EVENT_TPC2_KRN_ERR:
7681	case GAUDI_EVENT_TPC3_KRN_ERR:
7682	case GAUDI_EVENT_TPC4_KRN_ERR:
7683	case GAUDI_EVENT_TPC5_KRN_ERR:
7684	case GAUDI_EVENT_TPC6_KRN_ERR:
7685	case GAUDI_EVENT_TPC7_KRN_ERR:
7686		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7687		reset_required = gaudi_tpc_read_interrupts(hdev,
7688					tpc_krn_event_to_tpc_id(event_type),
7689					"KRN_ERR");
7690		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7691		if (reset_required) {
7692			dev_err(hdev->dev, "reset required due to %s\n",
7693				gaudi_irq_map_table[event_type].name);
7694
7695			reset_direct = true;
7696			goto reset_device;
7697		} else {
7698			hl_fw_unmask_irq(hdev, event_type);
7699			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7700		}
7701		break;
7702
7703	case GAUDI_EVENT_PCIE_CORE_SERR:
7704	case GAUDI_EVENT_PCIE_IF_SERR:
7705	case GAUDI_EVENT_PCIE_PHY_SERR:
7706	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7707	case GAUDI_EVENT_MME0_ACC_SERR:
7708	case GAUDI_EVENT_MME0_SBAB_SERR:
7709	case GAUDI_EVENT_MME1_ACC_SERR:
7710	case GAUDI_EVENT_MME1_SBAB_SERR:
7711	case GAUDI_EVENT_MME2_ACC_SERR:
7712	case GAUDI_EVENT_MME2_SBAB_SERR:
7713	case GAUDI_EVENT_MME3_ACC_SERR:
7714	case GAUDI_EVENT_MME3_SBAB_SERR:
7715	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7716	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7717	case GAUDI_EVENT_PSOC_MEM_SERR:
7718	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7719	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7720	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7721	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7722	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7723		fallthrough;
7724	case GAUDI_EVENT_MMU_SERR:
7725		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7726		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7727		hl_fw_unmask_irq(hdev, event_type);
7728		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7729		break;
7730
7731	case GAUDI_EVENT_PCIE_DEC:
7732	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7733	case GAUDI_EVENT_PSOC_AXI_DEC:
7734	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7735		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7736		hl_fw_unmask_irq(hdev, event_type);
7737		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7738		break;
7739
7740	case GAUDI_EVENT_MMU_PAGE_FAULT:
7741	case GAUDI_EVENT_MMU_WR_PERM:
7742		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7743		hl_fw_unmask_irq(hdev, event_type);
7744		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7745		break;
7746
7747	case GAUDI_EVENT_MME0_WBC_RSP:
7748	case GAUDI_EVENT_MME0_SBAB0_RSP:
7749	case GAUDI_EVENT_MME1_WBC_RSP:
7750	case GAUDI_EVENT_MME1_SBAB0_RSP:
7751	case GAUDI_EVENT_MME2_WBC_RSP:
7752	case GAUDI_EVENT_MME2_SBAB0_RSP:
7753	case GAUDI_EVENT_MME3_WBC_RSP:
7754	case GAUDI_EVENT_MME3_SBAB0_RSP:
7755	case GAUDI_EVENT_RAZWI_OR_ADC:
7756	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7757	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7758		fallthrough;
7759	case GAUDI_EVENT_NIC0_QM0:
7760	case GAUDI_EVENT_NIC0_QM1:
7761	case GAUDI_EVENT_NIC1_QM0:
7762	case GAUDI_EVENT_NIC1_QM1:
7763	case GAUDI_EVENT_NIC2_QM0:
7764	case GAUDI_EVENT_NIC2_QM1:
7765	case GAUDI_EVENT_NIC3_QM0:
7766	case GAUDI_EVENT_NIC3_QM1:
7767	case GAUDI_EVENT_NIC4_QM0:
7768	case GAUDI_EVENT_NIC4_QM1:
7769	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7770	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7771		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7772		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7773		hl_fw_unmask_irq(hdev, event_type);
7774		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7775		break;
7776
7777	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7778		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7779		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7780		goto reset_device;
7781
7782	case GAUDI_EVENT_TPC0_BMON_SPMU:
7783	case GAUDI_EVENT_TPC1_BMON_SPMU:
7784	case GAUDI_EVENT_TPC2_BMON_SPMU:
7785	case GAUDI_EVENT_TPC3_BMON_SPMU:
7786	case GAUDI_EVENT_TPC4_BMON_SPMU:
7787	case GAUDI_EVENT_TPC5_BMON_SPMU:
7788	case GAUDI_EVENT_TPC6_BMON_SPMU:
7789	case GAUDI_EVENT_TPC7_BMON_SPMU:
7790	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7791		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7792		hl_fw_unmask_irq(hdev, event_type);
7793		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7794		break;
7795
7796	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7797		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7798		hl_fw_unmask_irq(hdev, event_type);
7799		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7800		break;
7801
7802	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7803		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7804		gaudi_print_sm_sei_info(hdev, event_type,
7805					&eq_entry->sm_sei_data);
7806		rc = hl_state_dump(hdev);
7807		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7808		if (rc)
7809			dev_err(hdev->dev,
7810				"Error during system state dump %d\n", rc);
7811		hl_fw_unmask_irq(hdev, event_type);
7812		break;
7813
7814	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7815		break;
7816
7817	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7818		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7819		hl_fw_unmask_irq(hdev, event_type);
7820		break;
7821
7822	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7823		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7824		dev_err(hdev->dev,
7825			"Received high temp H/W interrupt %d (cause %d)\n",
7826			event_type, cause);
7827		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7828		break;
7829
7830	case GAUDI_EVENT_DEV_RESET_REQ:
7831		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7832		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7833		goto reset_device;
7834
7835	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7836		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7837		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7838		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7839		goto reset_device;
7840
7841	case GAUDI_EVENT_FW_ALIVE_S:
7842		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7843		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7844		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7845		fw_err_info.event_id = event_type;
7846		fw_err_info.event_mask = &event_mask;
7847		hl_handle_fw_err(hdev, &fw_err_info);
7848		goto reset_device;
7849
7850	default:
7851		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7852				event_type);
7853		break;
7854	}
7855
7856	if (event_mask)
7857		hl_notifier_event_send_all(hdev, event_mask);
7858
7859	return;
7860
7861reset_device:
7862	reset_required = true;
7863
7864	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7865		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7866
7867		/* notify on device unavailable while the reset triggered by fw */
7868		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7869					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7870	} else if (hdev->hard_reset_on_fw_events) {
7871		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7872		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7873	} else {
7874		reset_required = false;
7875	}
7876
7877	if (reset_required) {
7878		/* escalate general hw errors to critical/fatal error */
7879		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7880			hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7881
7882		hl_device_cond_reset(hdev, flags, event_mask);
7883	} else {
7884		hl_fw_unmask_irq(hdev, event_type);
7885		/* Notification on occurred event needs to be sent although reset is not executed */
7886		if (event_mask)
7887			hl_notifier_event_send_all(hdev, event_mask);
7888	}
7889}
7890
7891static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7892{
7893	struct gaudi_device *gaudi = hdev->asic_specific;
7894
7895	if (aggregate) {
7896		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7897		return gaudi->events_stat_aggregate;
7898	}
7899
7900	*size = (u32) sizeof(gaudi->events_stat);
7901	return gaudi->events_stat;
7902}
7903
7904static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7905{
7906	struct gaudi_device *gaudi = hdev->asic_specific;
7907	u32 status, timeout_usec;
7908	int rc;
7909
7910	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7911		hdev->reset_info.hard_reset_pending)
7912		return 0;
7913
7914	if (hdev->pldm)
7915		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7916	else
7917		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7918
7919	/* L0 & L1 invalidation */
7920	WREG32(mmSTLB_INV_PS, 3);
7921	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7922	WREG32(mmSTLB_INV_PS, 2);
7923
7924	rc = hl_poll_timeout(
7925		hdev,
7926		mmSTLB_INV_PS,
7927		status,
7928		!status,
7929		1000,
7930		timeout_usec);
7931
7932	WREG32(mmSTLB_INV_SET, 0);
7933
7934	return rc;
7935}
7936
7937static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7938						bool is_hard, u32 flags,
7939						u32 asid, u64 va, u64 size)
7940{
7941	/* Treat as invalidate all because there is no range invalidation
7942	 * in Gaudi
7943	 */
7944	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7945}
7946
7947static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7948{
7949	u32 status, timeout_usec;
7950	int rc;
7951
7952	if (hdev->pldm)
7953		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7954	else
7955		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7956
7957	WREG32(MMU_ASID, asid);
7958	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7959	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7960	WREG32(MMU_BUSY, 0x80000000);
7961
7962	rc = hl_poll_timeout(
7963		hdev,
7964		MMU_BUSY,
7965		status,
7966		!(status & 0x80000000),
7967		1000,
7968		timeout_usec);
7969
7970	if (rc) {
7971		dev_err(hdev->dev,
7972			"Timeout during MMU hop0 config of asid %d\n", asid);
7973		return rc;
7974	}
7975
7976	return 0;
7977}
7978
7979static int gaudi_send_heartbeat(struct hl_device *hdev)
7980{
7981	struct gaudi_device *gaudi = hdev->asic_specific;
7982
7983	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7984		return 0;
7985
7986	return hl_fw_send_heartbeat(hdev);
7987}
7988
7989static int gaudi_cpucp_info_get(struct hl_device *hdev)
7990{
7991	struct gaudi_device *gaudi = hdev->asic_specific;
7992	struct asic_fixed_properties *prop = &hdev->asic_prop;
7993	int rc;
7994
7995	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7996		return 0;
7997
7998	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
7999					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8000					mmCPU_BOOT_ERR1);
8001	if (rc)
8002		return rc;
8003
8004	if (!strlen(prop->cpucp_info.card_name))
8005		strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8006				CARD_NAME_MAX_LEN);
8007
8008	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8009
8010	set_default_power_values(hdev);
8011
8012	return 0;
8013}
8014
8015static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8016		struct engines_data *e)
8017{
8018	struct gaudi_device *gaudi = hdev->asic_specific;
8019	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8020	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8021	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8022	unsigned long *mask = (unsigned long *)mask_arr;
8023	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8024	bool is_idle = true, is_eng_idle, is_slave;
8025	u64 offset;
8026	int i, dma_id, port;
8027
8028	if (e)
8029		hl_engine_data_sprintf(e,
8030			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8031			"---  -------  ------------  ----------  -------------\n");
8032
8033	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8034		dma_id = gaudi_dma_assignment[i];
8035		offset = dma_id * DMA_QMAN_OFFSET;
8036
8037		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8038		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8039		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8040		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8041				IS_DMA_IDLE(dma_core_sts0);
8042		is_idle &= is_eng_idle;
8043
8044		if (mask && !is_eng_idle)
8045			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8046		if (e)
8047			hl_engine_data_sprintf(e, fmt, dma_id,
8048				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8049				qm_cgm_sts, dma_core_sts0);
8050	}
8051
8052	if (e)
8053		hl_engine_data_sprintf(e,
8054			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8055			"---  -------  ------------  ----------  ----------\n");
8056
8057	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8058		offset = i * TPC_QMAN_OFFSET;
8059		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8060		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8061		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8062		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8063				IS_TPC_IDLE(tpc_cfg_sts);
8064		is_idle &= is_eng_idle;
8065
8066		if (mask && !is_eng_idle)
8067			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8068		if (e)
8069			hl_engine_data_sprintf(e, fmt, i,
8070				is_eng_idle ? "Y" : "N",
8071				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8072	}
8073
8074	if (e)
8075		hl_engine_data_sprintf(e,
8076			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8077			"---  -------  ------------  ----------  -----------\n");
8078
8079	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8080		offset = i * MME_QMAN_OFFSET;
8081		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8082		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8083
8084		/* MME 1 & 3 are slaves, no need to check their QMANs */
8085		is_slave = i % 2;
8086		if (!is_slave) {
8087			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8088			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8089			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8090		}
8091
8092		is_idle &= is_eng_idle;
8093
8094		if (mask && !is_eng_idle)
8095			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8096		if (e) {
8097			if (!is_slave)
8098				hl_engine_data_sprintf(e, fmt, i,
8099					is_eng_idle ? "Y" : "N",
8100					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8101			else
8102				hl_engine_data_sprintf(e, mme_slave_fmt, i,
8103					is_eng_idle ? "Y" : "N", "-",
8104					"-", mme_arch_sts);
8105		}
8106	}
8107
8108	if (e)
8109		hl_engine_data_sprintf(e,
8110				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8111				"---  -------  ------------  ----------\n");
8112
8113	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8114		offset = i * NIC_MACRO_QMAN_OFFSET;
8115		port = 2 * i;
8116		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8117			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8118			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8119			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8120			is_idle &= is_eng_idle;
8121
8122			if (mask && !is_eng_idle)
8123				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8124			if (e)
8125				hl_engine_data_sprintf(e, nic_fmt, port,
8126						is_eng_idle ? "Y" : "N",
8127						qm_glbl_sts0, qm_cgm_sts);
8128		}
8129
8130		port = 2 * i + 1;
8131		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8132			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8133			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8134			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8135			is_idle &= is_eng_idle;
8136
8137			if (mask && !is_eng_idle)
8138				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8139			if (e)
8140				hl_engine_data_sprintf(e, nic_fmt, port,
8141						is_eng_idle ? "Y" : "N",
8142						qm_glbl_sts0, qm_cgm_sts);
8143		}
8144	}
8145
8146	if (e)
8147		hl_engine_data_sprintf(e, "\n");
8148
8149	return is_idle;
8150}
8151
8152static void gaudi_hw_queues_lock(struct hl_device *hdev)
8153	__acquires(&gaudi->hw_queues_lock)
8154{
8155	struct gaudi_device *gaudi = hdev->asic_specific;
8156
8157	spin_lock(&gaudi->hw_queues_lock);
8158}
8159
8160static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8161	__releases(&gaudi->hw_queues_lock)
8162{
8163	struct gaudi_device *gaudi = hdev->asic_specific;
8164
8165	spin_unlock(&gaudi->hw_queues_lock);
8166}
8167
8168static u32 gaudi_get_pci_id(struct hl_device *hdev)
8169{
8170	return hdev->pdev->device;
8171}
8172
8173static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8174				size_t max_size)
8175{
8176	struct gaudi_device *gaudi = hdev->asic_specific;
8177
8178	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8179		return 0;
8180
8181	return hl_fw_get_eeprom_data(hdev, data, max_size);
8182}
8183
8184static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8185{
8186	struct gaudi_device *gaudi = hdev->asic_specific;
8187
8188	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8189		return 0;
8190
8191	return hl_fw_get_monitor_dump(hdev, data);
8192}
8193
8194/*
8195 * this function should be used only during initialization and/or after reset,
8196 * when there are no active users.
8197 */
8198static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
8199{
8200	u64 kernel_timeout;
8201	u32 status, offset;
8202	int rc;
8203
8204	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8205
8206	if (hdev->pldm)
8207		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8208	else
8209		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8210
8211	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8212			lower_32_bits(tpc_kernel));
8213	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8214			upper_32_bits(tpc_kernel));
8215
8216	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8217			lower_32_bits(tpc_kernel));
8218	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8219			upper_32_bits(tpc_kernel));
8220	/* set a valid LUT pointer, content is of no significance */
8221	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8222			lower_32_bits(tpc_kernel));
8223	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8224			upper_32_bits(tpc_kernel));
8225
8226	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8227			lower_32_bits(CFG_BASE +
8228				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8229
8230	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8231			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8232			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8233	/* wait a bit for the engine to start executing */
8234	usleep_range(1000, 1500);
8235
8236	/* wait until engine has finished executing */
8237	rc = hl_poll_timeout(
8238		hdev,
8239		mmTPC0_CFG_STATUS + offset,
8240		status,
8241		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8242				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8243		1000,
8244		kernel_timeout);
8245
8246	if (rc) {
8247		dev_err(hdev->dev,
8248			"Timeout while waiting for TPC%d icache prefetch\n",
8249			tpc_id);
8250		return -EIO;
8251	}
8252
8253	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8254			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8255
8256	/* wait a bit for the engine to start executing */
8257	usleep_range(1000, 1500);
8258
8259	/* wait until engine has finished executing */
8260	rc = hl_poll_timeout(
8261		hdev,
8262		mmTPC0_CFG_STATUS + offset,
8263		status,
8264		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8265				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8266		1000,
8267		kernel_timeout);
8268
8269	if (rc) {
8270		dev_err(hdev->dev,
8271			"Timeout while waiting for TPC%d vector pipe\n",
8272			tpc_id);
8273		return -EIO;
8274	}
8275
8276	rc = hl_poll_timeout(
8277		hdev,
8278		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8279		status,
8280		(status == 0),
8281		1000,
8282		kernel_timeout);
8283
8284	if (rc) {
8285		dev_err(hdev->dev,
8286			"Timeout while waiting for TPC%d kernel to execute\n",
8287			tpc_id);
8288		return -EIO;
8289	}
8290
8291	return 0;
8292}
8293
8294static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8295		struct hl_ctx *ctx)
8296{
8297	struct gaudi_device *gaudi = hdev->asic_specific;
8298	int min_alloc_order, rc, collective_cb_size;
8299
8300	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8301		return 0;
8302
8303	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8304							HOST_SPACE_INTERNAL_CB_SZ,
8305							&hdev->internal_cb_pool_dma_addr,
8306							GFP_KERNEL | __GFP_ZERO);
8307
8308	if (!hdev->internal_cb_pool_virt_addr)
8309		return -ENOMEM;
8310
8311	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8312			sizeof(struct packet_fence);
8313	min_alloc_order = ilog2(collective_cb_size);
8314
8315	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8316	if (!hdev->internal_cb_pool) {
8317		dev_err(hdev->dev,
8318			"Failed to create internal CB pool\n");
8319		rc = -ENOMEM;
8320		goto free_internal_cb_pool;
8321	}
8322
8323	rc = gen_pool_add(hdev->internal_cb_pool,
8324				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8325				HOST_SPACE_INTERNAL_CB_SZ, -1);
8326	if (rc) {
8327		dev_err(hdev->dev,
8328			"Failed to add memory to internal CB pool\n");
8329		rc = -EFAULT;
8330		goto destroy_internal_cb_pool;
8331	}
8332
8333	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8334			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8335			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8336
8337	if (!hdev->internal_cb_va_base) {
8338		rc = -ENOMEM;
8339		goto destroy_internal_cb_pool;
8340	}
8341
8342	mutex_lock(&hdev->mmu_lock);
8343
8344	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8345			hdev->internal_cb_pool_dma_addr,
8346			HOST_SPACE_INTERNAL_CB_SZ);
8347	if (rc)
8348		goto unreserve_internal_cb_pool;
8349
8350	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8351	if (rc)
8352		goto unmap_internal_cb_pool;
8353
8354	mutex_unlock(&hdev->mmu_lock);
8355
8356	return 0;
8357
8358unmap_internal_cb_pool:
8359	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8360			HOST_SPACE_INTERNAL_CB_SZ);
8361unreserve_internal_cb_pool:
8362	mutex_unlock(&hdev->mmu_lock);
8363	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8364			HOST_SPACE_INTERNAL_CB_SZ);
8365destroy_internal_cb_pool:
8366	gen_pool_destroy(hdev->internal_cb_pool);
8367free_internal_cb_pool:
8368	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8369					hdev->internal_cb_pool_dma_addr);
8370
8371	return rc;
8372}
8373
8374static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8375		struct hl_ctx *ctx)
8376{
8377	struct gaudi_device *gaudi = hdev->asic_specific;
8378
8379	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8380		return;
8381
8382	mutex_lock(&hdev->mmu_lock);
8383	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8384			HOST_SPACE_INTERNAL_CB_SZ);
8385	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8386			HOST_SPACE_INTERNAL_CB_SZ);
8387	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8388	mutex_unlock(&hdev->mmu_lock);
8389
8390	gen_pool_destroy(hdev->internal_cb_pool);
8391
8392	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8393					hdev->internal_cb_pool_dma_addr);
8394}
8395
8396static int gaudi_ctx_init(struct hl_ctx *ctx)
8397{
8398	int rc;
8399
8400	if (ctx->asid == HL_KERNEL_ASID_ID)
8401		return 0;
8402
8403	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8404	if (rc)
8405		return rc;
8406
8407	rc = gaudi_restore_user_registers(ctx->hdev);
8408	if (rc)
8409		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8410
8411	return rc;
8412}
8413
8414static void gaudi_ctx_fini(struct hl_ctx *ctx)
8415{
8416	if (ctx->asid == HL_KERNEL_ASID_ID)
8417		return;
8418
8419	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8420}
8421
8422static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8423{
8424	return 0;
8425}
8426
8427static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8428{
8429	return gaudi_cq_assignment[cq_idx];
8430}
8431
8432static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8433{
8434	return sizeof(struct packet_msg_short) +
8435			sizeof(struct packet_msg_prot) * 2;
8436}
8437
8438static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8439{
8440	return sizeof(struct packet_msg_short) * 4 +
8441			sizeof(struct packet_fence) +
8442			sizeof(struct packet_msg_prot) * 2;
8443}
8444
8445static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8446{
8447	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8448}
8449
8450static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8451				u32 size, bool eb)
8452{
8453	struct hl_cb *cb = (struct hl_cb *) data;
8454	struct packet_msg_short *pkt;
8455	u32 value, ctl, pkt_size = sizeof(*pkt);
8456
8457	pkt = cb->kernel_address + size;
8458	memset(pkt, 0, pkt_size);
8459
8460	/* Inc by 1, Mode ADD */
8461	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8462	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8463
8464	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8465	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8466	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8467	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8468	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8469	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8470	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8471
8472	pkt->value = cpu_to_le32(value);
8473	pkt->ctl = cpu_to_le32(ctl);
8474
8475	return size + pkt_size;
8476}
8477
8478static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8479					u16 addr)
8480{
8481	u32 ctl, pkt_size = sizeof(*pkt);
8482
8483	memset(pkt, 0, pkt_size);
8484
8485	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8486	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8487	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8488	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8489	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8490	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8491
8492	pkt->value = cpu_to_le32(value);
8493	pkt->ctl = cpu_to_le32(ctl);
8494
8495	return pkt_size;
8496}
8497
8498static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8499		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8500		u16 sob_val, u16 mon_id)
8501{
8502	u64 monitor_base;
8503	u32 ctl, value, pkt_size = sizeof(*pkt);
8504	u16 msg_addr_offset;
8505	u8 mask;
8506
8507	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8508		dev_err(hdev->dev,
8509			"sob_base %u (mask %#x) is not valid\n",
8510			sob_base, sob_mask);
8511		return 0;
8512	}
8513
8514	/*
8515	 * monitor_base should be the content of the base0 address registers,
8516	 * so it will be added to the msg short offsets
8517	 */
8518	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8519
8520	msg_addr_offset =
8521		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8522				monitor_base;
8523
8524	memset(pkt, 0, pkt_size);
8525
8526	/* Monitor config packet: bind the monitor to a sync object */
8527	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8528	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8529	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8530			0); /* GREATER OR EQUAL*/
8531	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8532
8533	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8534	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8535	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8536	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8537	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8538	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8539	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8540
8541	pkt->value = cpu_to_le32(value);
8542	pkt->ctl = cpu_to_le32(ctl);
8543
8544	return pkt_size;
8545}
8546
8547static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8548{
8549	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8550
8551	memset(pkt, 0, pkt_size);
8552
8553	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8554	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8555	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8556
8557	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8558	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8559	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8560	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8561
8562	pkt->cfg = cpu_to_le32(cfg);
8563	pkt->ctl = cpu_to_le32(ctl);
8564
8565	return pkt_size;
8566}
8567
8568static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8569{
8570	u32 offset, nic_index;
8571
8572	switch (queue_id) {
8573	case GAUDI_QUEUE_ID_DMA_0_0:
8574		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8575		break;
8576	case GAUDI_QUEUE_ID_DMA_0_1:
8577		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8578		break;
8579	case GAUDI_QUEUE_ID_DMA_0_2:
8580		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8581		break;
8582	case GAUDI_QUEUE_ID_DMA_0_3:
8583		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8584		break;
8585	case GAUDI_QUEUE_ID_DMA_1_0:
8586		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8587		break;
8588	case GAUDI_QUEUE_ID_DMA_1_1:
8589		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8590		break;
8591	case GAUDI_QUEUE_ID_DMA_1_2:
8592		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8593		break;
8594	case GAUDI_QUEUE_ID_DMA_1_3:
8595		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8596		break;
8597	case GAUDI_QUEUE_ID_DMA_5_0:
8598		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8599		break;
8600	case GAUDI_QUEUE_ID_DMA_5_1:
8601		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8602		break;
8603	case GAUDI_QUEUE_ID_DMA_5_2:
8604		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8605		break;
8606	case GAUDI_QUEUE_ID_DMA_5_3:
8607		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8608		break;
8609	case GAUDI_QUEUE_ID_TPC_7_0:
8610		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8611		break;
8612	case GAUDI_QUEUE_ID_TPC_7_1:
8613		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8614		break;
8615	case GAUDI_QUEUE_ID_TPC_7_2:
8616		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8617		break;
8618	case GAUDI_QUEUE_ID_TPC_7_3:
8619		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8620		break;
8621	case GAUDI_QUEUE_ID_NIC_0_0:
8622	case GAUDI_QUEUE_ID_NIC_1_0:
8623	case GAUDI_QUEUE_ID_NIC_2_0:
8624	case GAUDI_QUEUE_ID_NIC_3_0:
8625	case GAUDI_QUEUE_ID_NIC_4_0:
8626	case GAUDI_QUEUE_ID_NIC_5_0:
8627	case GAUDI_QUEUE_ID_NIC_6_0:
8628	case GAUDI_QUEUE_ID_NIC_7_0:
8629	case GAUDI_QUEUE_ID_NIC_8_0:
8630	case GAUDI_QUEUE_ID_NIC_9_0:
8631		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8632		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8633				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8634				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8635		break;
8636	case GAUDI_QUEUE_ID_NIC_0_1:
8637	case GAUDI_QUEUE_ID_NIC_1_1:
8638	case GAUDI_QUEUE_ID_NIC_2_1:
8639	case GAUDI_QUEUE_ID_NIC_3_1:
8640	case GAUDI_QUEUE_ID_NIC_4_1:
8641	case GAUDI_QUEUE_ID_NIC_5_1:
8642	case GAUDI_QUEUE_ID_NIC_6_1:
8643	case GAUDI_QUEUE_ID_NIC_7_1:
8644	case GAUDI_QUEUE_ID_NIC_8_1:
8645	case GAUDI_QUEUE_ID_NIC_9_1:
8646		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8647		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8648				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8649				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8650		break;
8651	case GAUDI_QUEUE_ID_NIC_0_2:
8652	case GAUDI_QUEUE_ID_NIC_1_2:
8653	case GAUDI_QUEUE_ID_NIC_2_2:
8654	case GAUDI_QUEUE_ID_NIC_3_2:
8655	case GAUDI_QUEUE_ID_NIC_4_2:
8656	case GAUDI_QUEUE_ID_NIC_5_2:
8657	case GAUDI_QUEUE_ID_NIC_6_2:
8658	case GAUDI_QUEUE_ID_NIC_7_2:
8659	case GAUDI_QUEUE_ID_NIC_8_2:
8660	case GAUDI_QUEUE_ID_NIC_9_2:
8661		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8662		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8663				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8664				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8665		break;
8666	case GAUDI_QUEUE_ID_NIC_0_3:
8667	case GAUDI_QUEUE_ID_NIC_1_3:
8668	case GAUDI_QUEUE_ID_NIC_2_3:
8669	case GAUDI_QUEUE_ID_NIC_3_3:
8670	case GAUDI_QUEUE_ID_NIC_4_3:
8671	case GAUDI_QUEUE_ID_NIC_5_3:
8672	case GAUDI_QUEUE_ID_NIC_6_3:
8673	case GAUDI_QUEUE_ID_NIC_7_3:
8674	case GAUDI_QUEUE_ID_NIC_8_3:
8675	case GAUDI_QUEUE_ID_NIC_9_3:
8676		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8677		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8678				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8679				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8680		break;
8681	default:
8682		return -EINVAL;
8683	}
8684
8685	*addr = CFG_BASE + offset;
8686
8687	return 0;
8688}
8689
8690static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8691{
8692	u64 monitor_base;
8693	u32 size = 0;
8694	u16 msg_addr_offset;
8695
8696	/*
8697	 * monitor_base should be the content of the base0 address registers,
8698	 * so it will be added to the msg short offsets
8699	 */
8700	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8701
8702	/* First monitor config packet: low address of the sync */
8703	msg_addr_offset =
8704		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8705				monitor_base;
8706
8707	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8708					msg_addr_offset);
8709
8710	/* Second monitor config packet: high address of the sync */
8711	msg_addr_offset =
8712		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8713				monitor_base;
8714
8715	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8716					msg_addr_offset);
8717
8718	/*
8719	 * Third monitor config packet: the payload, i.e. what to write when the
8720	 * sync triggers
8721	 */
8722	msg_addr_offset =
8723		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8724				monitor_base;
8725
8726	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8727
8728	return size;
8729}
8730
8731static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8732				struct hl_gen_wait_properties *prop)
8733{
8734	struct hl_cb *cb = (struct hl_cb *) prop->data;
8735	void *buf = cb->kernel_address;
8736	u64 fence_addr = 0;
8737	u32 size = prop->size;
8738
8739	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8740		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8741				prop->q_idx);
8742		return 0;
8743	}
8744
8745	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8746	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8747			prop->sob_mask, prop->sob_val, prop->mon_id);
8748	size += gaudi_add_fence_pkt(buf + size);
8749
8750	return size;
8751}
8752
8753static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8754{
8755	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8756
8757	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8758		hw_sob->sob_id);
8759
8760	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8761			hw_sob->sob_id * 4, 0);
8762
8763	kref_init(&hw_sob->kref);
8764}
8765
8766static u64 gaudi_get_device_time(struct hl_device *hdev)
8767{
8768	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8769
8770	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8771}
8772
8773static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8774				u32 *block_size, u32 *block_id)
8775{
8776	return -EPERM;
8777}
8778
8779static int gaudi_block_mmap(struct hl_device *hdev,
8780				struct vm_area_struct *vma,
8781				u32 block_id, u32 block_size)
8782{
8783	return -EPERM;
8784}
8785
8786static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8787{
8788	struct cpu_dyn_regs *dyn_regs =
8789			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8790	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8791			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8792			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8793
8794	WREG32(irq_handler_offset,
8795		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8796}
8797
8798static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8799{
8800	return -EINVAL;
8801}
8802
8803static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8804{
8805	switch (pll_idx) {
8806	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8807	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8808	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8809	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8810	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8811	case HL_GAUDI_MME_PLL: return MME_PLL;
8812	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8813	case HL_GAUDI_IF_PLL: return IF_PLL;
8814	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8815	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8816	default: return -EINVAL;
8817	}
8818}
8819
8820static int gaudi_add_sync_to_engine_map_entry(
8821	struct hl_sync_to_engine_map *map, u32 reg_value,
8822	enum hl_sync_engine_type engine_type, u32 engine_id)
8823{
8824	struct hl_sync_to_engine_map_entry *entry;
8825
8826	/* Reg value represents a partial address of sync object,
8827	 * it is used as unique identifier. For this we need to
8828	 * clear the cutoff cfg base bits from the value.
8829	 */
8830	if (reg_value == 0 || reg_value == 0xffffffff)
8831		return 0;
8832	reg_value -= lower_32_bits(CFG_BASE);
8833
8834	/* create a new hash entry */
8835	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8836	if (!entry)
8837		return -ENOMEM;
8838	entry->engine_type = engine_type;
8839	entry->engine_id = engine_id;
8840	entry->sync_id = reg_value;
8841	hash_add(map->tb, &entry->node, reg_value);
8842
8843	return 0;
8844}
8845
8846static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8847				struct hl_sync_to_engine_map *map)
8848{
8849	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8850	int i, j, rc;
8851	u32 reg_value;
8852
8853	/* Iterate over TPC engines */
8854	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8855
8856		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8857					sds->props[SP_NEXT_TPC] * i);
8858
8859		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8860							ENGINE_TPC, i);
8861		if (rc)
8862			goto free_sync_to_engine_map;
8863	}
8864
8865	/* Iterate over MME engines */
8866	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8867		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8868
8869			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8870						sds->props[SP_NEXT_MME] * i +
8871						j * sizeof(u32));
8872
8873			rc = gaudi_add_sync_to_engine_map_entry(
8874				map, reg_value, ENGINE_MME,
8875				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8876			if (rc)
8877				goto free_sync_to_engine_map;
8878		}
8879	}
8880
8881	/* Iterate over DMA engines */
8882	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8883		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8884					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8885		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8886							ENGINE_DMA, i);
8887		if (rc)
8888			goto free_sync_to_engine_map;
8889	}
8890
8891	return 0;
8892
8893free_sync_to_engine_map:
8894	hl_state_dump_free_sync_to_engine_map(map);
8895
8896	return rc;
8897}
8898
8899static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8900{
8901	return FIELD_GET(
8902		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8903		mon->status);
8904}
8905
8906static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8907{
8908	const size_t max_write = 10;
8909	u32 gid, mask, sob;
8910	int i, offset;
8911
8912	/* Sync object ID is calculated as follows:
8913	 * (8 * group_id + cleared bits in mask)
8914	 */
8915	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8916			mon->arm_data);
8917	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8918			mon->arm_data);
8919
8920	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8921		max_write; mask >>= 1, i++) {
8922		if (!(mask & 1)) {
8923			sob = gid * MONITOR_MAX_SOBS + i;
8924
8925			if (offset > 0)
8926				offset += snprintf(sobs + offset, max_write,
8927							", ");
8928
8929			offset += snprintf(sobs + offset, max_write, "%u", sob);
8930		}
8931	}
8932}
8933
8934static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8935				struct hl_device *hdev,
8936				struct hl_mon_state_dump *mon)
8937{
8938	const char *name;
8939	char scratch_buf1[BIN_REG_STRING_SIZE],
8940		scratch_buf2[BIN_REG_STRING_SIZE];
8941	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8942
8943	name = hl_state_dump_get_monitor_name(hdev, mon);
8944	if (!name)
8945		name = "";
8946
8947	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8948
8949	return hl_snprintf_resize(
8950		buf, size, offset,
8951		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8952		mon->id, name,
8953		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8954				mon->arm_data),
8955		hl_format_as_binary(
8956			scratch_buf1, sizeof(scratch_buf1),
8957			FIELD_GET(
8958				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8959				mon->arm_data)),
8960		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8961				mon->arm_data),
8962		mon->wr_data,
8963		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8964		hl_format_as_binary(
8965			scratch_buf2, sizeof(scratch_buf2),
8966			FIELD_GET(
8967				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8968				mon->status)),
8969		monitored_sobs);
8970}
8971
8972
8973static int gaudi_print_fences_single_engine(
8974	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8975	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8976	size_t *size, size_t *offset)
8977{
8978	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8979	int rc = -ENOMEM, i;
8980	u32 *statuses, *fences;
8981
8982	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8983			sizeof(*statuses), GFP_KERNEL);
8984	if (!statuses)
8985		goto out;
8986
8987	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8988				sds->props[SP_ENGINE_NUM_OF_QUEUES],
8989			 sizeof(*fences), GFP_KERNEL);
8990	if (!fences)
8991		goto free_status;
8992
8993	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
8994		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
8995
8996	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
8997				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
8998		fences[i] = RREG32(base_offset + i * sizeof(u32));
8999
9000	/* The actual print */
9001	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9002		u32 fence_id;
9003		u64 fence_cnt, fence_rdata;
9004		const char *engine_name;
9005
9006		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9007			statuses[i]))
9008			continue;
9009
9010		fence_id =
9011			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9012		fence_cnt = base_offset + CFG_BASE +
9013			sizeof(u32) *
9014			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9015		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9016				sds->props[SP_FENCE0_RDATA_OFFSET];
9017		engine_name = hl_sync_engine_to_string(engine_type);
9018
9019		rc = hl_snprintf_resize(
9020			buf, size, offset,
9021			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9022			engine_name, engine_id,
9023			i, fence_id,
9024			fence_cnt, engine_name, engine_id, fence_id, i,
9025			fence_rdata, engine_name, engine_id, fence_id, i,
9026			fences[fence_id],
9027			statuses[i]);
9028		if (rc)
9029			goto free_fences;
9030	}
9031
9032	rc = 0;
9033
9034free_fences:
9035	kfree(fences);
9036free_status:
9037	kfree(statuses);
9038out:
9039	return rc;
9040}
9041
9042
9043static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9044	.monitor_valid = gaudi_monitor_valid,
9045	.print_single_monitor = gaudi_print_single_monitor,
9046	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9047	.print_fences_single_engine = gaudi_print_fences_single_engine,
9048};
9049
9050static void gaudi_state_dump_init(struct hl_device *hdev)
9051{
9052	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9053	int i;
9054
9055	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9056		hash_add(sds->so_id_to_str_tb,
9057			&gaudi_so_id_to_str[i].node,
9058			gaudi_so_id_to_str[i].id);
9059
9060	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9061		hash_add(sds->monitor_id_to_str_tb,
9062			&gaudi_monitor_id_to_str[i].node,
9063			gaudi_monitor_id_to_str[i].id);
9064
9065	sds->props = gaudi_state_dump_specs_props;
9066
9067	sds->sync_namager_names = gaudi_sync_manager_names;
9068
9069	sds->funcs = gaudi_state_dump_funcs;
9070}
9071
9072static u32 *gaudi_get_stream_master_qid_arr(void)
9073{
9074	return gaudi_stream_master;
9075}
9076
9077static int gaudi_set_dram_properties(struct hl_device *hdev)
9078{
9079	return 0;
9080}
9081
9082static int gaudi_set_binning_masks(struct hl_device *hdev)
9083{
9084	return 0;
9085}
9086
9087static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9088{
9089}
9090
9091static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9092{
9093	struct hl_device *hdev = dev_get_drvdata(dev);
9094	struct cpucp_info *cpucp_info;
9095
9096	cpucp_info = &hdev->asic_prop.cpucp_info;
9097
9098	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9099}
9100
9101static DEVICE_ATTR_RO(infineon_ver);
9102
9103static struct attribute *gaudi_vrm_dev_attrs[] = {
9104	&dev_attr_infineon_ver.attr,
9105	NULL,
9106};
9107
9108static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9109					struct attribute_group *dev_vrm_attr_grp)
9110{
9111	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9112	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9113}
9114
9115static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9116{
9117	return 0;
9118}
9119
9120static const struct hl_asic_funcs gaudi_funcs = {
9121	.early_init = gaudi_early_init,
9122	.early_fini = gaudi_early_fini,
9123	.late_init = gaudi_late_init,
9124	.late_fini = gaudi_late_fini,
9125	.sw_init = gaudi_sw_init,
9126	.sw_fini = gaudi_sw_fini,
9127	.hw_init = gaudi_hw_init,
9128	.hw_fini = gaudi_hw_fini,
9129	.halt_engines = gaudi_halt_engines,
9130	.suspend = gaudi_suspend,
9131	.resume = gaudi_resume,
9132	.mmap = gaudi_mmap,
9133	.ring_doorbell = gaudi_ring_doorbell,
9134	.pqe_write = gaudi_pqe_write,
9135	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9136	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9137	.scrub_device_mem = gaudi_scrub_device_mem,
9138	.scrub_device_dram = gaudi_scrub_device_dram,
9139	.get_int_queue_base = gaudi_get_int_queue_base,
9140	.test_queues = gaudi_test_queues,
9141	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9142	.asic_dma_pool_free = gaudi_dma_pool_free,
9143	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9144	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9145	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
9146	.cs_parser = gaudi_cs_parser,
9147	.dma_map_sgtable = hl_asic_dma_map_sgtable,
9148	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9149	.update_eq_ci = gaudi_update_eq_ci,
9150	.context_switch = gaudi_context_switch,
9151	.restore_phase_topology = gaudi_restore_phase_topology,
9152	.debugfs_read_dma = gaudi_debugfs_read_dma,
9153	.add_device_attr = gaudi_add_device_attr,
9154	.handle_eqe = gaudi_handle_eqe,
9155	.get_events_stat = gaudi_get_events_stat,
9156	.read_pte = gaudi_read_pte,
9157	.write_pte = gaudi_write_pte,
9158	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9159	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9160	.mmu_prefetch_cache_range = NULL,
9161	.send_heartbeat = gaudi_send_heartbeat,
9162	.debug_coresight = gaudi_debug_coresight,
9163	.is_device_idle = gaudi_is_device_idle,
9164	.compute_reset_late_init = gaudi_compute_reset_late_init,
9165	.hw_queues_lock = gaudi_hw_queues_lock,
9166	.hw_queues_unlock = gaudi_hw_queues_unlock,
9167	.get_pci_id = gaudi_get_pci_id,
9168	.get_eeprom_data = gaudi_get_eeprom_data,
9169	.get_monitor_dump = gaudi_get_monitor_dump,
9170	.send_cpu_message = gaudi_send_cpu_message,
9171	.pci_bars_map = gaudi_pci_bars_map,
9172	.init_iatu = gaudi_init_iatu,
9173	.rreg = hl_rreg,
9174	.wreg = hl_wreg,
9175	.halt_coresight = gaudi_halt_coresight,
9176	.ctx_init = gaudi_ctx_init,
9177	.ctx_fini = gaudi_ctx_fini,
9178	.pre_schedule_cs = gaudi_pre_schedule_cs,
9179	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9180	.load_firmware_to_device = gaudi_load_firmware_to_device,
9181	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9182	.get_signal_cb_size = gaudi_get_signal_cb_size,
9183	.get_wait_cb_size = gaudi_get_wait_cb_size,
9184	.gen_signal_cb = gaudi_gen_signal_cb,
9185	.gen_wait_cb = gaudi_gen_wait_cb,
9186	.reset_sob = gaudi_reset_sob,
9187	.reset_sob_group = gaudi_reset_sob_group,
9188	.get_device_time = gaudi_get_device_time,
9189	.pb_print_security_errors = NULL,
9190	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9191	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9192	.get_dec_base_addr = NULL,
9193	.scramble_addr = hl_mmu_scramble_addr,
9194	.descramble_addr = hl_mmu_descramble_addr,
9195	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9196	.get_hw_block_id = gaudi_get_hw_block_id,
9197	.hw_block_mmap = gaudi_block_mmap,
9198	.enable_events_from_fw = gaudi_enable_events_from_fw,
9199	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9200	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9201	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9202	.init_firmware_loader = gaudi_init_firmware_loader,
9203	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9204	.state_dump_init = gaudi_state_dump_init,
9205	.get_sob_addr = gaudi_get_sob_addr,
9206	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9207	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9208	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9209	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9210	.access_dev_mem = hl_access_dev_mem,
9211	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9212	.send_device_activity = gaudi_send_device_activity,
9213	.set_dram_properties = gaudi_set_dram_properties,
9214	.set_binning_masks = gaudi_set_binning_masks,
9215};
9216
9217/**
9218 * gaudi_set_asic_funcs - set GAUDI function pointers
9219 *
9220 * @hdev: pointer to hl_device structure
9221 *
9222 */
9223void gaudi_set_asic_funcs(struct hl_device *hdev)
9224{
9225	hdev->asic_funcs = &gaudi_funcs;
9226}
9227