1/* SPDX-License-Identifier: GPL-2.0 or MIT */ 2/* Copyright 2023 Collabora ltd. */ 3 4#ifndef __PANTHOR_MCU_H__ 5#define __PANTHOR_MCU_H__ 6 7#include <linux/types.h> 8 9struct panthor_device; 10struct panthor_kernel_bo; 11 12#define MAX_CSGS 31 13#define MAX_CS_PER_CSG 32 14 15struct panthor_fw_ringbuf_input_iface { 16 u64 insert; 17 u64 extract; 18}; 19 20struct panthor_fw_ringbuf_output_iface { 21 u64 extract; 22 u32 active; 23}; 24 25struct panthor_fw_cs_control_iface { 26#define CS_FEATURES_WORK_REGS(x) (((x) & GENMASK(7, 0)) + 1) 27#define CS_FEATURES_SCOREBOARDS(x) (((x) & GENMASK(15, 8)) >> 8) 28#define CS_FEATURES_COMPUTE BIT(16) 29#define CS_FEATURES_FRAGMENT BIT(17) 30#define CS_FEATURES_TILER BIT(18) 31 u32 features; 32 u32 input_va; 33 u32 output_va; 34}; 35 36struct panthor_fw_cs_input_iface { 37#define CS_STATE_MASK GENMASK(2, 0) 38#define CS_STATE_STOP 0 39#define CS_STATE_START 1 40#define CS_EXTRACT_EVENT BIT(4) 41#define CS_IDLE_SYNC_WAIT BIT(8) 42#define CS_IDLE_PROTM_PENDING BIT(9) 43#define CS_IDLE_EMPTY BIT(10) 44#define CS_IDLE_RESOURCE_REQ BIT(11) 45#define CS_TILER_OOM BIT(26) 46#define CS_PROTM_PENDING BIT(27) 47#define CS_FATAL BIT(30) 48#define CS_FAULT BIT(31) 49#define CS_REQ_MASK (CS_STATE_MASK | \ 50 CS_EXTRACT_EVENT | \ 51 CS_IDLE_SYNC_WAIT | \ 52 CS_IDLE_PROTM_PENDING | \ 53 CS_IDLE_EMPTY | \ 54 CS_IDLE_RESOURCE_REQ) 55#define CS_EVT_MASK (CS_TILER_OOM | \ 56 CS_PROTM_PENDING | \ 57 CS_FATAL | \ 58 CS_FAULT) 59 u32 req; 60 61#define CS_CONFIG_PRIORITY(x) ((x) & GENMASK(3, 0)) 62#define CS_CONFIG_DOORBELL(x) (((x) << 8) & GENMASK(15, 8)) 63 u32 config; 64 u32 reserved1; 65 u32 ack_irq_mask; 66 u64 ringbuf_base; 67 u32 ringbuf_size; 68 u32 reserved2; 69 u64 heap_start; 70 u64 heap_end; 71 u64 ringbuf_input; 72 u64 ringbuf_output; 73 u32 instr_config; 74 u32 instrbuf_size; 75 u64 instrbuf_base; 76 u64 instrbuf_offset_ptr; 77}; 78 79struct panthor_fw_cs_output_iface { 80 u32 ack; 81 u32 reserved1[15]; 82 u64 status_cmd_ptr; 83 84#define CS_STATUS_WAIT_SB_MASK GENMASK(15, 0) 85#define CS_STATUS_WAIT_SB_SRC_MASK GENMASK(19, 16) 86#define CS_STATUS_WAIT_SB_SRC_NONE (0 << 16) 87#define CS_STATUS_WAIT_SB_SRC_WAIT (8 << 16) 88#define CS_STATUS_WAIT_SYNC_COND_LE (0 << 24) 89#define CS_STATUS_WAIT_SYNC_COND_GT (1 << 24) 90#define CS_STATUS_WAIT_SYNC_COND_MASK GENMASK(27, 24) 91#define CS_STATUS_WAIT_PROGRESS BIT(28) 92#define CS_STATUS_WAIT_PROTM BIT(29) 93#define CS_STATUS_WAIT_SYNC_64B BIT(30) 94#define CS_STATUS_WAIT_SYNC BIT(31) 95 u32 status_wait; 96 u32 status_req_resource; 97 u64 status_wait_sync_ptr; 98 u32 status_wait_sync_value; 99 u32 status_scoreboards; 100 101#define CS_STATUS_BLOCKED_REASON_UNBLOCKED 0 102#define CS_STATUS_BLOCKED_REASON_SB_WAIT 1 103#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2 104#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3 105#define CS_STATUS_BLOCKED_REASON_DEFERRED 5 106#define CS_STATUS_BLOCKED_REASON_RES 6 107#define CS_STATUS_BLOCKED_REASON_FLUSH 7 108#define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0) 109 u32 status_blocked_reason; 110 u32 status_wait_sync_value_hi; 111 u32 reserved2[6]; 112 113#define CS_EXCEPTION_TYPE(x) ((x) & GENMASK(7, 0)) 114#define CS_EXCEPTION_DATA(x) (((x) >> 8) & GENMASK(23, 0)) 115 u32 fault; 116 u32 fatal; 117 u64 fault_info; 118 u64 fatal_info; 119 u32 reserved3[10]; 120 u32 heap_vt_start; 121 u32 heap_vt_end; 122 u32 reserved4; 123 u32 heap_frag_end; 124 u64 heap_address; 125}; 126 127struct panthor_fw_csg_control_iface { 128 u32 features; 129 u32 input_va; 130 u32 output_va; 131 u32 suspend_size; 132 u32 protm_suspend_size; 133 u32 stream_num; 134 u32 stream_stride; 135}; 136 137struct panthor_fw_csg_input_iface { 138#define CSG_STATE_MASK GENMASK(2, 0) 139#define CSG_STATE_TERMINATE 0 140#define CSG_STATE_START 1 141#define CSG_STATE_SUSPEND 2 142#define CSG_STATE_RESUME 3 143#define CSG_ENDPOINT_CONFIG BIT(4) 144#define CSG_STATUS_UPDATE BIT(5) 145#define CSG_SYNC_UPDATE BIT(28) 146#define CSG_IDLE BIT(29) 147#define CSG_DOORBELL BIT(30) 148#define CSG_PROGRESS_TIMER_EVENT BIT(31) 149#define CSG_REQ_MASK (CSG_STATE_MASK | \ 150 CSG_ENDPOINT_CONFIG | \ 151 CSG_STATUS_UPDATE) 152#define CSG_EVT_MASK (CSG_SYNC_UPDATE | \ 153 CSG_IDLE | \ 154 CSG_PROGRESS_TIMER_EVENT) 155 u32 req; 156 u32 ack_irq_mask; 157 158 u32 doorbell_req; 159 u32 cs_irq_ack; 160 u32 reserved1[4]; 161 u64 allow_compute; 162 u64 allow_fragment; 163 u32 allow_other; 164 165#define CSG_EP_REQ_COMPUTE(x) ((x) & GENMASK(7, 0)) 166#define CSG_EP_REQ_FRAGMENT(x) (((x) << 8) & GENMASK(15, 8)) 167#define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16)) 168#define CSG_EP_REQ_EXCL_COMPUTE BIT(20) 169#define CSG_EP_REQ_EXCL_FRAGMENT BIT(21) 170#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & GENMASK(31, 28)) 171#define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28) 172 u32 endpoint_req; 173 u32 reserved2[2]; 174 u64 suspend_buf; 175 u64 protm_suspend_buf; 176 u32 config; 177 u32 iter_trace_config; 178}; 179 180struct panthor_fw_csg_output_iface { 181 u32 ack; 182 u32 reserved1; 183 u32 doorbell_ack; 184 u32 cs_irq_req; 185 u32 status_endpoint_current; 186 u32 status_endpoint_req; 187 188#define CSG_STATUS_STATE_IS_IDLE BIT(0) 189 u32 status_state; 190 u32 resource_dep; 191}; 192 193struct panthor_fw_global_control_iface { 194 u32 version; 195 u32 features; 196 u32 input_va; 197 u32 output_va; 198 u32 group_num; 199 u32 group_stride; 200 u32 perfcnt_size; 201 u32 instr_features; 202}; 203 204struct panthor_fw_global_input_iface { 205#define GLB_HALT BIT(0) 206#define GLB_CFG_PROGRESS_TIMER BIT(1) 207#define GLB_CFG_ALLOC_EN BIT(2) 208#define GLB_CFG_POWEROFF_TIMER BIT(3) 209#define GLB_PROTM_ENTER BIT(4) 210#define GLB_PERFCNT_EN BIT(5) 211#define GLB_PERFCNT_SAMPLE BIT(6) 212#define GLB_COUNTER_EN BIT(7) 213#define GLB_PING BIT(8) 214#define GLB_FWCFG_UPDATE BIT(9) 215#define GLB_IDLE_EN BIT(10) 216#define GLB_SLEEP BIT(12) 217#define GLB_INACTIVE_COMPUTE BIT(20) 218#define GLB_INACTIVE_FRAGMENT BIT(21) 219#define GLB_INACTIVE_TILER BIT(22) 220#define GLB_PROTM_EXIT BIT(23) 221#define GLB_PERFCNT_THRESHOLD BIT(24) 222#define GLB_PERFCNT_OVERFLOW BIT(25) 223#define GLB_IDLE BIT(26) 224#define GLB_DBG_CSF BIT(30) 225#define GLB_DBG_HOST BIT(31) 226#define GLB_REQ_MASK GENMASK(10, 0) 227#define GLB_EVT_MASK GENMASK(26, 20) 228 u32 req; 229 u32 ack_irq_mask; 230 u32 doorbell_req; 231 u32 reserved1; 232 u32 progress_timer; 233 234#define GLB_TIMER_VAL(x) ((x) & GENMASK(30, 0)) 235#define GLB_TIMER_SOURCE_GPU_COUNTER BIT(31) 236 u32 poweroff_timer; 237 u64 core_en_mask; 238 u32 reserved2; 239 u32 perfcnt_as; 240 u64 perfcnt_base; 241 u32 perfcnt_extract; 242 u32 reserved3[3]; 243 u32 perfcnt_config; 244 u32 perfcnt_csg_select; 245 u32 perfcnt_fw_enable; 246 u32 perfcnt_csg_enable; 247 u32 perfcnt_csf_enable; 248 u32 perfcnt_shader_enable; 249 u32 perfcnt_tiler_enable; 250 u32 perfcnt_mmu_l2_enable; 251 u32 reserved4[8]; 252 u32 idle_timer; 253}; 254 255enum panthor_fw_halt_status { 256 PANTHOR_FW_HALT_OK = 0, 257 PANTHOR_FW_HALT_ON_PANIC = 0x4e, 258 PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f, 259}; 260 261struct panthor_fw_global_output_iface { 262 u32 ack; 263 u32 reserved1; 264 u32 doorbell_ack; 265 u32 reserved2; 266 u32 halt_status; 267 u32 perfcnt_status; 268 u32 perfcnt_insert; 269}; 270 271/** 272 * struct panthor_fw_cs_iface - Firmware command stream slot interface 273 */ 274struct panthor_fw_cs_iface { 275 /** 276 * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req 277 * field. 278 * 279 * Needed so we can update the req field concurrently from the interrupt 280 * handler and the scheduler logic. 281 * 282 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW 283 * interface sections are mapped uncached/write-combined right now, and 284 * using cmpxchg() on such mappings leads to SError faults. Revisit when 285 * we have 'SHARED' GPU mappings hooked up. 286 */ 287 spinlock_t lock; 288 289 /** 290 * @control: Command stream slot control interface. 291 * 292 * Used to expose command stream slot properties. 293 * 294 * This interface is read-only. 295 */ 296 struct panthor_fw_cs_control_iface *control; 297 298 /** 299 * @input: Command stream slot input interface. 300 * 301 * Used for host updates/events. 302 */ 303 struct panthor_fw_cs_input_iface *input; 304 305 /** 306 * @output: Command stream slot output interface. 307 * 308 * Used for FW updates/events. 309 * 310 * This interface is read-only. 311 */ 312 const struct panthor_fw_cs_output_iface *output; 313}; 314 315/** 316 * struct panthor_fw_csg_iface - Firmware command stream group slot interface 317 */ 318struct panthor_fw_csg_iface { 319 /** 320 * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req 321 * field. 322 * 323 * Needed so we can update the req field concurrently from the interrupt 324 * handler and the scheduler logic. 325 * 326 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW 327 * interface sections are mapped uncached/write-combined right now, and 328 * using cmpxchg() on such mappings leads to SError faults. Revisit when 329 * we have 'SHARED' GPU mappings hooked up. 330 */ 331 spinlock_t lock; 332 333 /** 334 * @control: Command stream group slot control interface. 335 * 336 * Used to expose command stream group slot properties. 337 * 338 * This interface is read-only. 339 */ 340 const struct panthor_fw_csg_control_iface *control; 341 342 /** 343 * @input: Command stream slot input interface. 344 * 345 * Used for host updates/events. 346 */ 347 struct panthor_fw_csg_input_iface *input; 348 349 /** 350 * @output: Command stream group slot output interface. 351 * 352 * Used for FW updates/events. 353 * 354 * This interface is read-only. 355 */ 356 const struct panthor_fw_csg_output_iface *output; 357}; 358 359/** 360 * struct panthor_fw_global_iface - Firmware global interface 361 */ 362struct panthor_fw_global_iface { 363 /** 364 * @lock: Lock protecting access to the panthor_fw_global_input_iface::req 365 * field. 366 * 367 * Needed so we can update the req field concurrently from the interrupt 368 * handler and the scheduler/FW management logic. 369 * 370 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW 371 * interface sections are mapped uncached/write-combined right now, and 372 * using cmpxchg() on such mappings leads to SError faults. Revisit when 373 * we have 'SHARED' GPU mappings hooked up. 374 */ 375 spinlock_t lock; 376 377 /** 378 * @control: Command stream group slot control interface. 379 * 380 * Used to expose global FW properties. 381 * 382 * This interface is read-only. 383 */ 384 const struct panthor_fw_global_control_iface *control; 385 386 /** 387 * @input: Global input interface. 388 * 389 * Used for host updates/events. 390 */ 391 struct panthor_fw_global_input_iface *input; 392 393 /** 394 * @output: Global output interface. 395 * 396 * Used for FW updates/events. 397 * 398 * This interface is read-only. 399 */ 400 const struct panthor_fw_global_output_iface *output; 401}; 402 403/** 404 * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW 405 * @__iface: The interface to operate on. 406 * @__in_reg: Name of the register to update in the input section of the interface. 407 * @__out_reg: Name of the register to take as a reference in the output section of the 408 * interface. 409 * @__mask: Mask to apply to the update. 410 * 411 * The Host -> FW event/message passing was designed to be lockless, with each side of 412 * the channel having its writeable section. Events are signaled as a difference between 413 * the host and FW side in the req/ack registers (when a bit differs, there's an event 414 * pending, when they are the same, nothing needs attention). 415 * 416 * This helper allows one to update the req register based on the current value of the 417 * ack register managed by the FW. Toggling a specific bit will flag an event. In order 418 * for events to be re-evaluated, the interface doorbell needs to be rung. 419 * 420 * Concurrent accesses to the same req register is covered. 421 * 422 * Anything requiring atomic updates to multiple registers requires a dedicated lock. 423 */ 424#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \ 425 do { \ 426 u32 __cur_val, __new_val, __out_val; \ 427 spin_lock(&(__iface)->lock); \ 428 __cur_val = READ_ONCE((__iface)->input->__in_reg); \ 429 __out_val = READ_ONCE((__iface)->output->__out_reg); \ 430 __new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \ 431 WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ 432 spin_unlock(&(__iface)->lock); \ 433 } while (0) 434 435/** 436 * panthor_fw_update_reqs() - Update bits to reflect a configuration change 437 * @__iface: The interface to operate on. 438 * @__in_reg: Name of the register to update in the input section of the interface. 439 * @__val: Value to set. 440 * @__mask: Mask to apply to the update. 441 * 442 * Some configuration get passed through req registers that are also used to 443 * send events to the FW. Those req registers being updated from the interrupt 444 * handler, they require special helpers to update the configuration part as well. 445 * 446 * Concurrent accesses to the same req register is covered. 447 * 448 * Anything requiring atomic updates to multiple registers requires a dedicated lock. 449 */ 450#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \ 451 do { \ 452 u32 __cur_val, __new_val; \ 453 spin_lock(&(__iface)->lock); \ 454 __cur_val = READ_ONCE((__iface)->input->__in_reg); \ 455 __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \ 456 WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ 457 spin_unlock(&(__iface)->lock); \ 458 } while (0) 459 460struct panthor_fw_global_iface * 461panthor_fw_get_glb_iface(struct panthor_device *ptdev); 462 463struct panthor_fw_csg_iface * 464panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot); 465 466struct panthor_fw_cs_iface * 467panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot); 468 469int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask, 470 u32 *acked, u32 timeout_ms); 471 472int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked, 473 u32 timeout_ms); 474 475void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot); 476 477struct panthor_kernel_bo * 478panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, 479 struct panthor_fw_ringbuf_input_iface **input, 480 const struct panthor_fw_ringbuf_output_iface **output, 481 u32 *input_fw_va, u32 *output_fw_va); 482struct panthor_kernel_bo * 483panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size); 484 485struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev); 486 487void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang); 488int panthor_fw_post_reset(struct panthor_device *ptdev); 489 490static inline void panthor_fw_suspend(struct panthor_device *ptdev) 491{ 492 panthor_fw_pre_reset(ptdev, false); 493} 494 495static inline int panthor_fw_resume(struct panthor_device *ptdev) 496{ 497 return panthor_fw_post_reset(ptdev); 498} 499 500int panthor_fw_init(struct panthor_device *ptdev); 501void panthor_fw_unplug(struct panthor_device *ptdev); 502 503#endif 504