1254885Sdumbbell/* 2254885Sdumbbell * Copyright 2010 Advanced Micro Devices, Inc. 3254885Sdumbbell * 4254885Sdumbbell * Permission is hereby granted, free of charge, to any person obtaining a 5254885Sdumbbell * copy of this software and associated documentation files (the "Software"), 6254885Sdumbbell * to deal in the Software without restriction, including without limitation 7254885Sdumbbell * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8254885Sdumbbell * and/or sell copies of the Software, and to permit persons to whom the 9254885Sdumbbell * Software is furnished to do so, subject to the following conditions: 10254885Sdumbbell * 11254885Sdumbbell * The above copyright notice and this permission notice (including the next 12254885Sdumbbell * paragraph) shall be included in all copies or substantial portions of the 13254885Sdumbbell * Software. 14254885Sdumbbell * 15254885Sdumbbell * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16254885Sdumbbell * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17254885Sdumbbell * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18254885Sdumbbell * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19254885Sdumbbell * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20254885Sdumbbell * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21254885Sdumbbell * DEALINGS IN THE SOFTWARE. 22254885Sdumbbell * 23254885Sdumbbell * Authors: 24254885Sdumbbell * Alex Deucher <alexander.deucher@amd.com> 25254885Sdumbbell */ 26254885Sdumbbell 27254885Sdumbbell#include <sys/cdefs.h> 28254885Sdumbbell__FBSDID("$FreeBSD: releng/10.3/sys/dev/drm2/radeon/evergreen_blit_kms.c 261455 2014-02-04 03:36:42Z eadler $"); 29254885Sdumbbell 30254885Sdumbbell#include <dev/drm2/drmP.h> 31254885Sdumbbell#include <dev/drm2/radeon/radeon_drm.h> 32254885Sdumbbell#include "radeon.h" 33254885Sdumbbell#include "radeon_asic.h" 34254885Sdumbbell 35254885Sdumbbell#include "evergreend.h" 36254885Sdumbbell#include "evergreen_blit_shaders.h" 37254885Sdumbbell#include "cayman_blit_shaders.h" 38254885Sdumbbell#include "radeon_blit_common.h" 39254885Sdumbbell 40254885Sdumbbell/* emits 17 */ 41254885Sdumbbellstatic void 42254885Sdumbbellset_render_target(struct radeon_device *rdev, int format, 43254885Sdumbbell int w, int h, u64 gpu_addr) 44254885Sdumbbell{ 45254885Sdumbbell struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 46254885Sdumbbell u32 cb_color_info; 47254885Sdumbbell int pitch, slice; 48254885Sdumbbell 49254885Sdumbbell h = roundup2(h, 8); 50254885Sdumbbell if (h < 8) 51254885Sdumbbell h = 8; 52254885Sdumbbell 53254885Sdumbbell cb_color_info = CB_FORMAT(format) | 54254885Sdumbbell CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) | 55254885Sdumbbell CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 56254885Sdumbbell pitch = (w / 8) - 1; 57254885Sdumbbell slice = ((w * h) / 64) - 1; 58254885Sdumbbell 59254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 15)); 60254885Sdumbbell radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2); 61254885Sdumbbell radeon_ring_write(ring, gpu_addr >> 8); 62254885Sdumbbell radeon_ring_write(ring, pitch); 63254885Sdumbbell radeon_ring_write(ring, slice); 64254885Sdumbbell radeon_ring_write(ring, 0); 65254885Sdumbbell radeon_ring_write(ring, cb_color_info); 66254885Sdumbbell radeon_ring_write(ring, 0); 67254885Sdumbbell radeon_ring_write(ring, (w - 1) | ((h - 1) << 16)); 68254885Sdumbbell radeon_ring_write(ring, 0); 69254885Sdumbbell radeon_ring_write(ring, 0); 70254885Sdumbbell radeon_ring_write(ring, 0); 71254885Sdumbbell radeon_ring_write(ring, 0); 72254885Sdumbbell radeon_ring_write(ring, 0); 73254885Sdumbbell radeon_ring_write(ring, 0); 74254885Sdumbbell radeon_ring_write(ring, 0); 75254885Sdumbbell radeon_ring_write(ring, 0); 76254885Sdumbbell} 77254885Sdumbbell 78254885Sdumbbell/* emits 5dw */ 79254885Sdumbbellstatic void 80254885Sdumbbellcp_set_surface_sync(struct radeon_device *rdev, 81254885Sdumbbell u32 sync_type, u32 size, 82254885Sdumbbell u64 mc_addr) 83254885Sdumbbell{ 84254885Sdumbbell struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 85254885Sdumbbell u32 cp_coher_size; 86254885Sdumbbell 87254885Sdumbbell if (size == 0xffffffff) 88254885Sdumbbell cp_coher_size = 0xffffffff; 89254885Sdumbbell else 90254885Sdumbbell cp_coher_size = ((size + 255) >> 8); 91254885Sdumbbell 92254885Sdumbbell if (rdev->family >= CHIP_CAYMAN) { 93254885Sdumbbell /* CP_COHER_CNTL2 has to be set manually when submitting a surface_sync 94254885Sdumbbell * to the RB directly. For IBs, the CP programs this as part of the 95254885Sdumbbell * surface_sync packet. 96254885Sdumbbell */ 97254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 98254885Sdumbbell radeon_ring_write(ring, (0x85e8 - PACKET3_SET_CONFIG_REG_START) >> 2); 99254885Sdumbbell radeon_ring_write(ring, 0); /* CP_COHER_CNTL2 */ 100254885Sdumbbell } 101254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); 102254885Sdumbbell radeon_ring_write(ring, sync_type); 103254885Sdumbbell radeon_ring_write(ring, cp_coher_size); 104254885Sdumbbell radeon_ring_write(ring, mc_addr >> 8); 105254885Sdumbbell radeon_ring_write(ring, 10); /* poll interval */ 106254885Sdumbbell} 107254885Sdumbbell 108254885Sdumbbell/* emits 11dw + 1 surface sync = 16dw */ 109254885Sdumbbellstatic void 110254885Sdumbbellset_shaders(struct radeon_device *rdev) 111254885Sdumbbell{ 112254885Sdumbbell struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 113254885Sdumbbell u64 gpu_addr; 114254885Sdumbbell 115254885Sdumbbell /* VS */ 116254885Sdumbbell gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; 117254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 3)); 118254885Sdumbbell radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2); 119254885Sdumbbell radeon_ring_write(ring, gpu_addr >> 8); 120254885Sdumbbell radeon_ring_write(ring, 2); 121254885Sdumbbell radeon_ring_write(ring, 0); 122254885Sdumbbell 123254885Sdumbbell /* PS */ 124254885Sdumbbell gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; 125254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 4)); 126254885Sdumbbell radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2); 127254885Sdumbbell radeon_ring_write(ring, gpu_addr >> 8); 128254885Sdumbbell radeon_ring_write(ring, 1); 129254885Sdumbbell radeon_ring_write(ring, 0); 130254885Sdumbbell radeon_ring_write(ring, 2); 131254885Sdumbbell 132254885Sdumbbell gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; 133254885Sdumbbell cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); 134254885Sdumbbell} 135254885Sdumbbell 136254885Sdumbbell/* emits 10 + 1 sync (5) = 15 */ 137254885Sdumbbellstatic void 138254885Sdumbbellset_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) 139254885Sdumbbell{ 140254885Sdumbbell struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 141254885Sdumbbell u32 sq_vtx_constant_word2, sq_vtx_constant_word3; 142254885Sdumbbell 143254885Sdumbbell /* high addr, stride */ 144254885Sdumbbell sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) | 145254885Sdumbbell SQ_VTXC_STRIDE(16); 146254885Sdumbbell#ifdef __BIG_ENDIAN 147254885Sdumbbell sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32); 148254885Sdumbbell#endif 149254885Sdumbbell /* xyzw swizzles */ 150254885Sdumbbell sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) | 151254885Sdumbbell SQ_VTCX_SEL_Y(SQ_SEL_Y) | 152254885Sdumbbell SQ_VTCX_SEL_Z(SQ_SEL_Z) | 153254885Sdumbbell SQ_VTCX_SEL_W(SQ_SEL_W); 154254885Sdumbbell 155254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8)); 156254885Sdumbbell radeon_ring_write(ring, 0x580); 157254885Sdumbbell radeon_ring_write(ring, gpu_addr & 0xffffffff); 158254885Sdumbbell radeon_ring_write(ring, 48 - 1); /* size */ 159254885Sdumbbell radeon_ring_write(ring, sq_vtx_constant_word2); 160254885Sdumbbell radeon_ring_write(ring, sq_vtx_constant_word3); 161254885Sdumbbell radeon_ring_write(ring, 0); 162254885Sdumbbell radeon_ring_write(ring, 0); 163254885Sdumbbell radeon_ring_write(ring, 0); 164254885Sdumbbell radeon_ring_write(ring, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER)); 165254885Sdumbbell 166254885Sdumbbell if ((rdev->family == CHIP_CEDAR) || 167254885Sdumbbell (rdev->family == CHIP_PALM) || 168254885Sdumbbell (rdev->family == CHIP_SUMO) || 169254885Sdumbbell (rdev->family == CHIP_SUMO2) || 170254885Sdumbbell (rdev->family == CHIP_CAICOS)) 171254885Sdumbbell cp_set_surface_sync(rdev, 172254885Sdumbbell PACKET3_TC_ACTION_ENA, 48, gpu_addr); 173254885Sdumbbell else 174254885Sdumbbell cp_set_surface_sync(rdev, 175254885Sdumbbell PACKET3_VC_ACTION_ENA, 48, gpu_addr); 176254885Sdumbbell 177254885Sdumbbell} 178254885Sdumbbell 179254885Sdumbbell/* emits 10 */ 180254885Sdumbbellstatic void 181254885Sdumbbellset_tex_resource(struct radeon_device *rdev, 182254885Sdumbbell int format, int w, int h, int pitch, 183254885Sdumbbell u64 gpu_addr, u32 size) 184254885Sdumbbell{ 185254885Sdumbbell struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 186254885Sdumbbell u32 sq_tex_resource_word0, sq_tex_resource_word1; 187254885Sdumbbell u32 sq_tex_resource_word4, sq_tex_resource_word7; 188254885Sdumbbell 189254885Sdumbbell if (h < 1) 190254885Sdumbbell h = 1; 191254885Sdumbbell 192254885Sdumbbell sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D); 193254885Sdumbbell sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | 194254885Sdumbbell ((w - 1) << 18)); 195254885Sdumbbell sq_tex_resource_word1 = ((h - 1) << 0) | 196254885Sdumbbell TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 197254885Sdumbbell /* xyzw swizzles */ 198254885Sdumbbell sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) | 199254885Sdumbbell TEX_DST_SEL_Y(SQ_SEL_Y) | 200254885Sdumbbell TEX_DST_SEL_Z(SQ_SEL_Z) | 201254885Sdumbbell TEX_DST_SEL_W(SQ_SEL_W); 202254885Sdumbbell 203254885Sdumbbell sq_tex_resource_word7 = format | 204254885Sdumbbell S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE); 205254885Sdumbbell 206254885Sdumbbell cp_set_surface_sync(rdev, 207254885Sdumbbell PACKET3_TC_ACTION_ENA, size, gpu_addr); 208254885Sdumbbell 209254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8)); 210254885Sdumbbell radeon_ring_write(ring, 0); 211254885Sdumbbell radeon_ring_write(ring, sq_tex_resource_word0); 212254885Sdumbbell radeon_ring_write(ring, sq_tex_resource_word1); 213254885Sdumbbell radeon_ring_write(ring, gpu_addr >> 8); 214254885Sdumbbell radeon_ring_write(ring, gpu_addr >> 8); 215254885Sdumbbell radeon_ring_write(ring, sq_tex_resource_word4); 216254885Sdumbbell radeon_ring_write(ring, 0); 217254885Sdumbbell radeon_ring_write(ring, 0); 218254885Sdumbbell radeon_ring_write(ring, sq_tex_resource_word7); 219254885Sdumbbell} 220254885Sdumbbell 221254885Sdumbbell/* emits 12 */ 222254885Sdumbbellstatic void 223254885Sdumbbellset_scissors(struct radeon_device *rdev, int x1, int y1, 224254885Sdumbbell int x2, int y2) 225254885Sdumbbell{ 226254885Sdumbbell struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 227254885Sdumbbell /* workaround some hw bugs */ 228254885Sdumbbell if (x2 == 0) 229254885Sdumbbell x1 = 1; 230254885Sdumbbell if (y2 == 0) 231254885Sdumbbell y1 = 1; 232254885Sdumbbell if (rdev->family >= CHIP_CAYMAN) { 233254885Sdumbbell if ((x2 == 1) && (y2 == 1)) 234254885Sdumbbell x2 = 2; 235254885Sdumbbell } 236254885Sdumbbell 237254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 238254885Sdumbbell radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); 239254885Sdumbbell radeon_ring_write(ring, (x1 << 0) | (y1 << 16)); 240254885Sdumbbell radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); 241254885Sdumbbell 242254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 243254885Sdumbbell radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); 244261455Seadler radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1U << 31)); 245254885Sdumbbell radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); 246254885Sdumbbell 247254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 248254885Sdumbbell radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); 249261455Seadler radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1U << 31)); 250254885Sdumbbell radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); 251254885Sdumbbell} 252254885Sdumbbell 253254885Sdumbbell/* emits 10 */ 254254885Sdumbbellstatic void 255254885Sdumbbelldraw_auto(struct radeon_device *rdev) 256254885Sdumbbell{ 257254885Sdumbbell struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 258254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 259254885Sdumbbell radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2); 260254885Sdumbbell radeon_ring_write(ring, DI_PT_RECTLIST); 261254885Sdumbbell 262254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0)); 263254885Sdumbbell radeon_ring_write(ring, 264254885Sdumbbell#ifdef __BIG_ENDIAN 265254885Sdumbbell (2 << 2) | 266254885Sdumbbell#endif 267254885Sdumbbell DI_INDEX_SIZE_16_BIT); 268254885Sdumbbell 269254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0)); 270254885Sdumbbell radeon_ring_write(ring, 1); 271254885Sdumbbell 272254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); 273254885Sdumbbell radeon_ring_write(ring, 3); 274254885Sdumbbell radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX); 275254885Sdumbbell 276254885Sdumbbell} 277254885Sdumbbell 278254885Sdumbbell/* emits 39 */ 279254885Sdumbbellstatic void 280254885Sdumbbellset_default_state(struct radeon_device *rdev) 281254885Sdumbbell{ 282254885Sdumbbell struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 283254885Sdumbbell u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3; 284254885Sdumbbell u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2; 285254885Sdumbbell u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3; 286254885Sdumbbell int num_ps_gprs, num_vs_gprs, num_temp_gprs; 287254885Sdumbbell int num_gs_gprs, num_es_gprs, num_hs_gprs, num_ls_gprs; 288254885Sdumbbell int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; 289254885Sdumbbell int num_hs_threads, num_ls_threads; 290254885Sdumbbell int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; 291254885Sdumbbell int num_hs_stack_entries, num_ls_stack_entries; 292254885Sdumbbell u64 gpu_addr; 293254885Sdumbbell int dwords; 294254885Sdumbbell 295254885Sdumbbell /* set clear context state */ 296254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 297254885Sdumbbell radeon_ring_write(ring, 0); 298254885Sdumbbell 299254885Sdumbbell if (rdev->family < CHIP_CAYMAN) { 300254885Sdumbbell switch (rdev->family) { 301254885Sdumbbell case CHIP_CEDAR: 302254885Sdumbbell default: 303254885Sdumbbell num_ps_gprs = 93; 304254885Sdumbbell num_vs_gprs = 46; 305254885Sdumbbell num_temp_gprs = 4; 306254885Sdumbbell num_gs_gprs = 31; 307254885Sdumbbell num_es_gprs = 31; 308254885Sdumbbell num_hs_gprs = 23; 309254885Sdumbbell num_ls_gprs = 23; 310254885Sdumbbell num_ps_threads = 96; 311254885Sdumbbell num_vs_threads = 16; 312254885Sdumbbell num_gs_threads = 16; 313254885Sdumbbell num_es_threads = 16; 314254885Sdumbbell num_hs_threads = 16; 315254885Sdumbbell num_ls_threads = 16; 316254885Sdumbbell num_ps_stack_entries = 42; 317254885Sdumbbell num_vs_stack_entries = 42; 318254885Sdumbbell num_gs_stack_entries = 42; 319254885Sdumbbell num_es_stack_entries = 42; 320254885Sdumbbell num_hs_stack_entries = 42; 321254885Sdumbbell num_ls_stack_entries = 42; 322254885Sdumbbell break; 323254885Sdumbbell case CHIP_REDWOOD: 324254885Sdumbbell num_ps_gprs = 93; 325254885Sdumbbell num_vs_gprs = 46; 326254885Sdumbbell num_temp_gprs = 4; 327254885Sdumbbell num_gs_gprs = 31; 328254885Sdumbbell num_es_gprs = 31; 329254885Sdumbbell num_hs_gprs = 23; 330254885Sdumbbell num_ls_gprs = 23; 331254885Sdumbbell num_ps_threads = 128; 332254885Sdumbbell num_vs_threads = 20; 333254885Sdumbbell num_gs_threads = 20; 334254885Sdumbbell num_es_threads = 20; 335254885Sdumbbell num_hs_threads = 20; 336254885Sdumbbell num_ls_threads = 20; 337254885Sdumbbell num_ps_stack_entries = 42; 338254885Sdumbbell num_vs_stack_entries = 42; 339254885Sdumbbell num_gs_stack_entries = 42; 340254885Sdumbbell num_es_stack_entries = 42; 341254885Sdumbbell num_hs_stack_entries = 42; 342254885Sdumbbell num_ls_stack_entries = 42; 343254885Sdumbbell break; 344254885Sdumbbell case CHIP_JUNIPER: 345254885Sdumbbell num_ps_gprs = 93; 346254885Sdumbbell num_vs_gprs = 46; 347254885Sdumbbell num_temp_gprs = 4; 348254885Sdumbbell num_gs_gprs = 31; 349254885Sdumbbell num_es_gprs = 31; 350254885Sdumbbell num_hs_gprs = 23; 351254885Sdumbbell num_ls_gprs = 23; 352254885Sdumbbell num_ps_threads = 128; 353254885Sdumbbell num_vs_threads = 20; 354254885Sdumbbell num_gs_threads = 20; 355254885Sdumbbell num_es_threads = 20; 356254885Sdumbbell num_hs_threads = 20; 357254885Sdumbbell num_ls_threads = 20; 358254885Sdumbbell num_ps_stack_entries = 85; 359254885Sdumbbell num_vs_stack_entries = 85; 360254885Sdumbbell num_gs_stack_entries = 85; 361254885Sdumbbell num_es_stack_entries = 85; 362254885Sdumbbell num_hs_stack_entries = 85; 363254885Sdumbbell num_ls_stack_entries = 85; 364254885Sdumbbell break; 365254885Sdumbbell case CHIP_CYPRESS: 366254885Sdumbbell case CHIP_HEMLOCK: 367254885Sdumbbell num_ps_gprs = 93; 368254885Sdumbbell num_vs_gprs = 46; 369254885Sdumbbell num_temp_gprs = 4; 370254885Sdumbbell num_gs_gprs = 31; 371254885Sdumbbell num_es_gprs = 31; 372254885Sdumbbell num_hs_gprs = 23; 373254885Sdumbbell num_ls_gprs = 23; 374254885Sdumbbell num_ps_threads = 128; 375254885Sdumbbell num_vs_threads = 20; 376254885Sdumbbell num_gs_threads = 20; 377254885Sdumbbell num_es_threads = 20; 378254885Sdumbbell num_hs_threads = 20; 379254885Sdumbbell num_ls_threads = 20; 380254885Sdumbbell num_ps_stack_entries = 85; 381254885Sdumbbell num_vs_stack_entries = 85; 382254885Sdumbbell num_gs_stack_entries = 85; 383254885Sdumbbell num_es_stack_entries = 85; 384254885Sdumbbell num_hs_stack_entries = 85; 385254885Sdumbbell num_ls_stack_entries = 85; 386254885Sdumbbell break; 387254885Sdumbbell case CHIP_PALM: 388254885Sdumbbell num_ps_gprs = 93; 389254885Sdumbbell num_vs_gprs = 46; 390254885Sdumbbell num_temp_gprs = 4; 391254885Sdumbbell num_gs_gprs = 31; 392254885Sdumbbell num_es_gprs = 31; 393254885Sdumbbell num_hs_gprs = 23; 394254885Sdumbbell num_ls_gprs = 23; 395254885Sdumbbell num_ps_threads = 96; 396254885Sdumbbell num_vs_threads = 16; 397254885Sdumbbell num_gs_threads = 16; 398254885Sdumbbell num_es_threads = 16; 399254885Sdumbbell num_hs_threads = 16; 400254885Sdumbbell num_ls_threads = 16; 401254885Sdumbbell num_ps_stack_entries = 42; 402254885Sdumbbell num_vs_stack_entries = 42; 403254885Sdumbbell num_gs_stack_entries = 42; 404254885Sdumbbell num_es_stack_entries = 42; 405254885Sdumbbell num_hs_stack_entries = 42; 406254885Sdumbbell num_ls_stack_entries = 42; 407254885Sdumbbell break; 408254885Sdumbbell case CHIP_SUMO: 409254885Sdumbbell num_ps_gprs = 93; 410254885Sdumbbell num_vs_gprs = 46; 411254885Sdumbbell num_temp_gprs = 4; 412254885Sdumbbell num_gs_gprs = 31; 413254885Sdumbbell num_es_gprs = 31; 414254885Sdumbbell num_hs_gprs = 23; 415254885Sdumbbell num_ls_gprs = 23; 416254885Sdumbbell num_ps_threads = 96; 417254885Sdumbbell num_vs_threads = 25; 418254885Sdumbbell num_gs_threads = 25; 419254885Sdumbbell num_es_threads = 25; 420254885Sdumbbell num_hs_threads = 25; 421254885Sdumbbell num_ls_threads = 25; 422254885Sdumbbell num_ps_stack_entries = 42; 423254885Sdumbbell num_vs_stack_entries = 42; 424254885Sdumbbell num_gs_stack_entries = 42; 425254885Sdumbbell num_es_stack_entries = 42; 426254885Sdumbbell num_hs_stack_entries = 42; 427254885Sdumbbell num_ls_stack_entries = 42; 428254885Sdumbbell break; 429254885Sdumbbell case CHIP_SUMO2: 430254885Sdumbbell num_ps_gprs = 93; 431254885Sdumbbell num_vs_gprs = 46; 432254885Sdumbbell num_temp_gprs = 4; 433254885Sdumbbell num_gs_gprs = 31; 434254885Sdumbbell num_es_gprs = 31; 435254885Sdumbbell num_hs_gprs = 23; 436254885Sdumbbell num_ls_gprs = 23; 437254885Sdumbbell num_ps_threads = 96; 438254885Sdumbbell num_vs_threads = 25; 439254885Sdumbbell num_gs_threads = 25; 440254885Sdumbbell num_es_threads = 25; 441254885Sdumbbell num_hs_threads = 25; 442254885Sdumbbell num_ls_threads = 25; 443254885Sdumbbell num_ps_stack_entries = 85; 444254885Sdumbbell num_vs_stack_entries = 85; 445254885Sdumbbell num_gs_stack_entries = 85; 446254885Sdumbbell num_es_stack_entries = 85; 447254885Sdumbbell num_hs_stack_entries = 85; 448254885Sdumbbell num_ls_stack_entries = 85; 449254885Sdumbbell break; 450254885Sdumbbell case CHIP_BARTS: 451254885Sdumbbell num_ps_gprs = 93; 452254885Sdumbbell num_vs_gprs = 46; 453254885Sdumbbell num_temp_gprs = 4; 454254885Sdumbbell num_gs_gprs = 31; 455254885Sdumbbell num_es_gprs = 31; 456254885Sdumbbell num_hs_gprs = 23; 457254885Sdumbbell num_ls_gprs = 23; 458254885Sdumbbell num_ps_threads = 128; 459254885Sdumbbell num_vs_threads = 20; 460254885Sdumbbell num_gs_threads = 20; 461254885Sdumbbell num_es_threads = 20; 462254885Sdumbbell num_hs_threads = 20; 463254885Sdumbbell num_ls_threads = 20; 464254885Sdumbbell num_ps_stack_entries = 85; 465254885Sdumbbell num_vs_stack_entries = 85; 466254885Sdumbbell num_gs_stack_entries = 85; 467254885Sdumbbell num_es_stack_entries = 85; 468254885Sdumbbell num_hs_stack_entries = 85; 469254885Sdumbbell num_ls_stack_entries = 85; 470254885Sdumbbell break; 471254885Sdumbbell case CHIP_TURKS: 472254885Sdumbbell num_ps_gprs = 93; 473254885Sdumbbell num_vs_gprs = 46; 474254885Sdumbbell num_temp_gprs = 4; 475254885Sdumbbell num_gs_gprs = 31; 476254885Sdumbbell num_es_gprs = 31; 477254885Sdumbbell num_hs_gprs = 23; 478254885Sdumbbell num_ls_gprs = 23; 479254885Sdumbbell num_ps_threads = 128; 480254885Sdumbbell num_vs_threads = 20; 481254885Sdumbbell num_gs_threads = 20; 482254885Sdumbbell num_es_threads = 20; 483254885Sdumbbell num_hs_threads = 20; 484254885Sdumbbell num_ls_threads = 20; 485254885Sdumbbell num_ps_stack_entries = 42; 486254885Sdumbbell num_vs_stack_entries = 42; 487254885Sdumbbell num_gs_stack_entries = 42; 488254885Sdumbbell num_es_stack_entries = 42; 489254885Sdumbbell num_hs_stack_entries = 42; 490254885Sdumbbell num_ls_stack_entries = 42; 491254885Sdumbbell break; 492254885Sdumbbell case CHIP_CAICOS: 493254885Sdumbbell num_ps_gprs = 93; 494254885Sdumbbell num_vs_gprs = 46; 495254885Sdumbbell num_temp_gprs = 4; 496254885Sdumbbell num_gs_gprs = 31; 497254885Sdumbbell num_es_gprs = 31; 498254885Sdumbbell num_hs_gprs = 23; 499254885Sdumbbell num_ls_gprs = 23; 500254885Sdumbbell num_ps_threads = 128; 501254885Sdumbbell num_vs_threads = 10; 502254885Sdumbbell num_gs_threads = 10; 503254885Sdumbbell num_es_threads = 10; 504254885Sdumbbell num_hs_threads = 10; 505254885Sdumbbell num_ls_threads = 10; 506254885Sdumbbell num_ps_stack_entries = 42; 507254885Sdumbbell num_vs_stack_entries = 42; 508254885Sdumbbell num_gs_stack_entries = 42; 509254885Sdumbbell num_es_stack_entries = 42; 510254885Sdumbbell num_hs_stack_entries = 42; 511254885Sdumbbell num_ls_stack_entries = 42; 512254885Sdumbbell break; 513254885Sdumbbell } 514254885Sdumbbell 515254885Sdumbbell if ((rdev->family == CHIP_CEDAR) || 516254885Sdumbbell (rdev->family == CHIP_PALM) || 517254885Sdumbbell (rdev->family == CHIP_SUMO) || 518254885Sdumbbell (rdev->family == CHIP_SUMO2) || 519254885Sdumbbell (rdev->family == CHIP_CAICOS)) 520254885Sdumbbell sq_config = 0; 521254885Sdumbbell else 522254885Sdumbbell sq_config = VC_ENABLE; 523254885Sdumbbell 524254885Sdumbbell sq_config |= (EXPORT_SRC_C | 525254885Sdumbbell CS_PRIO(0) | 526254885Sdumbbell LS_PRIO(0) | 527254885Sdumbbell HS_PRIO(0) | 528254885Sdumbbell PS_PRIO(0) | 529254885Sdumbbell VS_PRIO(1) | 530254885Sdumbbell GS_PRIO(2) | 531254885Sdumbbell ES_PRIO(3)); 532254885Sdumbbell 533254885Sdumbbell sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | 534254885Sdumbbell NUM_VS_GPRS(num_vs_gprs) | 535254885Sdumbbell NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); 536254885Sdumbbell sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | 537254885Sdumbbell NUM_ES_GPRS(num_es_gprs)); 538254885Sdumbbell sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) | 539254885Sdumbbell NUM_LS_GPRS(num_ls_gprs)); 540254885Sdumbbell sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | 541254885Sdumbbell NUM_VS_THREADS(num_vs_threads) | 542254885Sdumbbell NUM_GS_THREADS(num_gs_threads) | 543254885Sdumbbell NUM_ES_THREADS(num_es_threads)); 544254885Sdumbbell sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) | 545254885Sdumbbell NUM_LS_THREADS(num_ls_threads)); 546254885Sdumbbell sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | 547254885Sdumbbell NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); 548254885Sdumbbell sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | 549254885Sdumbbell NUM_ES_STACK_ENTRIES(num_es_stack_entries)); 550254885Sdumbbell sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | 551254885Sdumbbell NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); 552254885Sdumbbell 553254885Sdumbbell /* disable dyn gprs */ 554254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 555254885Sdumbbell radeon_ring_write(ring, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); 556254885Sdumbbell radeon_ring_write(ring, 0); 557254885Sdumbbell 558254885Sdumbbell /* setup LDS */ 559254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 560254885Sdumbbell radeon_ring_write(ring, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2); 561254885Sdumbbell radeon_ring_write(ring, 0x10001000); 562254885Sdumbbell 563254885Sdumbbell /* SQ config */ 564254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 11)); 565254885Sdumbbell radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); 566254885Sdumbbell radeon_ring_write(ring, sq_config); 567254885Sdumbbell radeon_ring_write(ring, sq_gpr_resource_mgmt_1); 568254885Sdumbbell radeon_ring_write(ring, sq_gpr_resource_mgmt_2); 569254885Sdumbbell radeon_ring_write(ring, sq_gpr_resource_mgmt_3); 570254885Sdumbbell radeon_ring_write(ring, 0); 571254885Sdumbbell radeon_ring_write(ring, 0); 572254885Sdumbbell radeon_ring_write(ring, sq_thread_resource_mgmt); 573254885Sdumbbell radeon_ring_write(ring, sq_thread_resource_mgmt_2); 574254885Sdumbbell radeon_ring_write(ring, sq_stack_resource_mgmt_1); 575254885Sdumbbell radeon_ring_write(ring, sq_stack_resource_mgmt_2); 576254885Sdumbbell radeon_ring_write(ring, sq_stack_resource_mgmt_3); 577254885Sdumbbell } 578254885Sdumbbell 579254885Sdumbbell /* CONTEXT_CONTROL */ 580254885Sdumbbell radeon_ring_write(ring, 0xc0012800); 581254885Sdumbbell radeon_ring_write(ring, 0x80000000); 582254885Sdumbbell radeon_ring_write(ring, 0x80000000); 583254885Sdumbbell 584254885Sdumbbell /* SQ_VTX_BASE_VTX_LOC */ 585254885Sdumbbell radeon_ring_write(ring, 0xc0026f00); 586254885Sdumbbell radeon_ring_write(ring, 0x00000000); 587254885Sdumbbell radeon_ring_write(ring, 0x00000000); 588254885Sdumbbell radeon_ring_write(ring, 0x00000000); 589254885Sdumbbell 590254885Sdumbbell /* SET_SAMPLER */ 591254885Sdumbbell radeon_ring_write(ring, 0xc0036e00); 592254885Sdumbbell radeon_ring_write(ring, 0x00000000); 593254885Sdumbbell radeon_ring_write(ring, 0x00000012); 594254885Sdumbbell radeon_ring_write(ring, 0x00000000); 595254885Sdumbbell radeon_ring_write(ring, 0x00000000); 596254885Sdumbbell 597254885Sdumbbell /* set to DX10/11 mode */ 598254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); 599254885Sdumbbell radeon_ring_write(ring, 1); 600254885Sdumbbell 601254885Sdumbbell /* emit an IB pointing at default state */ 602254885Sdumbbell dwords = roundup2(rdev->r600_blit.state_len, 0x10); 603254885Sdumbbell gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; 604254885Sdumbbell radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 605254885Sdumbbell radeon_ring_write(ring, gpu_addr & 0xFFFFFFFC); 606254885Sdumbbell radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF); 607254885Sdumbbell radeon_ring_write(ring, dwords); 608254885Sdumbbell 609254885Sdumbbell} 610254885Sdumbbell 611254885Sdumbbellint evergreen_blit_init(struct radeon_device *rdev) 612254885Sdumbbell{ 613254885Sdumbbell u32 obj_size; 614254885Sdumbbell int i, r, dwords; 615254885Sdumbbell void *ptr; 616254885Sdumbbell u32 packet2s[16]; 617254885Sdumbbell int num_packet2s = 0; 618254885Sdumbbell 619254885Sdumbbell rdev->r600_blit.primitives.set_render_target = set_render_target; 620254885Sdumbbell rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync; 621254885Sdumbbell rdev->r600_blit.primitives.set_shaders = set_shaders; 622254885Sdumbbell rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource; 623254885Sdumbbell rdev->r600_blit.primitives.set_tex_resource = set_tex_resource; 624254885Sdumbbell rdev->r600_blit.primitives.set_scissors = set_scissors; 625254885Sdumbbell rdev->r600_blit.primitives.draw_auto = draw_auto; 626254885Sdumbbell rdev->r600_blit.primitives.set_default_state = set_default_state; 627254885Sdumbbell 628254885Sdumbbell rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ 629254885Sdumbbell rdev->r600_blit.ring_size_common += 55; /* shaders + def state */ 630254885Sdumbbell rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */ 631254885Sdumbbell rdev->r600_blit.ring_size_common += 5; /* done copy */ 632254885Sdumbbell rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ 633254885Sdumbbell 634254885Sdumbbell rdev->r600_blit.ring_size_per_loop = 74; 635254885Sdumbbell if (rdev->family >= CHIP_CAYMAN) 636254885Sdumbbell rdev->r600_blit.ring_size_per_loop += 9; /* additional DWs for surface sync */ 637254885Sdumbbell 638254885Sdumbbell rdev->r600_blit.max_dim = 16384; 639254885Sdumbbell 640254885Sdumbbell rdev->r600_blit.state_offset = 0; 641254885Sdumbbell 642254885Sdumbbell if (rdev->family < CHIP_CAYMAN) 643254885Sdumbbell rdev->r600_blit.state_len = evergreen_default_size; 644254885Sdumbbell else 645254885Sdumbbell rdev->r600_blit.state_len = cayman_default_size; 646254885Sdumbbell 647254885Sdumbbell dwords = rdev->r600_blit.state_len; 648254885Sdumbbell while (dwords & 0xf) { 649254885Sdumbbell packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0)); 650254885Sdumbbell dwords++; 651254885Sdumbbell } 652254885Sdumbbell 653254885Sdumbbell obj_size = dwords * 4; 654254885Sdumbbell obj_size = roundup2(obj_size, 256); 655254885Sdumbbell 656254885Sdumbbell rdev->r600_blit.vs_offset = obj_size; 657254885Sdumbbell if (rdev->family < CHIP_CAYMAN) 658254885Sdumbbell obj_size += evergreen_vs_size * 4; 659254885Sdumbbell else 660254885Sdumbbell obj_size += cayman_vs_size * 4; 661254885Sdumbbell obj_size = roundup2(obj_size, 256); 662254885Sdumbbell 663254885Sdumbbell rdev->r600_blit.ps_offset = obj_size; 664254885Sdumbbell if (rdev->family < CHIP_CAYMAN) 665254885Sdumbbell obj_size += evergreen_ps_size * 4; 666254885Sdumbbell else 667254885Sdumbbell obj_size += cayman_ps_size * 4; 668254885Sdumbbell obj_size = roundup2(obj_size, 256); 669254885Sdumbbell 670254885Sdumbbell /* pin copy shader into vram if not already initialized */ 671254885Sdumbbell if (!rdev->r600_blit.shader_obj) { 672254885Sdumbbell r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, 673254885Sdumbbell RADEON_GEM_DOMAIN_VRAM, 674254885Sdumbbell NULL, &rdev->r600_blit.shader_obj); 675254885Sdumbbell if (r) { 676254885Sdumbbell DRM_ERROR("evergreen failed to allocate shader\n"); 677254885Sdumbbell return r; 678254885Sdumbbell } 679254885Sdumbbell 680254885Sdumbbell r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 681254885Sdumbbell if (unlikely(r != 0)) 682254885Sdumbbell return r; 683254885Sdumbbell r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, 684254885Sdumbbell &rdev->r600_blit.shader_gpu_addr); 685254885Sdumbbell radeon_bo_unreserve(rdev->r600_blit.shader_obj); 686254885Sdumbbell if (r) { 687254885Sdumbbell dev_err(rdev->dev, "(%d) pin blit object failed\n", r); 688254885Sdumbbell return r; 689254885Sdumbbell } 690254885Sdumbbell } 691254885Sdumbbell 692254885Sdumbbell DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n", 693254885Sdumbbell obj_size, 694254885Sdumbbell rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset); 695254885Sdumbbell 696254885Sdumbbell r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 697254885Sdumbbell if (unlikely(r != 0)) 698254885Sdumbbell return r; 699254885Sdumbbell r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr); 700254885Sdumbbell if (r) { 701254885Sdumbbell DRM_ERROR("failed to map blit object %d\n", r); 702254885Sdumbbell return r; 703254885Sdumbbell } 704254885Sdumbbell 705254885Sdumbbell if (rdev->family < CHIP_CAYMAN) { 706254885Sdumbbell memcpy_toio((char *)ptr + rdev->r600_blit.state_offset, 707254885Sdumbbell evergreen_default_state, rdev->r600_blit.state_len * 4); 708254885Sdumbbell 709254885Sdumbbell if (num_packet2s) 710254885Sdumbbell memcpy_toio((char *)ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), 711254885Sdumbbell packet2s, num_packet2s * 4); 712254885Sdumbbell for (i = 0; i < evergreen_vs_size; i++) 713254885Sdumbbell *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]); 714254885Sdumbbell for (i = 0; i < evergreen_ps_size; i++) 715254885Sdumbbell *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]); 716254885Sdumbbell } else { 717254885Sdumbbell memcpy_toio((char *)ptr + rdev->r600_blit.state_offset, 718254885Sdumbbell cayman_default_state, rdev->r600_blit.state_len * 4); 719254885Sdumbbell 720254885Sdumbbell if (num_packet2s) 721254885Sdumbbell memcpy_toio((char *)ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), 722254885Sdumbbell packet2s, num_packet2s * 4); 723254885Sdumbbell for (i = 0; i < cayman_vs_size; i++) 724254885Sdumbbell *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]); 725254885Sdumbbell for (i = 0; i < cayman_ps_size; i++) 726254885Sdumbbell *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]); 727254885Sdumbbell } 728254885Sdumbbell radeon_bo_kunmap(rdev->r600_blit.shader_obj); 729254885Sdumbbell radeon_bo_unreserve(rdev->r600_blit.shader_obj); 730254885Sdumbbell 731254885Sdumbbell radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); 732254885Sdumbbell return 0; 733254885Sdumbbell} 734