rc_calc_fpu.c revision 1.1
1/* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26#include "rc_calc_fpu.h" 27 28#include "qp_tables.h" 29#include "amdgpu_dm/dc_fpu.h" 30 31#define table_hash(mode, bpc, max_min) ((mode << 16) | (bpc << 8) | max_min) 32 33#define MODE_SELECT(val444, val422, val420) \ 34 (cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420)) 35 36 37#define TABLE_CASE(mode, bpc, max) case (table_hash(mode, BPC_##bpc, max)): \ 38 table = qp_table_##mode##_##bpc##bpc_##max; \ 39 table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \ 40 break 41 42static int median3(int a, int b, int c) 43{ 44 if (a > b) 45 swap(a, b); 46 if (b > c) 47 swap(b, c); 48 if (a > b) 49 swap(b, c); 50 51 return b; 52} 53 54static double dsc_roundf(double num) 55{ 56 if (num < 0.0) 57 num = num - 0.5; 58 else 59 num = num + 0.5; 60 61 return (int)(num); 62} 63 64static double dsc_ceil(double num) 65{ 66 double retval = (int)num; 67 68 if (retval != num && num > 0) 69 retval = num + 1; 70 71 return (int)retval; 72} 73 74static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, 75 enum max_min max_min, float bpp) 76{ 77 int mode = MODE_SELECT(444, 422, 420); 78 int sel = table_hash(mode, bpc, max_min); 79 int table_size = 0; 80 int index; 81 const struct qp_entry *table = 0L; 82 83 // alias enum 84 enum { min = DAL_MM_MIN, max = DAL_MM_MAX }; 85 switch (sel) { 86 TABLE_CASE(444, 8, max); 87 TABLE_CASE(444, 8, min); 88 TABLE_CASE(444, 10, max); 89 TABLE_CASE(444, 10, min); 90 TABLE_CASE(444, 12, max); 91 TABLE_CASE(444, 12, min); 92 TABLE_CASE(422, 8, max); 93 TABLE_CASE(422, 8, min); 94 TABLE_CASE(422, 10, max); 95 TABLE_CASE(422, 10, min); 96 TABLE_CASE(422, 12, max); 97 TABLE_CASE(422, 12, min); 98 TABLE_CASE(420, 8, max); 99 TABLE_CASE(420, 8, min); 100 TABLE_CASE(420, 10, max); 101 TABLE_CASE(420, 10, min); 102 TABLE_CASE(420, 12, max); 103 TABLE_CASE(420, 12, min); 104 } 105 106 if (table == 0) 107 return; 108 109 index = (bpp - table[0].bpp) * 2; 110 111 /* requested size is bigger than the table */ 112 if (index >= table_size) { 113 dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n"); 114 return; 115 } 116 117 memcpy(qps, table[index].qps, sizeof(qp_set)); 118} 119 120static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp) 121{ 122 int *p = ofs; 123 124 if (mode == CM_444 || mode == CM_RGB) { 125 *p++ = (bpp <= 6) ? (0) : ((((bpp >= 8) && (bpp <= 12))) ? (2) : ((bpp >= 15) ? (10) : ((((bpp > 6) && (bpp < 8))) ? (0 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (2 + dsc_roundf((bpp - 12) * (8 / 3.0)))))); 126 *p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (8) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (8 / 3.0)))))); 127 *p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (6) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (6 / 3.0)))))); 128 *p++ = (bpp <= 6) ? (-4) : ((((bpp >= 8) && (bpp <= 12))) ? (-2) : ((bpp >= 15) ? (4) : ((((bpp > 6) && (bpp < 8))) ? (-4 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-2 + dsc_roundf((bpp - 12) * (6 / 3.0)))))); 129 *p++ = (bpp <= 6) ? (-6) : ((((bpp >= 8) && (bpp <= 12))) ? (-4) : ((bpp >= 15) ? (2) : ((((bpp > 6) && (bpp < 8))) ? (-6 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-4 + dsc_roundf((bpp - 12) * (6 / 3.0)))))); 130 *p++ = (bpp <= 12) ? (-6) : ((bpp >= 15) ? (0) : (-6 + dsc_roundf((bpp - 12) * (6 / 3.0)))); 131 *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-2) : (-8 + dsc_roundf((bpp - 12) * (6 / 3.0)))); 132 *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-4) : (-8 + dsc_roundf((bpp - 12) * (4 / 3.0)))); 133 *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-6) : (-8 + dsc_roundf((bpp - 12) * (2 / 3.0)))); 134 *p++ = (bpp <= 12) ? (-10) : ((bpp >= 15) ? (-8) : (-10 + dsc_roundf((bpp - 12) * (2 / 3.0)))); 135 *p++ = -10; 136 *p++ = (bpp <= 6) ? (-12) : ((bpp >= 8) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2 / 2.0)))); 137 *p++ = -12; 138 *p++ = -12; 139 *p++ = -12; 140 } else if (mode == CM_422) { 141 *p++ = (bpp <= 8) ? (2) : ((bpp >= 10) ? (10) : (2 + dsc_roundf((bpp - 8) * (8 / 2.0)))); 142 *p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (8) : (0 + dsc_roundf((bpp - 8) * (8 / 2.0)))); 143 *p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (6) : (0 + dsc_roundf((bpp - 8) * (6 / 2.0)))); 144 *p++ = (bpp <= 8) ? (-2) : ((bpp >= 10) ? (4) : (-2 + dsc_roundf((bpp - 8) * (6 / 2.0)))); 145 *p++ = (bpp <= 8) ? (-4) : ((bpp >= 10) ? (2) : (-4 + dsc_roundf((bpp - 8) * (6 / 2.0)))); 146 *p++ = (bpp <= 8) ? (-6) : ((bpp >= 10) ? (0) : (-6 + dsc_roundf((bpp - 8) * (6 / 2.0)))); 147 *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-2) : (-8 + dsc_roundf((bpp - 8) * (6 / 2.0)))); 148 *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-4) : (-8 + dsc_roundf((bpp - 8) * (4 / 2.0)))); 149 *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-6) : (-8 + dsc_roundf((bpp - 8) * (2 / 2.0)))); 150 *p++ = (bpp <= 8) ? (-10) : ((bpp >= 10) ? (-8) : (-10 + dsc_roundf((bpp - 8) * (2 / 2.0)))); 151 *p++ = -10; 152 *p++ = (bpp <= 6) ? (-12) : ((bpp >= 7) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2.0 / 1)))); 153 *p++ = -12; 154 *p++ = -12; 155 *p++ = -12; 156 } else { 157 *p++ = (bpp <= 6) ? (2) : ((bpp >= 8) ? (10) : (2 + dsc_roundf((bpp - 6) * (8 / 2.0)))); 158 *p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (8) : (0 + dsc_roundf((bpp - 6) * (8 / 2.0)))); 159 *p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (6) : (0 + dsc_roundf((bpp - 6) * (6 / 2.0)))); 160 *p++ = (bpp <= 6) ? (-2) : ((bpp >= 8) ? (4) : (-2 + dsc_roundf((bpp - 6) * (6 / 2.0)))); 161 *p++ = (bpp <= 6) ? (-4) : ((bpp >= 8) ? (2) : (-4 + dsc_roundf((bpp - 6) * (6 / 2.0)))); 162 *p++ = (bpp <= 6) ? (-6) : ((bpp >= 8) ? (0) : (-6 + dsc_roundf((bpp - 6) * (6 / 2.0)))); 163 *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-2) : (-8 + dsc_roundf((bpp - 6) * (6 / 2.0)))); 164 *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-4) : (-8 + dsc_roundf((bpp - 6) * (4 / 2.0)))); 165 *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-6) : (-8 + dsc_roundf((bpp - 6) * (2 / 2.0)))); 166 *p++ = (bpp <= 6) ? (-10) : ((bpp >= 8) ? (-8) : (-10 + dsc_roundf((bpp - 6) * (2 / 2.0)))); 167 *p++ = -10; 168 *p++ = (bpp <= 4) ? (-12) : ((bpp >= 5) ? (-10) : (-12 + dsc_roundf((bpp - 4) * (2 / 1.0)))); 169 *p++ = -12; 170 *p++ = -12; 171 *p++ = -12; 172 } 173} 174 175void _do_calc_rc_params(struct rc_params *rc, 176 enum colour_mode cm, 177 enum bits_per_comp bpc, 178 u16 drm_bpp, 179 bool is_navite_422_or_420, 180 int slice_width, 181 int slice_height, 182 int minor_version) 183{ 184 float bpp; 185 float bpp_group; 186 float initial_xmit_delay_factor; 187 int padding_pixels; 188 int i; 189 190 dc_assert_fp_enabled(); 191 192 bpp = ((float)drm_bpp / 16.0); 193 /* in native_422 or native_420 modes, the bits_per_pixel is double the 194 * target bpp (the latter is what calc_rc_params expects) 195 */ 196 if (is_navite_422_or_420) 197 bpp /= 2.0; 198 199 rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); 200 rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); 201 202 bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0); 203 204 switch (cm) { 205 case CM_420: 206 rc->initial_fullness_offset = (bpp >= 6) ? (2048) : ((bpp <= 4) ? (6144) : ((((bpp > 4) && (bpp <= 5))) ? (6144 - dsc_roundf((bpp - 4) * (512))) : (5632 - dsc_roundf((bpp - 5) * (3584))))); 207 rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 3) - (3 * bpp_group))); 208 rc->second_line_bpg_offset = median3(0, 12, (int)((3 * bpc * 3) - (3 * bpp_group))); 209 break; 210 case CM_422: 211 rc->initial_fullness_offset = (bpp >= 8) ? (2048) : ((bpp <= 7) ? (5632) : (5632 - dsc_roundf((bpp - 7) * (3584)))); 212 rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 4) - (3 * bpp_group))); 213 rc->second_line_bpg_offset = 0; 214 break; 215 case CM_444: 216 case CM_RGB: 217 rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <= 8) ? (6144) : ((((bpp > 8) && (bpp <= 10))) ? (6144 - dsc_roundf((bpp - 8) * (512 / 2))) : (5632 - dsc_roundf((bpp - 10) * (3584 / 2))))); 218 rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group))); 219 rc->second_line_bpg_offset = 0; 220 break; 221 } 222 223 initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0; 224 rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/initial_xmit_delay_factor); 225 226 if (cm == CM_422 || cm == CM_420) 227 slice_width /= 2; 228 229 padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0; 230 if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) { 231 if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1) 232 rc->initial_xmit_delay++; 233 } 234 235 rc->flatness_min_qp = ((bpc == BPC_8) ? (3) : ((bpc == BPC_10) ? (7) : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); 236 rc->flatness_max_qp = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); 237 rc->flatness_det_thresh = 2 << (bpc - 8); 238 239 get_qp_set(rc->qp_min, cm, bpc, DAL_MM_MIN, bpp); 240 get_qp_set(rc->qp_max, cm, bpc, DAL_MM_MAX, bpp); 241 if (cm == CM_444 && minor_version == 1) { 242 for (i = 0; i < QP_SET_SIZE; ++i) { 243 rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0; 244 rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0; 245 } 246 } 247 get_ofs_set(rc->ofs, cm, bpp); 248 249 /* fixed parameters */ 250 rc->rc_model_size = 8192; 251 rc->rc_edge_factor = 6; 252 rc->rc_tgt_offset_hi = 3; 253 rc->rc_tgt_offset_lo = 3; 254 255 rc->rc_buf_thresh[0] = 896; 256 rc->rc_buf_thresh[1] = 1792; 257 rc->rc_buf_thresh[2] = 2688; 258 rc->rc_buf_thresh[3] = 3584; 259 rc->rc_buf_thresh[4] = 4480; 260 rc->rc_buf_thresh[5] = 5376; 261 rc->rc_buf_thresh[6] = 6272; 262 rc->rc_buf_thresh[7] = 6720; 263 rc->rc_buf_thresh[8] = 7168; 264 rc->rc_buf_thresh[9] = 7616; 265 rc->rc_buf_thresh[10] = 7744; 266 rc->rc_buf_thresh[11] = 7872; 267 rc->rc_buf_thresh[12] = 8000; 268 rc->rc_buf_thresh[13] = 8064; 269} 270 271u32 _do_bytes_per_pixel_calc(int slice_width, 272 u16 drm_bpp, 273 bool is_navite_422_or_420) 274{ 275 float bpp; 276 u32 bytes_per_pixel; 277 double d_bytes_per_pixel; 278 279 dc_assert_fp_enabled(); 280 281 bpp = ((float)drm_bpp / 16.0); 282 d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width; 283 // TODO: Make sure the formula for calculating this is precise (ceiling 284 // vs. floor, and at what point they should be applied) 285 if (is_navite_422_or_420) 286 d_bytes_per_pixel /= 2; 287 288 bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000); 289 290 return bytes_per_pixel; 291} 292