1/* 2 * DXVA2 H264 HW acceleration. 3 * 4 * copyright (c) 2009 Laurent Aimar 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23#include "dxva2_internal.h" 24#include "h264.h" 25#include "h264data.h" 26#include "mpegutils.h" 27 28struct dxva2_picture_context { 29 DXVA_PicParams_H264 pp; 30 DXVA_Qmatrix_H264 qm; 31 unsigned slice_count; 32 DXVA_Slice_H264_Short slice_short[MAX_SLICES]; 33 DXVA_Slice_H264_Long slice_long[MAX_SLICES]; 34 const uint8_t *bitstream; 35 unsigned bitstream_size; 36}; 37 38static void fill_picture_entry(DXVA_PicEntry_H264 *pic, 39 unsigned index, unsigned flag) 40{ 41 assert((index&0x7f) == index && (flag&0x01) == flag); 42 pic->bPicEntry = index | (flag << 7); 43} 44 45static void fill_picture_parameters(struct dxva_context *ctx, const H264Context *h, 46 DXVA_PicParams_H264 *pp) 47{ 48 const H264Picture *current_picture = h->cur_pic_ptr; 49 int i, j; 50 51 memset(pp, 0, sizeof(*pp)); 52 /* Configure current picture */ 53 fill_picture_entry(&pp->CurrPic, 54 ff_dxva2_get_surface_index(ctx, ¤t_picture->f), 55 h->picture_structure == PICT_BOTTOM_FIELD); 56 /* Configure the set of references */ 57 pp->UsedForReferenceFlags = 0; 58 pp->NonExistingFrameFlags = 0; 59 for (i = 0, j = 0; i < FF_ARRAY_ELEMS(pp->RefFrameList); i++) { 60 const H264Picture *r; 61 if (j < h->short_ref_count) { 62 r = h->short_ref[j++]; 63 } else { 64 r = NULL; 65 while (!r && j < h->short_ref_count + 16) 66 r = h->long_ref[j++ - h->short_ref_count]; 67 } 68 if (r) { 69 fill_picture_entry(&pp->RefFrameList[i], 70 ff_dxva2_get_surface_index(ctx, &r->f), 71 r->long_ref != 0); 72 73 if ((r->reference & PICT_TOP_FIELD) && r->field_poc[0] != INT_MAX) 74 pp->FieldOrderCntList[i][0] = r->field_poc[0]; 75 if ((r->reference & PICT_BOTTOM_FIELD) && r->field_poc[1] != INT_MAX) 76 pp->FieldOrderCntList[i][1] = r->field_poc[1]; 77 78 pp->FrameNumList[i] = r->long_ref ? r->pic_id : r->frame_num; 79 if (r->reference & PICT_TOP_FIELD) 80 pp->UsedForReferenceFlags |= 1 << (2*i + 0); 81 if (r->reference & PICT_BOTTOM_FIELD) 82 pp->UsedForReferenceFlags |= 1 << (2*i + 1); 83 } else { 84 pp->RefFrameList[i].bPicEntry = 0xff; 85 pp->FieldOrderCntList[i][0] = 0; 86 pp->FieldOrderCntList[i][1] = 0; 87 pp->FrameNumList[i] = 0; 88 } 89 } 90 91 pp->wFrameWidthInMbsMinus1 = h->mb_width - 1; 92 pp->wFrameHeightInMbsMinus1 = h->mb_height - 1; 93 pp->num_ref_frames = h->sps.ref_frame_count; 94 95 pp->wBitFields = ((h->picture_structure != PICT_FRAME) << 0) | 96 ((h->sps.mb_aff && 97 (h->picture_structure == PICT_FRAME)) << 1) | 98 (h->sps.residual_color_transform_flag << 2) | 99 /* sp_for_switch_flag (not implemented by FFmpeg) */ 100 (0 << 3) | 101 (h->sps.chroma_format_idc << 4) | 102 ((h->nal_ref_idc != 0) << 6) | 103 (h->pps.constrained_intra_pred << 7) | 104 (h->pps.weighted_pred << 8) | 105 (h->pps.weighted_bipred_idc << 9) | 106 /* MbsConsecutiveFlag */ 107 (1 << 11) | 108 (h->sps.frame_mbs_only_flag << 12) | 109 (h->pps.transform_8x8_mode << 13) | 110 ((h->sps.level_idc >= 31) << 14) | 111 /* IntraPicFlag (Modified if we detect a non 112 * intra slice in dxva2_h264_decode_slice) */ 113 (1 << 15); 114 115 pp->bit_depth_luma_minus8 = h->sps.bit_depth_luma - 8; 116 pp->bit_depth_chroma_minus8 = h->sps.bit_depth_chroma - 8; 117 if (ctx->workaround & FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG) 118 pp->Reserved16Bits = 0; 119 else if (ctx->workaround & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO) 120 pp->Reserved16Bits = 0x34c; 121 else 122 pp->Reserved16Bits = 3; /* FIXME is there a way to detect the right mode ? */ 123 pp->StatusReportFeedbackNumber = 1 + ctx->report_id++; 124 pp->CurrFieldOrderCnt[0] = 0; 125 if ((h->picture_structure & PICT_TOP_FIELD) && 126 current_picture->field_poc[0] != INT_MAX) 127 pp->CurrFieldOrderCnt[0] = current_picture->field_poc[0]; 128 pp->CurrFieldOrderCnt[1] = 0; 129 if ((h->picture_structure & PICT_BOTTOM_FIELD) && 130 current_picture->field_poc[1] != INT_MAX) 131 pp->CurrFieldOrderCnt[1] = current_picture->field_poc[1]; 132 pp->pic_init_qs_minus26 = h->pps.init_qs - 26; 133 pp->chroma_qp_index_offset = h->pps.chroma_qp_index_offset[0]; 134 pp->second_chroma_qp_index_offset = h->pps.chroma_qp_index_offset[1]; 135 pp->ContinuationFlag = 1; 136 pp->pic_init_qp_minus26 = h->pps.init_qp - 26; 137 pp->num_ref_idx_l0_active_minus1 = h->pps.ref_count[0] - 1; 138 pp->num_ref_idx_l1_active_minus1 = h->pps.ref_count[1] - 1; 139 pp->Reserved8BitsA = 0; 140 pp->frame_num = h->frame_num; 141 pp->log2_max_frame_num_minus4 = h->sps.log2_max_frame_num - 4; 142 pp->pic_order_cnt_type = h->sps.poc_type; 143 if (h->sps.poc_type == 0) 144 pp->log2_max_pic_order_cnt_lsb_minus4 = h->sps.log2_max_poc_lsb - 4; 145 else if (h->sps.poc_type == 1) 146 pp->delta_pic_order_always_zero_flag = h->sps.delta_pic_order_always_zero_flag; 147 pp->direct_8x8_inference_flag = h->sps.direct_8x8_inference_flag; 148 pp->entropy_coding_mode_flag = h->pps.cabac; 149 pp->pic_order_present_flag = h->pps.pic_order_present; 150 pp->num_slice_groups_minus1 = h->pps.slice_group_count - 1; 151 pp->slice_group_map_type = h->pps.mb_slice_group_map_type; 152 pp->deblocking_filter_control_present_flag = h->pps.deblocking_filter_parameters_present; 153 pp->redundant_pic_cnt_present_flag= h->pps.redundant_pic_cnt_present; 154 pp->Reserved8BitsB = 0; 155 pp->slice_group_change_rate_minus1= 0; /* XXX not implemented by FFmpeg */ 156 //pp->SliceGroupMap[810]; /* XXX not implemented by FFmpeg */ 157} 158 159static void fill_scaling_lists(struct dxva_context *ctx, const H264Context *h, DXVA_Qmatrix_H264 *qm) 160{ 161 unsigned i, j; 162 memset(qm, 0, sizeof(*qm)); 163 if (ctx->workaround & FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG) { 164 for (i = 0; i < 6; i++) 165 for (j = 0; j < 16; j++) 166 qm->bScalingLists4x4[i][j] = h->pps.scaling_matrix4[i][j]; 167 168 for (i = 0; i < 64; i++) { 169 qm->bScalingLists8x8[0][i] = h->pps.scaling_matrix8[0][i]; 170 qm->bScalingLists8x8[1][i] = h->pps.scaling_matrix8[3][i]; 171 } 172 } else { 173 for (i = 0; i < 6; i++) 174 for (j = 0; j < 16; j++) 175 qm->bScalingLists4x4[i][j] = h->pps.scaling_matrix4[i][zigzag_scan[j]]; 176 177 for (i = 0; i < 64; i++) { 178 qm->bScalingLists8x8[0][i] = h->pps.scaling_matrix8[0][ff_zigzag_direct[i]]; 179 qm->bScalingLists8x8[1][i] = h->pps.scaling_matrix8[3][ff_zigzag_direct[i]]; 180 } 181 } 182} 183 184static int is_slice_short(struct dxva_context *ctx) 185{ 186 assert(ctx->cfg->ConfigBitstreamRaw == 1 || 187 ctx->cfg->ConfigBitstreamRaw == 2); 188 return ctx->cfg->ConfigBitstreamRaw == 2; 189} 190 191static void fill_slice_short(DXVA_Slice_H264_Short *slice, 192 unsigned position, unsigned size) 193{ 194 memset(slice, 0, sizeof(*slice)); 195 slice->BSNALunitDataLocation = position; 196 slice->SliceBytesInBuffer = size; 197 slice->wBadSliceChopping = 0; 198} 199 200static int get_refpic_index(const DXVA_PicParams_H264 *pp, int surface_index) 201{ 202 int i; 203 for (i = 0; i < FF_ARRAY_ELEMS(pp->RefFrameList); i++) { 204 if ((pp->RefFrameList[i].bPicEntry & 0x7f) == surface_index) 205 return i; 206 } 207 return 0x7f; 208} 209 210static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, 211 const DXVA_PicParams_H264 *pp, unsigned position, unsigned size) 212{ 213 const H264Context *h = avctx->priv_data; 214 struct dxva_context *ctx = avctx->hwaccel_context; 215 unsigned list; 216 217 memset(slice, 0, sizeof(*slice)); 218 slice->BSNALunitDataLocation = position; 219 slice->SliceBytesInBuffer = size; 220 slice->wBadSliceChopping = 0; 221 222 slice->first_mb_in_slice = (h->mb_y >> FIELD_OR_MBAFF_PICTURE(h)) * h->mb_width + h->mb_x; 223 slice->NumMbsForSlice = 0; /* XXX it is set once we have all slices */ 224 slice->BitOffsetToSliceData = get_bits_count(&h->gb); 225 slice->slice_type = ff_h264_get_slice_type(h); 226 if (h->slice_type_fixed) 227 slice->slice_type += 5; 228 slice->luma_log2_weight_denom = h->luma_log2_weight_denom; 229 slice->chroma_log2_weight_denom = h->chroma_log2_weight_denom; 230 if (h->list_count > 0) 231 slice->num_ref_idx_l0_active_minus1 = h->ref_count[0] - 1; 232 if (h->list_count > 1) 233 slice->num_ref_idx_l1_active_minus1 = h->ref_count[1] - 1; 234 slice->slice_alpha_c0_offset_div2 = h->slice_alpha_c0_offset / 2; 235 slice->slice_beta_offset_div2 = h->slice_beta_offset / 2; 236 slice->Reserved8Bits = 0; 237 238 for (list = 0; list < 2; list++) { 239 unsigned i; 240 for (i = 0; i < FF_ARRAY_ELEMS(slice->RefPicList[list]); i++) { 241 if (list < h->list_count && i < h->ref_count[list]) { 242 const H264Picture *r = &h->ref_list[list][i]; 243 unsigned plane; 244 unsigned index; 245 if (ctx->workaround & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO) 246 index = ff_dxva2_get_surface_index(ctx, &r->f); 247 else 248 index = get_refpic_index(pp, ff_dxva2_get_surface_index(ctx, &r->f)); 249 fill_picture_entry(&slice->RefPicList[list][i], index, 250 r->reference == PICT_BOTTOM_FIELD); 251 for (plane = 0; plane < 3; plane++) { 252 int w, o; 253 if (plane == 0 && h->luma_weight_flag[list]) { 254 w = h->luma_weight[i][list][0]; 255 o = h->luma_weight[i][list][1]; 256 } else if (plane >= 1 && h->chroma_weight_flag[list]) { 257 w = h->chroma_weight[i][list][plane-1][0]; 258 o = h->chroma_weight[i][list][plane-1][1]; 259 } else { 260 w = 1 << (plane == 0 ? h->luma_log2_weight_denom : 261 h->chroma_log2_weight_denom); 262 o = 0; 263 } 264 slice->Weights[list][i][plane][0] = w; 265 slice->Weights[list][i][plane][1] = o; 266 } 267 } else { 268 unsigned plane; 269 slice->RefPicList[list][i].bPicEntry = 0xff; 270 for (plane = 0; plane < 3; plane++) { 271 slice->Weights[list][i][plane][0] = 0; 272 slice->Weights[list][i][plane][1] = 0; 273 } 274 } 275 } 276 } 277 slice->slice_qs_delta = 0; /* XXX not implemented by FFmpeg */ 278 slice->slice_qp_delta = h->qscale - h->pps.init_qp; 279 slice->redundant_pic_cnt = h->redundant_pic_count; 280 if (h->slice_type == AV_PICTURE_TYPE_B) 281 slice->direct_spatial_mv_pred_flag = h->direct_spatial_mv_pred; 282 slice->cabac_init_idc = h->pps.cabac ? h->cabac_init_idc : 0; 283 if (h->deblocking_filter < 2) 284 slice->disable_deblocking_filter_idc = 1 - h->deblocking_filter; 285 else 286 slice->disable_deblocking_filter_idc = h->deblocking_filter; 287 slice->slice_id = h->current_slice - 1; 288} 289 290static int commit_bitstream_and_slice_buffer(AVCodecContext *avctx, 291 DXVA2_DecodeBufferDesc *bs, 292 DXVA2_DecodeBufferDesc *sc) 293{ 294 const H264Context *h = avctx->priv_data; 295 const unsigned mb_count = h->mb_width * h->mb_height; 296 struct dxva_context *ctx = avctx->hwaccel_context; 297 const H264Picture *current_picture = h->cur_pic_ptr; 298 struct dxva2_picture_context *ctx_pic = current_picture->hwaccel_picture_private; 299 DXVA_Slice_H264_Short *slice = NULL; 300 uint8_t *dxva_data, *current, *end; 301 unsigned dxva_size; 302 void *slice_data; 303 unsigned slice_size; 304 unsigned padding; 305 unsigned i; 306 307 /* Create an annex B bitstream buffer with only slice NAL and finalize slice */ 308 if (FAILED(IDirectXVideoDecoder_GetBuffer(ctx->decoder, 309 DXVA2_BitStreamDateBufferType, 310 (void **)&dxva_data, &dxva_size))) 311 return -1; 312 current = dxva_data; 313 end = dxva_data + dxva_size; 314 315 for (i = 0; i < ctx_pic->slice_count; i++) { 316 static const uint8_t start_code[] = { 0, 0, 1 }; 317 static const unsigned start_code_size = sizeof(start_code); 318 unsigned position, size; 319 320 assert(offsetof(DXVA_Slice_H264_Short, BSNALunitDataLocation) == 321 offsetof(DXVA_Slice_H264_Long, BSNALunitDataLocation)); 322 assert(offsetof(DXVA_Slice_H264_Short, SliceBytesInBuffer) == 323 offsetof(DXVA_Slice_H264_Long, SliceBytesInBuffer)); 324 325 if (is_slice_short(ctx)) 326 slice = &ctx_pic->slice_short[i]; 327 else 328 slice = (DXVA_Slice_H264_Short*)&ctx_pic->slice_long[i]; 329 330 position = slice->BSNALunitDataLocation; 331 size = slice->SliceBytesInBuffer; 332 if (start_code_size + size > end - current) { 333 av_log(avctx, AV_LOG_ERROR, "Failed to build bitstream"); 334 break; 335 } 336 337 slice->BSNALunitDataLocation = current - dxva_data; 338 slice->SliceBytesInBuffer = start_code_size + size; 339 340 if (!is_slice_short(ctx)) { 341 DXVA_Slice_H264_Long *slice_long = (DXVA_Slice_H264_Long*)slice; 342 if (i < ctx_pic->slice_count - 1) 343 slice_long->NumMbsForSlice = 344 slice_long[1].first_mb_in_slice - slice_long[0].first_mb_in_slice; 345 else 346 slice_long->NumMbsForSlice = mb_count - slice_long->first_mb_in_slice; 347 } 348 349 memcpy(current, start_code, start_code_size); 350 current += start_code_size; 351 352 memcpy(current, &ctx_pic->bitstream[position], size); 353 current += size; 354 } 355 padding = FFMIN(128 - ((current - dxva_data) & 127), end - current); 356 if (slice && padding > 0) { 357 memset(current, 0, padding); 358 current += padding; 359 360 slice->SliceBytesInBuffer += padding; 361 } 362 if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(ctx->decoder, 363 DXVA2_BitStreamDateBufferType))) 364 return -1; 365 if (i < ctx_pic->slice_count) 366 return -1; 367 368 memset(bs, 0, sizeof(*bs)); 369 bs->CompressedBufferType = DXVA2_BitStreamDateBufferType; 370 bs->DataSize = current - dxva_data; 371 bs->NumMBsInBuffer = mb_count; 372 373 if (is_slice_short(ctx)) { 374 slice_data = ctx_pic->slice_short; 375 slice_size = ctx_pic->slice_count * sizeof(*ctx_pic->slice_short); 376 } else { 377 slice_data = ctx_pic->slice_long; 378 slice_size = ctx_pic->slice_count * sizeof(*ctx_pic->slice_long); 379 } 380 assert((bs->DataSize & 127) == 0); 381 return ff_dxva2_commit_buffer(avctx, ctx, sc, 382 DXVA2_SliceControlBufferType, 383 slice_data, slice_size, mb_count); 384} 385 386 387static int dxva2_h264_start_frame(AVCodecContext *avctx, 388 av_unused const uint8_t *buffer, 389 av_unused uint32_t size) 390{ 391 const H264Context *h = avctx->priv_data; 392 struct dxva_context *ctx = avctx->hwaccel_context; 393 struct dxva2_picture_context *ctx_pic = h->cur_pic_ptr->hwaccel_picture_private; 394 395 if (!ctx->decoder || !ctx->cfg || ctx->surface_count <= 0) 396 return -1; 397 assert(ctx_pic); 398 399 /* Fill up DXVA_PicParams_H264 */ 400 fill_picture_parameters(ctx, h, &ctx_pic->pp); 401 402 /* Fill up DXVA_Qmatrix_H264 */ 403 fill_scaling_lists(ctx, h, &ctx_pic->qm); 404 405 ctx_pic->slice_count = 0; 406 ctx_pic->bitstream_size = 0; 407 ctx_pic->bitstream = NULL; 408 return 0; 409} 410 411static int dxva2_h264_decode_slice(AVCodecContext *avctx, 412 const uint8_t *buffer, 413 uint32_t size) 414{ 415 const H264Context *h = avctx->priv_data; 416 struct dxva_context *ctx = avctx->hwaccel_context; 417 const H264Picture *current_picture = h->cur_pic_ptr; 418 struct dxva2_picture_context *ctx_pic = current_picture->hwaccel_picture_private; 419 unsigned position; 420 421 if (ctx_pic->slice_count >= MAX_SLICES) 422 return -1; 423 424 if (!ctx_pic->bitstream) 425 ctx_pic->bitstream = buffer; 426 ctx_pic->bitstream_size += size; 427 428 position = buffer - ctx_pic->bitstream; 429 if (is_slice_short(ctx)) 430 fill_slice_short(&ctx_pic->slice_short[ctx_pic->slice_count], 431 position, size); 432 else 433 fill_slice_long(avctx, &ctx_pic->slice_long[ctx_pic->slice_count], 434 &ctx_pic->pp, position, size); 435 ctx_pic->slice_count++; 436 437 if (h->slice_type != AV_PICTURE_TYPE_I && h->slice_type != AV_PICTURE_TYPE_SI) 438 ctx_pic->pp.wBitFields &= ~(1 << 15); /* Set IntraPicFlag to 0 */ 439 return 0; 440} 441 442static int dxva2_h264_end_frame(AVCodecContext *avctx) 443{ 444 H264Context *h = avctx->priv_data; 445 struct dxva2_picture_context *ctx_pic = 446 h->cur_pic_ptr->hwaccel_picture_private; 447 int ret; 448 449 if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0) 450 return -1; 451 ret = ff_dxva2_common_end_frame(avctx, &h->cur_pic_ptr->f, 452 &ctx_pic->pp, sizeof(ctx_pic->pp), 453 &ctx_pic->qm, sizeof(ctx_pic->qm), 454 commit_bitstream_and_slice_buffer); 455 if (!ret) 456 ff_h264_draw_horiz_band(h, 0, h->avctx->height); 457 return ret; 458} 459 460AVHWAccel ff_h264_dxva2_hwaccel = { 461 .name = "h264_dxva2", 462 .type = AVMEDIA_TYPE_VIDEO, 463 .id = AV_CODEC_ID_H264, 464 .pix_fmt = AV_PIX_FMT_DXVA2_VLD, 465 .start_frame = dxva2_h264_start_frame, 466 .decode_slice = dxva2_h264_decode_slice, 467 .end_frame = dxva2_h264_end_frame, 468 .frame_priv_data_size = sizeof(struct dxva2_picture_context), 469}; 470