1/* 2 * DXVA2 H264 HW acceleration. 3 * 4 * copyright (c) 2009 Laurent Aimar 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23#include "dxva2_internal.h" 24#include "h264.h" 25#include "h264data.h" 26 27struct dxva2_picture_context { 28 DXVA_PicParams_H264 pp; 29 DXVA_Qmatrix_H264 qm; 30 unsigned slice_count; 31 DXVA_Slice_H264_Short slice_short[MAX_SLICES]; 32 DXVA_Slice_H264_Long slice_long[MAX_SLICES]; 33 const uint8_t *bitstream; 34 unsigned bitstream_size; 35}; 36 37static void fill_picture_entry(DXVA_PicEntry_H264 *pic, 38 unsigned index, unsigned flag) 39{ 40 assert((index&0x7f) == index && (flag&0x01) == flag); 41 pic->bPicEntry = index | (flag << 7); 42} 43 44static void fill_picture_parameters(struct dxva_context *ctx, const H264Context *h, 45 DXVA_PicParams_H264 *pp) 46{ 47 const MpegEncContext *s = &h->s; 48 const Picture *current_picture = s->current_picture_ptr; 49 int i; 50 51 memset(pp, 0, sizeof(*pp)); 52 /* Configure current picture */ 53 fill_picture_entry(&pp->CurrPic, 54 ff_dxva2_get_surface_index(ctx, current_picture), 55 s->picture_structure == PICT_BOTTOM_FIELD); 56 /* Configure the set of references */ 57 pp->UsedForReferenceFlags = 0; 58 pp->NonExistingFrameFlags = 0; 59 for (i = 0; i < FF_ARRAY_ELEMS(pp->RefFrameList); i++) { 60 if (i < h->short_ref_count + h->long_ref_count) { 61 const Picture *r; 62 if (i < h->short_ref_count) { 63 r = h->short_ref[i]; 64 assert(!r->long_ref); 65 } else { 66 r = h->long_ref[i - h->short_ref_count]; 67 assert(r->long_ref); 68 } 69 fill_picture_entry(&pp->RefFrameList[i], 70 ff_dxva2_get_surface_index(ctx, r), 71 r->long_ref != 0); 72 73 if ((r->reference & PICT_TOP_FIELD) && r->field_poc[0] != INT_MAX) 74 pp->FieldOrderCntList[i][0] = r->field_poc[0]; 75 if ((r->reference & PICT_BOTTOM_FIELD) && r->field_poc[1] != INT_MAX) 76 pp->FieldOrderCntList[i][1] = r->field_poc[1]; 77 78 pp->FrameNumList[i] = r->long_ref ? r->pic_id : r->frame_num; 79 if (r->reference & PICT_TOP_FIELD) 80 pp->UsedForReferenceFlags |= 1 << (2*i + 0); 81 if (r->reference & PICT_BOTTOM_FIELD) 82 pp->UsedForReferenceFlags |= 1 << (2*i + 1); 83 } else { 84 pp->RefFrameList[i].bPicEntry = 0xff; 85 pp->FieldOrderCntList[i][0] = 0; 86 pp->FieldOrderCntList[i][1] = 0; 87 pp->FrameNumList[i] = 0; 88 } 89 } 90 91 pp->wFrameWidthInMbsMinus1 = s->mb_width - 1; 92 pp->wFrameHeightInMbsMinus1 = s->mb_height - 1; 93 pp->num_ref_frames = h->sps.ref_frame_count; 94 95 pp->wBitFields = ((s->picture_structure != PICT_FRAME) << 0) | 96 (h->sps.mb_aff << 1) | 97 (h->sps.residual_color_transform_flag << 2) | 98 /* sp_for_switch_flag (not implemented by FFmpeg) */ 99 (0 << 3) | 100 (h->sps.chroma_format_idc << 4) | 101 ((h->nal_ref_idc != 0) << 6) | 102 (h->pps.constrained_intra_pred << 7) | 103 (h->pps.weighted_pred << 8) | 104 (h->pps.weighted_bipred_idc << 9) | 105 /* MbsConsecutiveFlag */ 106 (1 << 11) | 107 (h->sps.frame_mbs_only_flag << 12) | 108 (h->pps.transform_8x8_mode << 13) | 109 ((h->sps.level_idc >= 31) << 14) | 110 /* IntraPicFlag (Modified if we detect a non 111 * intra slice in decode_slice) */ 112 (1 << 15); 113 114 pp->bit_depth_luma_minus8 = h->sps.bit_depth_luma - 8; 115 pp->bit_depth_chroma_minus8 = h->sps.bit_depth_chroma - 8; 116 pp->Reserved16Bits = 3; /* FIXME is there a way to detect the right mode ? */ 117 pp->StatusReportFeedbackNumber = 1 + ctx->report_id++; 118 pp->CurrFieldOrderCnt[0] = 0; 119 if ((s->picture_structure & PICT_TOP_FIELD) && 120 current_picture->field_poc[0] != INT_MAX) 121 pp->CurrFieldOrderCnt[0] = current_picture->field_poc[0]; 122 pp->CurrFieldOrderCnt[1] = 0; 123 if ((s->picture_structure & PICT_BOTTOM_FIELD) && 124 current_picture->field_poc[1] != INT_MAX) 125 pp->CurrFieldOrderCnt[1] = current_picture->field_poc[1]; 126 pp->pic_init_qs_minus26 = h->pps.init_qs - 26; 127 pp->chroma_qp_index_offset = h->pps.chroma_qp_index_offset[0]; 128 pp->second_chroma_qp_index_offset = h->pps.chroma_qp_index_offset[1]; 129 pp->ContinuationFlag = 1; 130 pp->pic_init_qp_minus26 = h->pps.init_qp - 26; 131 pp->num_ref_idx_l0_active_minus1 = h->pps.ref_count[0] - 1; 132 pp->num_ref_idx_l1_active_minus1 = h->pps.ref_count[1] - 1; 133 pp->Reserved8BitsA = 0; 134 pp->frame_num = h->frame_num; 135 pp->log2_max_frame_num_minus4 = h->sps.log2_max_frame_num - 4; 136 pp->pic_order_cnt_type = h->sps.poc_type; 137 if (h->sps.poc_type == 0) 138 pp->log2_max_pic_order_cnt_lsb_minus4 = h->sps.log2_max_poc_lsb - 4; 139 else if (h->sps.poc_type == 1) 140 pp->delta_pic_order_always_zero_flag = h->sps.delta_pic_order_always_zero_flag; 141 pp->direct_8x8_inference_flag = h->sps.direct_8x8_inference_flag; 142 pp->entropy_coding_mode_flag = h->pps.cabac; 143 pp->pic_order_present_flag = h->pps.pic_order_present; 144 pp->num_slice_groups_minus1 = h->pps.slice_group_count - 1; 145 pp->slice_group_map_type = h->pps.mb_slice_group_map_type; 146 pp->deblocking_filter_control_present_flag = h->pps.deblocking_filter_parameters_present; 147 pp->redundant_pic_cnt_present_flag= h->pps.redundant_pic_cnt_present; 148 pp->Reserved8BitsB = 0; 149 pp->slice_group_change_rate_minus1= 0; /* XXX not implemented by FFmpeg */ 150 //pp->SliceGroupMap[810]; /* XXX not implemented by FFmpeg */ 151} 152 153static void fill_scaling_lists(const H264Context *h, DXVA_Qmatrix_H264 *qm) 154{ 155 unsigned i, j; 156 memset(qm, 0, sizeof(*qm)); 157 for (i = 0; i < 6; i++) 158 for (j = 0; j < 16; j++) 159 qm->bScalingLists4x4[i][j] = h->pps.scaling_matrix4[i][zigzag_scan[j]]; 160 161 for (i = 0; i < 2; i++) 162 for (j = 0; j < 64; j++) 163 qm->bScalingLists8x8[i][j] = h->pps.scaling_matrix8[i][ff_zigzag_direct[j]]; 164} 165 166static int is_slice_short(struct dxva_context *ctx) 167{ 168 assert(ctx->cfg->ConfigBitstreamRaw == 1 || 169 ctx->cfg->ConfigBitstreamRaw == 2); 170 return ctx->cfg->ConfigBitstreamRaw == 2; 171} 172 173static void fill_slice_short(DXVA_Slice_H264_Short *slice, 174 unsigned position, unsigned size) 175{ 176 memset(slice, 0, sizeof(*slice)); 177 slice->BSNALunitDataLocation = position; 178 slice->SliceBytesInBuffer = size; 179 slice->wBadSliceChopping = 0; 180} 181 182static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, 183 unsigned position, unsigned size) 184{ 185 const H264Context *h = avctx->priv_data; 186 struct dxva_context *ctx = avctx->hwaccel_context; 187 const MpegEncContext *s = &h->s; 188 unsigned list; 189 190 memset(slice, 0, sizeof(*slice)); 191 slice->BSNALunitDataLocation = position; 192 slice->SliceBytesInBuffer = size; 193 slice->wBadSliceChopping = 0; 194 195 slice->first_mb_in_slice = (s->mb_y >> FIELD_OR_MBAFF_PICTURE) * s->mb_width + s->mb_x; 196 slice->NumMbsForSlice = 0; /* XXX it is set once we have all slices */ 197 slice->BitOffsetToSliceData = get_bits_count(&s->gb) + 8; 198 slice->slice_type = ff_h264_get_slice_type(h); 199 if (h->slice_type_fixed) 200 slice->slice_type += 5; 201 slice->luma_log2_weight_denom = h->luma_log2_weight_denom; 202 slice->chroma_log2_weight_denom = h->chroma_log2_weight_denom; 203 if (h->list_count > 0) 204 slice->num_ref_idx_l0_active_minus1 = h->ref_count[0] - 1; 205 if (h->list_count > 1) 206 slice->num_ref_idx_l1_active_minus1 = h->ref_count[1] - 1; 207 slice->slice_alpha_c0_offset_div2 = h->slice_alpha_c0_offset / 2 - 26; 208 slice->slice_beta_offset_div2 = h->slice_beta_offset / 2 - 26; 209 slice->Reserved8Bits = 0; 210 211 for (list = 0; list < 2; list++) { 212 unsigned i; 213 for (i = 0; i < FF_ARRAY_ELEMS(slice->RefPicList[list]); i++) { 214 if (list < h->list_count && i < h->ref_count[list]) { 215 const Picture *r = &h->ref_list[list][i]; 216 unsigned plane; 217 fill_picture_entry(&slice->RefPicList[list][i], 218 ff_dxva2_get_surface_index(ctx, r), 219 r->reference == PICT_BOTTOM_FIELD); 220 for (plane = 0; plane < 3; plane++) { 221 int w, o; 222 if (plane == 0 && h->luma_weight_flag[list]) { 223 w = h->luma_weight[i][list][0]; 224 o = h->luma_weight[i][list][1]; 225 } else if (plane >= 1 && h->chroma_weight_flag[list]) { 226 w = h->chroma_weight[i][list][plane-1][0]; 227 o = h->chroma_weight[i][list][plane-1][1]; 228 } else { 229 w = 1 << (plane == 0 ? h->luma_log2_weight_denom : 230 h->chroma_log2_weight_denom); 231 o = 0; 232 } 233 slice->Weights[list][i][plane][0] = w; 234 slice->Weights[list][i][plane][1] = o; 235 } 236 } else { 237 unsigned plane; 238 slice->RefPicList[list][i].bPicEntry = 0xff; 239 for (plane = 0; plane < 3; plane++) { 240 slice->Weights[list][i][plane][0] = 0; 241 slice->Weights[list][i][plane][1] = 0; 242 } 243 } 244 } 245 } 246 slice->slice_qs_delta = 0; /* XXX not implemented by FFmpeg */ 247 slice->slice_qp_delta = s->qscale - h->pps.init_qp; 248 slice->redundant_pic_cnt = h->redundant_pic_count; 249 if (h->slice_type == FF_B_TYPE) 250 slice->direct_spatial_mv_pred_flag = h->direct_spatial_mv_pred; 251 slice->cabac_init_idc = h->pps.cabac ? h->cabac_init_idc : 0; 252 if (h->deblocking_filter < 2) 253 slice->disable_deblocking_filter_idc = 1 - h->deblocking_filter; 254 else 255 slice->disable_deblocking_filter_idc = h->deblocking_filter; 256 slice->slice_id = h->current_slice - 1; 257} 258 259static int commit_bitstream_and_slice_buffer(AVCodecContext *avctx, 260 DXVA2_DecodeBufferDesc *bs, 261 DXVA2_DecodeBufferDesc *sc) 262{ 263 const H264Context *h = avctx->priv_data; 264 const MpegEncContext *s = &h->s; 265 const unsigned mb_count = s->mb_width * s->mb_height; 266 struct dxva_context *ctx = avctx->hwaccel_context; 267 const Picture *current_picture = h->s.current_picture_ptr; 268 struct dxva2_picture_context *ctx_pic = current_picture->hwaccel_picture_private; 269 DXVA_Slice_H264_Short *slice = NULL; 270 uint8_t *dxva_data, *current, *end; 271 unsigned dxva_size; 272 void *slice_data; 273 unsigned slice_size; 274 unsigned padding; 275 unsigned i; 276 277 /* Create an annex B bitstream buffer with only slice NAL and finalize slice */ 278 if (FAILED(IDirectXVideoDecoder_GetBuffer(ctx->decoder, 279 DXVA2_BitStreamDateBufferType, 280 &dxva_data, &dxva_size))) 281 return -1; 282 current = dxva_data; 283 end = dxva_data + dxva_size; 284 285 for (i = 0; i < ctx_pic->slice_count; i++) { 286 static const uint8_t start_code[] = { 0, 0, 1 }; 287 static const unsigned start_code_size = sizeof(start_code); 288 unsigned position, size; 289 290 assert(offsetof(DXVA_Slice_H264_Short, BSNALunitDataLocation) == 291 offsetof(DXVA_Slice_H264_Long, BSNALunitDataLocation)); 292 assert(offsetof(DXVA_Slice_H264_Short, SliceBytesInBuffer) == 293 offsetof(DXVA_Slice_H264_Long, SliceBytesInBuffer)); 294 295 if (is_slice_short(ctx)) 296 slice = &ctx_pic->slice_short[i]; 297 else 298 slice = (DXVA_Slice_H264_Short*)&ctx_pic->slice_long[i]; 299 300 position = slice->BSNALunitDataLocation; 301 size = slice->SliceBytesInBuffer; 302 if (start_code_size + size > end - current) { 303 av_log(avctx, AV_LOG_ERROR, "Failed to build bitstream"); 304 break; 305 } 306 307 slice->BSNALunitDataLocation = current - dxva_data; 308 slice->SliceBytesInBuffer = start_code_size + size; 309 310 if (!is_slice_short(ctx)) { 311 DXVA_Slice_H264_Long *slice_long = (DXVA_Slice_H264_Long*)slice; 312 if (i < ctx_pic->slice_count - 1) 313 slice_long->NumMbsForSlice = 314 slice_long[1].first_mb_in_slice - slice_long[0].first_mb_in_slice; 315 else 316 slice_long->NumMbsForSlice = mb_count - slice_long->first_mb_in_slice; 317 } 318 319 memcpy(current, start_code, start_code_size); 320 current += start_code_size; 321 322 memcpy(current, &ctx_pic->bitstream[position], size); 323 current += size; 324 } 325 padding = FFMIN(128 - ((current - dxva_data) & 127), end - current); 326 if (slice && padding > 0) { 327 memset(current, 0, padding); 328 current += padding; 329 330 slice->SliceBytesInBuffer += padding; 331 } 332 if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(ctx->decoder, 333 DXVA2_BitStreamDateBufferType))) 334 return -1; 335 if (i < ctx_pic->slice_count) 336 return -1; 337 338 memset(bs, 0, sizeof(*bs)); 339 bs->CompressedBufferType = DXVA2_BitStreamDateBufferType; 340 bs->DataSize = current - dxva_data; 341 bs->NumMBsInBuffer = mb_count; 342 343 if (is_slice_short(ctx)) { 344 slice_data = ctx_pic->slice_short; 345 slice_size = ctx_pic->slice_count * sizeof(*ctx_pic->slice_short); 346 } else { 347 slice_data = ctx_pic->slice_long; 348 slice_size = ctx_pic->slice_count * sizeof(*ctx_pic->slice_long); 349 } 350 assert((bs->DataSize & 127) == 0); 351 return ff_dxva2_commit_buffer(avctx, ctx, sc, 352 DXVA2_SliceControlBufferType, 353 slice_data, slice_size, mb_count); 354} 355 356 357static int start_frame(AVCodecContext *avctx, 358 av_unused const uint8_t *buffer, 359 av_unused uint32_t size) 360{ 361 const H264Context *h = avctx->priv_data; 362 struct dxva_context *ctx = avctx->hwaccel_context; 363 struct dxva2_picture_context *ctx_pic = h->s.current_picture_ptr->hwaccel_picture_private; 364 365 if (!ctx->decoder || !ctx->cfg || ctx->surface_count <= 0) 366 return -1; 367 assert(ctx_pic); 368 369 /* Fill up DXVA_PicParams_H264 */ 370 fill_picture_parameters(ctx, h, &ctx_pic->pp); 371 372 /* Fill up DXVA_Qmatrix_H264 */ 373 fill_scaling_lists(h, &ctx_pic->qm); 374 375 ctx_pic->slice_count = 0; 376 ctx_pic->bitstream_size = 0; 377 ctx_pic->bitstream = NULL; 378 return 0; 379} 380 381static int decode_slice(AVCodecContext *avctx, 382 const uint8_t *buffer, uint32_t size) 383{ 384 const H264Context *h = avctx->priv_data; 385 struct dxva_context *ctx = avctx->hwaccel_context; 386 const Picture *current_picture = h->s.current_picture_ptr; 387 struct dxva2_picture_context *ctx_pic = current_picture->hwaccel_picture_private; 388 unsigned position; 389 390 if (ctx_pic->slice_count >= MAX_SLICES) 391 return -1; 392 393 if (!ctx_pic->bitstream) 394 ctx_pic->bitstream = buffer; 395 ctx_pic->bitstream_size += size; 396 397 position = buffer - ctx_pic->bitstream; 398 if (is_slice_short(ctx)) 399 fill_slice_short(&ctx_pic->slice_short[ctx_pic->slice_count], 400 position, size); 401 else 402 fill_slice_long(avctx, &ctx_pic->slice_long[ctx_pic->slice_count], 403 position, size); 404 ctx_pic->slice_count++; 405 406 if (h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE) 407 ctx_pic->pp.wBitFields &= ~(1 << 15); /* Set IntraPicFlag to 0 */ 408 return 0; 409} 410 411static int end_frame(AVCodecContext *avctx) 412{ 413 H264Context *h = avctx->priv_data; 414 MpegEncContext *s = &h->s; 415 struct dxva2_picture_context *ctx_pic = 416 h->s.current_picture_ptr->hwaccel_picture_private; 417 418 if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0) 419 return -1; 420 return ff_dxva2_common_end_frame(avctx, s, 421 &ctx_pic->pp, sizeof(ctx_pic->pp), 422 &ctx_pic->qm, sizeof(ctx_pic->qm), 423 commit_bitstream_and_slice_buffer); 424} 425 426AVHWAccel h264_dxva2_hwaccel = { 427 .name = "h264_dxva2", 428 .type = AVMEDIA_TYPE_VIDEO, 429 .id = CODEC_ID_H264, 430 .pix_fmt = PIX_FMT_DXVA2_VLD, 431 .capabilities = 0, 432 .start_frame = start_frame, 433 .decode_slice = decode_slice, 434 .end_frame = end_frame, 435 .priv_data_size = sizeof(struct dxva2_picture_context), 436}; 437 438