1/* 2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include "avcodec.h" 22#include "dsputil.h" 23#include "snow.h" 24 25#include "rangecoder.h" 26#include "mathops.h" 27 28#include "mpegvideo.h" 29 30#undef NDEBUG 31#include <assert.h> 32 33static const int8_t quant3[256]={ 34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 43-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 44-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 50}; 51static const int8_t quant3b[256]={ 52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 60-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 61-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 62-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 68}; 69static const int8_t quant3bA[256]={ 70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 86}; 87static const int8_t quant5[256]={ 88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 96-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 97-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 98-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 99-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 100-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 101-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 103-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, 104}; 105static const int8_t quant7[256]={ 106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 114-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, 115-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, 116-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, 117-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, 118-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, 119-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, 120-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 121-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, 122}; 123static const int8_t quant9[256]={ 124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 132-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 133-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 134-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 135-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 136-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 137-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 138-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, 139-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, 140}; 141static const int8_t quant11[256]={ 142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 150-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 151-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 152-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 153-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 154-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 155-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, 156-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 157-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, 158}; 159static const int8_t quant13[256]={ 160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 168-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, 169-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, 170-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, 171-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, 172-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, 173-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 174-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 175-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, 176}; 177 178#if 0 //64*cubic 179static const uint8_t obmc32[1024]={ 180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, 183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, 184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, 185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, 186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, 187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, 188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, 189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, 190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, 191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, 192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, 193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, 194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, 195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, 196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, 197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, 198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, 199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, 200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, 201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, 202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, 203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, 204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, 205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, 206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, 207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, 208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, 209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, 210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 212//error:0.000022 213}; 214static const uint8_t obmc16[256]={ 215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, 217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, 218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, 219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, 220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, 221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, 222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, 223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, 224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, 225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, 226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, 227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, 228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, 229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, 230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 231//error:0.000033 232}; 233#elif 1 // 64*linear 234static const uint8_t obmc32[1024]={ 235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, 237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, 238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, 239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, 240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, 241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, 242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, 243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, 244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, 245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, 246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, 247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, 248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, 249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, 250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, 251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, 252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, 253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, 254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, 255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, 256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, 257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, 258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, 259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, 260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, 261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, 262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, 263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, 264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, 265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, 266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 267 //error:0.000020 268}; 269static const uint8_t obmc16[256]={ 270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, 271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, 272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, 273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, 274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, 275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, 276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, 277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, 278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, 279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, 280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, 281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, 282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, 283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, 284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, 285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, 286//error:0.000015 287}; 288#else //64*cos 289static const uint8_t obmc32[1024]={ 290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, 293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, 294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, 295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, 296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, 297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, 298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, 299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, 300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, 301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, 302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, 303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, 304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, 305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, 306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, 307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, 308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, 309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, 310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, 311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, 312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, 313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, 314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, 315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, 316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, 317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, 318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, 319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, 320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 322//error:0.000022 323}; 324static const uint8_t obmc16[256]={ 325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, 327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, 328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, 329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, 330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, 331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, 332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, 333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, 334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, 335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, 336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, 337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, 338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, 339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, 340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 341//error:0.000022 342}; 343#endif /* 0 */ 344 345//linear *64 346static const uint8_t obmc8[64]={ 347 4, 12, 20, 28, 28, 20, 12, 4, 348 12, 36, 60, 84, 84, 60, 36, 12, 349 20, 60,100,140,140,100, 60, 20, 350 28, 84,140,196,196,140, 84, 28, 351 28, 84,140,196,196,140, 84, 28, 352 20, 60,100,140,140,100, 60, 20, 353 12, 36, 60, 84, 84, 60, 36, 12, 354 4, 12, 20, 28, 28, 20, 12, 4, 355//error:0.000000 356}; 357 358//linear *64 359static const uint8_t obmc4[16]={ 360 16, 48, 48, 16, 361 48,144,144, 48, 362 48,144,144, 48, 363 16, 48, 48, 16, 364//error:0.000000 365}; 366 367static const uint8_t * const obmc_tab[4]={ 368 obmc32, obmc16, obmc8, obmc4 369}; 370 371static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; 372 373typedef struct BlockNode{ 374 int16_t mx; 375 int16_t my; 376 uint8_t ref; 377 uint8_t color[3]; 378 uint8_t type; 379//#define TYPE_SPLIT 1 380#define BLOCK_INTRA 1 381#define BLOCK_OPT 2 382//#define TYPE_NOCOLOR 4 383 uint8_t level; //FIXME merge into type? 384}BlockNode; 385 386static const BlockNode null_block= { //FIXME add border maybe 387 .color= {128,128,128}, 388 .mx= 0, 389 .my= 0, 390 .ref= 0, 391 .type= 0, 392 .level= 0, 393}; 394 395#define LOG2_MB_SIZE 4 396#define MB_SIZE (1<<LOG2_MB_SIZE) 397#define ENCODER_EXTRA_BITS 4 398#define HTAPS_MAX 8 399 400typedef struct x_and_coeff{ 401 int16_t x; 402 uint16_t coeff; 403} x_and_coeff; 404 405typedef struct SubBand{ 406 int level; 407 int stride; 408 int width; 409 int height; 410 int qlog; ///< log(qscale)/log[2^(1/6)] 411 DWTELEM *buf; 412 IDWTELEM *ibuf; 413 int buf_x_offset; 414 int buf_y_offset; 415 int stride_line; ///< Stride measured in lines, not pixels. 416 x_and_coeff * x_coeff; 417 struct SubBand *parent; 418 uint8_t state[/*7*2*/ 7 + 512][32]; 419}SubBand; 420 421typedef struct Plane{ 422 int width; 423 int height; 424 SubBand band[MAX_DECOMPOSITIONS][4]; 425 426 int htaps; 427 int8_t hcoeff[HTAPS_MAX/2]; 428 int diag_mc; 429 int fast_mc; 430 431 int last_htaps; 432 int8_t last_hcoeff[HTAPS_MAX/2]; 433 int last_diag_mc; 434}Plane; 435 436typedef struct SnowContext{ 437// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) 438 439 AVCodecContext *avctx; 440 RangeCoder c; 441 DSPContext dsp; 442 AVFrame new_picture; 443 AVFrame input_picture; ///< new_picture with the internal linesizes 444 AVFrame current_picture; 445 AVFrame last_picture[MAX_REF_FRAMES]; 446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4]; 447 AVFrame mconly_picture; 448// uint8_t q_context[16]; 449 uint8_t header_state[32]; 450 uint8_t block_state[128 + 32*128]; 451 int keyframe; 452 int always_reset; 453 int version; 454 int spatial_decomposition_type; 455 int last_spatial_decomposition_type; 456 int temporal_decomposition_type; 457 int spatial_decomposition_count; 458 int last_spatial_decomposition_count; 459 int temporal_decomposition_count; 460 int max_ref_frames; 461 int ref_frames; 462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; 463 uint32_t *ref_scores[MAX_REF_FRAMES]; 464 DWTELEM *spatial_dwt_buffer; 465 IDWTELEM *spatial_idwt_buffer; 466 int colorspace_type; 467 int chroma_h_shift; 468 int chroma_v_shift; 469 int spatial_scalability; 470 int qlog; 471 int last_qlog; 472 int lambda; 473 int lambda2; 474 int pass1_rc; 475 int mv_scale; 476 int last_mv_scale; 477 int qbias; 478 int last_qbias; 479#define QBIAS_SHIFT 3 480 int b_width; 481 int b_height; 482 int block_max_depth; 483 int last_block_max_depth; 484 Plane plane[MAX_PLANES]; 485 BlockNode *block; 486#define ME_CACHE_SIZE 1024 487 int me_cache[ME_CACHE_SIZE]; 488 int me_cache_generation; 489 slice_buffer sb; 490 491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) 492 493 uint8_t *scratchbuf; 494}SnowContext; 495 496typedef struct { 497 IDWTELEM *b0; 498 IDWTELEM *b1; 499 IDWTELEM *b2; 500 IDWTELEM *b3; 501 int y; 502} DWTCompose; 503 504#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) 505//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) 506 507static void iterative_me(SnowContext *s); 508 509static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) 510{ 511 int i; 512 513 buf->base_buffer = base_buffer; 514 buf->line_count = line_count; 515 buf->line_width = line_width; 516 buf->data_count = max_allocated_lines; 517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); 518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); 519 520 for(i = 0; i < max_allocated_lines; i++){ 521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); 522 } 523 524 buf->data_stack_top = max_allocated_lines - 1; 525} 526 527static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) 528{ 529 int offset; 530 IDWTELEM * buffer; 531 532 assert(buf->data_stack_top >= 0); 533// assert(!buf->line[line]); 534 if (buf->line[line]) 535 return buf->line[line]; 536 537 offset = buf->line_width * line; 538 buffer = buf->data_stack[buf->data_stack_top]; 539 buf->data_stack_top--; 540 buf->line[line] = buffer; 541 542 return buffer; 543} 544 545static void slice_buffer_release(slice_buffer * buf, int line) 546{ 547 int offset; 548 IDWTELEM * buffer; 549 550 assert(line >= 0 && line < buf->line_count); 551 assert(buf->line[line]); 552 553 offset = buf->line_width * line; 554 buffer = buf->line[line]; 555 buf->data_stack_top++; 556 buf->data_stack[buf->data_stack_top] = buffer; 557 buf->line[line] = NULL; 558} 559 560static void slice_buffer_flush(slice_buffer * buf) 561{ 562 int i; 563 for(i = 0; i < buf->line_count; i++){ 564 if (buf->line[i]) 565 slice_buffer_release(buf, i); 566 } 567} 568 569static void slice_buffer_destroy(slice_buffer * buf) 570{ 571 int i; 572 slice_buffer_flush(buf); 573 574 for(i = buf->data_count - 1; i >= 0; i--){ 575 av_freep(&buf->data_stack[i]); 576 } 577 av_freep(&buf->data_stack); 578 av_freep(&buf->line); 579} 580 581#ifdef __sgi 582// Avoid a name clash on SGI IRIX 583#undef qexp 584#endif 585#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 586static uint8_t qexp[QROOT]; 587 588static inline int mirror(int v, int m){ 589 while((unsigned)v > (unsigned)m){ 590 v=-v; 591 if(v<0) v+= 2*m; 592 } 593 return v; 594} 595 596static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ 597 int i; 598 599 if(v){ 600 const int a= FFABS(v); 601 const int e= av_log2(a); 602#if 1 603 const int el= FFMIN(e, 10); 604 put_rac(c, state+0, 0); 605 606 for(i=0; i<el; i++){ 607 put_rac(c, state+1+i, 1); //1..10 608 } 609 for(; i<e; i++){ 610 put_rac(c, state+1+9, 1); //1..10 611 } 612 put_rac(c, state+1+FFMIN(i,9), 0); 613 614 for(i=e-1; i>=el; i--){ 615 put_rac(c, state+22+9, (a>>i)&1); //22..31 616 } 617 for(; i>=0; i--){ 618 put_rac(c, state+22+i, (a>>i)&1); //22..31 619 } 620 621 if(is_signed) 622 put_rac(c, state+11 + el, v < 0); //11..21 623#else 624 625 put_rac(c, state+0, 0); 626 if(e<=9){ 627 for(i=0; i<e; i++){ 628 put_rac(c, state+1+i, 1); //1..10 629 } 630 put_rac(c, state+1+i, 0); 631 632 for(i=e-1; i>=0; i--){ 633 put_rac(c, state+22+i, (a>>i)&1); //22..31 634 } 635 636 if(is_signed) 637 put_rac(c, state+11 + e, v < 0); //11..21 638 }else{ 639 for(i=0; i<e; i++){ 640 put_rac(c, state+1+FFMIN(i,9), 1); //1..10 641 } 642 put_rac(c, state+1+FFMIN(i,9), 0); 643 644 for(i=e-1; i>=0; i--){ 645 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 646 } 647 648 if(is_signed) 649 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21 650 } 651#endif /* 1 */ 652 }else{ 653 put_rac(c, state+0, 1); 654 } 655} 656 657static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ 658 if(get_rac(c, state+0)) 659 return 0; 660 else{ 661 int i, e, a; 662 e= 0; 663 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 664 e++; 665 } 666 667 a= 1; 668 for(i=e-1; i>=0; i--){ 669 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 670 } 671 672 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21 673 return -a; 674 else 675 return a; 676 } 677} 678 679static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ 680 int i; 681 int r= log2>=0 ? 1<<log2 : 1; 682 683 assert(v>=0); 684 assert(log2>=-4); 685 686 while(v >= r){ 687 put_rac(c, state+4+log2, 1); 688 v -= r; 689 log2++; 690 if(log2>0) r+=r; 691 } 692 put_rac(c, state+4+log2, 0); 693 694 for(i=log2-1; i>=0; i--){ 695 put_rac(c, state+31-i, (v>>i)&1); 696 } 697} 698 699static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ 700 int i; 701 int r= log2>=0 ? 1<<log2 : 1; 702 int v=0; 703 704 assert(log2>=-4); 705 706 while(get_rac(c, state+4+log2)){ 707 v+= r; 708 log2++; 709 if(log2>0) r+=r; 710 } 711 712 for(i=log2-1; i>=0; i--){ 713 v+= get_rac(c, state+31-i)<<i; 714 } 715 716 return v; 717} 718 719static av_always_inline void 720lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, 721 int dst_step, int src_step, int ref_step, 722 int width, int mul, int add, int shift, 723 int highpass, int inverse){ 724 const int mirror_left= !highpass; 725 const int mirror_right= (width&1) ^ highpass; 726 const int w= (width>>1) - 1 + (highpass & width); 727 int i; 728 729#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) 730 if(mirror_left){ 731 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); 732 dst += dst_step; 733 src += src_step; 734 } 735 736 for(i=0; i<w; i++){ 737 dst[i*dst_step] = 738 LIFT(src[i*src_step], 739 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), 740 inverse); 741 } 742 743 if(mirror_right){ 744 dst[w*dst_step] = 745 LIFT(src[w*src_step], 746 ((mul*2*ref[w*ref_step]+add)>>shift), 747 inverse); 748 } 749} 750 751static av_always_inline void 752inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, 753 int dst_step, int src_step, int ref_step, 754 int width, int mul, int add, int shift, 755 int highpass, int inverse){ 756 const int mirror_left= !highpass; 757 const int mirror_right= (width&1) ^ highpass; 758 const int w= (width>>1) - 1 + (highpass & width); 759 int i; 760 761#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) 762 if(mirror_left){ 763 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); 764 dst += dst_step; 765 src += src_step; 766 } 767 768 for(i=0; i<w; i++){ 769 dst[i*dst_step] = 770 LIFT(src[i*src_step], 771 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), 772 inverse); 773 } 774 775 if(mirror_right){ 776 dst[w*dst_step] = 777 LIFT(src[w*src_step], 778 ((mul*2*ref[w*ref_step]+add)>>shift), 779 inverse); 780 } 781} 782 783#ifndef liftS 784static av_always_inline void 785liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, 786 int dst_step, int src_step, int ref_step, 787 int width, int mul, int add, int shift, 788 int highpass, int inverse){ 789 const int mirror_left= !highpass; 790 const int mirror_right= (width&1) ^ highpass; 791 const int w= (width>>1) - 1 + (highpass & width); 792 int i; 793 794 assert(shift == 4); 795#define LIFTS(src, ref, inv) \ 796 ((inv) ? \ 797 (src) + (((ref) + 4*(src))>>shift): \ 798 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) 799 if(mirror_left){ 800 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); 801 dst += dst_step; 802 src += src_step; 803 } 804 805 for(i=0; i<w; i++){ 806 dst[i*dst_step] = 807 LIFTS(src[i*src_step], 808 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, 809 inverse); 810 } 811 812 if(mirror_right){ 813 dst[w*dst_step] = 814 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); 815 } 816} 817static av_always_inline void 818inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, 819 int dst_step, int src_step, int ref_step, 820 int width, int mul, int add, int shift, 821 int highpass, int inverse){ 822 const int mirror_left= !highpass; 823 const int mirror_right= (width&1) ^ highpass; 824 const int w= (width>>1) - 1 + (highpass & width); 825 int i; 826 827 assert(shift == 4); 828#define LIFTS(src, ref, inv) \ 829 ((inv) ? \ 830 (src) + (((ref) + 4*(src))>>shift): \ 831 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) 832 if(mirror_left){ 833 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); 834 dst += dst_step; 835 src += src_step; 836 } 837 838 for(i=0; i<w; i++){ 839 dst[i*dst_step] = 840 LIFTS(src[i*src_step], 841 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, 842 inverse); 843 } 844 845 if(mirror_right){ 846 dst[w*dst_step] = 847 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); 848 } 849} 850#endif /* ! liftS */ 851 852static void horizontal_decompose53i(DWTELEM *b, int width){ 853 DWTELEM temp[width]; 854 const int width2= width>>1; 855 int x; 856 const int w2= (width+1)>>1; 857 858 for(x=0; x<width2; x++){ 859 temp[x ]= b[2*x ]; 860 temp[x+w2]= b[2*x + 1]; 861 } 862 if(width&1) 863 temp[x ]= b[2*x ]; 864#if 0 865 { 866 int A1,A2,A3,A4; 867 A2= temp[1 ]; 868 A4= temp[0 ]; 869 A1= temp[0+width2]; 870 A1 -= (A2 + A4)>>1; 871 A4 += (A1 + 1)>>1; 872 b[0+width2] = A1; 873 b[0 ] = A4; 874 for(x=1; x+1<width2; x+=2){ 875 A3= temp[x+width2]; 876 A4= temp[x+1 ]; 877 A3 -= (A2 + A4)>>1; 878 A2 += (A1 + A3 + 2)>>2; 879 b[x+width2] = A3; 880 b[x ] = A2; 881 882 A1= temp[x+1+width2]; 883 A2= temp[x+2 ]; 884 A1 -= (A2 + A4)>>1; 885 A4 += (A1 + A3 + 2)>>2; 886 b[x+1+width2] = A1; 887 b[x+1 ] = A4; 888 } 889 A3= temp[width-1]; 890 A3 -= A2; 891 A2 += (A1 + A3 + 2)>>2; 892 b[width -1] = A3; 893 b[width2-1] = A2; 894 } 895#else 896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); 897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); 898#endif /* 0 */ 899} 900 901static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ 902 int i; 903 904 for(i=0; i<width; i++){ 905 b1[i] -= (b0[i] + b2[i])>>1; 906 } 907} 908 909static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ 910 int i; 911 912 for(i=0; i<width; i++){ 913 b1[i] += (b0[i] + b2[i] + 2)>>2; 914 } 915} 916 917static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ 918 int y; 919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; 920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; 921 922 for(y=-2; y<height; y+=2){ 923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; 924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; 925 926 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); 927 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); 928 929 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); 930 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); 931 932 b0=b2; 933 b1=b3; 934 } 935} 936 937static void horizontal_decompose97i(DWTELEM *b, int width){ 938 DWTELEM temp[width]; 939 const int w2= (width+1)>>1; 940 941 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); 942 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); 943 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); 944 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); 945} 946 947 948static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ 949 int i; 950 951 for(i=0; i<width; i++){ 952 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; 953 } 954} 955 956static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ 957 int i; 958 959 for(i=0; i<width; i++){ 960 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; 961 } 962} 963 964static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ 965 int i; 966 967 for(i=0; i<width; i++){ 968#ifdef liftS 969 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; 970#else 971 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23); 972#endif 973 } 974} 975 976static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ 977 int i; 978 979 for(i=0; i<width; i++){ 980 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; 981 } 982} 983 984static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ 985 int y; 986 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; 987 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; 988 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; 989 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; 990 991 for(y=-4; y<height; y+=2){ 992 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; 993 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; 994 995 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); 996 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); 997 998 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); 999 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); 1000 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); 1001 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); 1002 1003 b0=b2; 1004 b1=b3; 1005 b2=b4; 1006 b3=b5; 1007 } 1008} 1009 1010void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ 1011 int level; 1012 1013 for(level=0; level<decomposition_count; level++){ 1014 switch(type){ 1015 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; 1016 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; 1017 } 1018 } 1019} 1020 1021static void horizontal_compose53i(IDWTELEM *b, int width){ 1022 IDWTELEM temp[width]; 1023 const int width2= width>>1; 1024 const int w2= (width+1)>>1; 1025 int x; 1026 1027#if 0 1028 int A1,A2,A3,A4; 1029 A2= temp[1 ]; 1030 A4= temp[0 ]; 1031 A1= temp[0+width2]; 1032 A1 -= (A2 + A4)>>1; 1033 A4 += (A1 + 1)>>1; 1034 b[0+width2] = A1; 1035 b[0 ] = A4; 1036 for(x=1; x+1<width2; x+=2){ 1037 A3= temp[x+width2]; 1038 A4= temp[x+1 ]; 1039 A3 -= (A2 + A4)>>1; 1040 A2 += (A1 + A3 + 2)>>2; 1041 b[x+width2] = A3; 1042 b[x ] = A2; 1043 1044 A1= temp[x+1+width2]; 1045 A2= temp[x+2 ]; 1046 A1 -= (A2 + A4)>>1; 1047 A4 += (A1 + A3 + 2)>>2; 1048 b[x+1+width2] = A1; 1049 b[x+1 ] = A4; 1050 } 1051 A3= temp[width-1]; 1052 A3 -= A2; 1053 A2 += (A1 + A3 + 2)>>2; 1054 b[width -1] = A3; 1055 b[width2-1] = A2; 1056#else 1057 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); 1058 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); 1059#endif /* 0 */ 1060 for(x=0; x<width2; x++){ 1061 b[2*x ]= temp[x ]; 1062 b[2*x + 1]= temp[x+w2]; 1063 } 1064 if(width&1) 1065 b[2*x ]= temp[x ]; 1066} 1067 1068static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ 1069 int i; 1070 1071 for(i=0; i<width; i++){ 1072 b1[i] += (b0[i] + b2[i])>>1; 1073 } 1074} 1075 1076static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ 1077 int i; 1078 1079 for(i=0; i<width; i++){ 1080 b1[i] -= (b0[i] + b2[i] + 2)>>2; 1081 } 1082} 1083 1084static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ 1085 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); 1086 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); 1087 cs->y = -1; 1088} 1089 1090static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ 1091 cs->b0 = buffer + mirror(-1-1, height-1)*stride; 1092 cs->b1 = buffer + mirror(-1 , height-1)*stride; 1093 cs->y = -1; 1094} 1095 1096static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ 1097 int y= cs->y; 1098 1099 IDWTELEM *b0= cs->b0; 1100 IDWTELEM *b1= cs->b1; 1101 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); 1102 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); 1103 1104 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); 1105 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); 1106 1107 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); 1108 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); 1109 1110 cs->b0 = b2; 1111 cs->b1 = b3; 1112 cs->y += 2; 1113} 1114 1115static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ 1116 int y= cs->y; 1117 IDWTELEM *b0= cs->b0; 1118 IDWTELEM *b1= cs->b1; 1119 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; 1120 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; 1121 1122 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); 1123 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); 1124 1125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); 1126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); 1127 1128 cs->b0 = b2; 1129 cs->b1 = b3; 1130 cs->y += 2; 1131} 1132 1133static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ 1134 DWTCompose cs; 1135 spatial_compose53i_init(&cs, buffer, height, stride); 1136 while(cs.y <= height) 1137 spatial_compose53i_dy(&cs, buffer, width, height, stride); 1138} 1139 1140 1141void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ 1142 IDWTELEM temp[width]; 1143 const int w2= (width+1)>>1; 1144 1145 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); 1146 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); 1147 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); 1148 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); 1149} 1150 1151static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ 1152 int i; 1153 1154 for(i=0; i<width; i++){ 1155 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; 1156 } 1157} 1158 1159static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ 1160 int i; 1161 1162 for(i=0; i<width; i++){ 1163 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; 1164 } 1165} 1166 1167static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ 1168 int i; 1169 1170 for(i=0; i<width; i++){ 1171#ifdef liftS 1172 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; 1173#else 1174 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; 1175#endif 1176 } 1177} 1178 1179static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ 1180 int i; 1181 1182 for(i=0; i<width; i++){ 1183 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; 1184 } 1185} 1186 1187void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ 1188 int i; 1189 1190 for(i=0; i<width; i++){ 1191 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; 1192 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; 1193#ifdef liftS 1194 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; 1195#else 1196 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; 1197#endif 1198 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; 1199 } 1200} 1201 1202static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ 1203 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); 1204 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); 1205 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); 1206 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); 1207 cs->y = -3; 1208} 1209 1210static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ 1211 cs->b0 = buffer + mirror(-3-1, height-1)*stride; 1212 cs->b1 = buffer + mirror(-3 , height-1)*stride; 1213 cs->b2 = buffer + mirror(-3+1, height-1)*stride; 1214 cs->b3 = buffer + mirror(-3+2, height-1)*stride; 1215 cs->y = -3; 1216} 1217 1218static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ 1219 int y = cs->y; 1220 1221 IDWTELEM *b0= cs->b0; 1222 IDWTELEM *b1= cs->b1; 1223 IDWTELEM *b2= cs->b2; 1224 IDWTELEM *b3= cs->b3; 1225 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); 1226 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); 1227 1228 if(y>0 && y+4<height){ 1229 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); 1230 }else{ 1231 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); 1232 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); 1233 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); 1234 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); 1235 } 1236 1237 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); 1238 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); 1239 1240 cs->b0=b2; 1241 cs->b1=b3; 1242 cs->b2=b4; 1243 cs->b3=b5; 1244 cs->y += 2; 1245} 1246 1247static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ 1248 int y = cs->y; 1249 IDWTELEM *b0= cs->b0; 1250 IDWTELEM *b1= cs->b1; 1251 IDWTELEM *b2= cs->b2; 1252 IDWTELEM *b3= cs->b3; 1253 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; 1254 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; 1255 1256 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); 1257 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); 1258 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); 1259 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); 1260 1261 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); 1262 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); 1263 1264 cs->b0=b2; 1265 cs->b1=b3; 1266 cs->b2=b4; 1267 cs->b3=b5; 1268 cs->y += 2; 1269} 1270 1271static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ 1272 DWTCompose cs; 1273 spatial_compose97i_init(&cs, buffer, height, stride); 1274 while(cs.y <= height) 1275 spatial_compose97i_dy(&cs, buffer, width, height, stride); 1276} 1277 1278static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ 1279 int level; 1280 for(level=decomposition_count-1; level>=0; level--){ 1281 switch(type){ 1282 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; 1283 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; 1284 } 1285 } 1286} 1287 1288static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ 1289 int level; 1290 for(level=decomposition_count-1; level>=0; level--){ 1291 switch(type){ 1292 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; 1293 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; 1294 } 1295 } 1296} 1297 1298static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ 1299 const int support = type==1 ? 3 : 5; 1300 int level; 1301 if(type==2) return; 1302 1303 for(level=decomposition_count-1; level>=0; level--){ 1304 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ 1305 switch(type){ 1306 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); 1307 break; 1308 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); 1309 break; 1310 } 1311 } 1312 } 1313} 1314 1315static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ 1316 const int support = type==1 ? 3 : 5; 1317 int level; 1318 if(type==2) return; 1319 1320 for(level=decomposition_count-1; level>=0; level--){ 1321 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ 1322 switch(type){ 1323 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level); 1324 break; 1325 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); 1326 break; 1327 } 1328 } 1329 } 1330} 1331 1332static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ 1333 DWTCompose cs[MAX_DECOMPOSITIONS]; 1334 int y; 1335 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); 1336 for(y=0; y<height; y+=4) 1337 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); 1338} 1339 1340static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ 1341 const int w= b->width; 1342 const int h= b->height; 1343 int x, y; 1344 1345 if(1){ 1346 int run=0; 1347 int runs[w*h]; 1348 int run_index=0; 1349 int max_index; 1350 1351 for(y=0; y<h; y++){ 1352 for(x=0; x<w; x++){ 1353 int v, p=0; 1354 int /*ll=0, */l=0, lt=0, t=0, rt=0; 1355 v= src[x + y*stride]; 1356 1357 if(y){ 1358 t= src[x + (y-1)*stride]; 1359 if(x){ 1360 lt= src[x - 1 + (y-1)*stride]; 1361 } 1362 if(x + 1 < w){ 1363 rt= src[x + 1 + (y-1)*stride]; 1364 } 1365 } 1366 if(x){ 1367 l= src[x - 1 + y*stride]; 1368 /*if(x > 1){ 1369 if(orientation==1) ll= src[y + (x-2)*stride]; 1370 else ll= src[x - 2 + y*stride]; 1371 }*/ 1372 } 1373 if(parent){ 1374 int px= x>>1; 1375 int py= y>>1; 1376 if(px<b->parent->width && py<b->parent->height) 1377 p= parent[px + py*2*stride]; 1378 } 1379 if(!(/*ll|*/l|lt|t|rt|p)){ 1380 if(v){ 1381 runs[run_index++]= run; 1382 run=0; 1383 }else{ 1384 run++; 1385 } 1386 } 1387 } 1388 } 1389 max_index= run_index; 1390 runs[run_index++]= run; 1391 run_index=0; 1392 run= runs[run_index++]; 1393 1394 put_symbol2(&s->c, b->state[30], max_index, 0); 1395 if(run_index <= max_index) 1396 put_symbol2(&s->c, b->state[1], run, 3); 1397 1398 for(y=0; y<h; y++){ 1399 if(s->c.bytestream_end - s->c.bytestream < w*40){ 1400 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); 1401 return -1; 1402 } 1403 for(x=0; x<w; x++){ 1404 int v, p=0; 1405 int /*ll=0, */l=0, lt=0, t=0, rt=0; 1406 v= src[x + y*stride]; 1407 1408 if(y){ 1409 t= src[x + (y-1)*stride]; 1410 if(x){ 1411 lt= src[x - 1 + (y-1)*stride]; 1412 } 1413 if(x + 1 < w){ 1414 rt= src[x + 1 + (y-1)*stride]; 1415 } 1416 } 1417 if(x){ 1418 l= src[x - 1 + y*stride]; 1419 /*if(x > 1){ 1420 if(orientation==1) ll= src[y + (x-2)*stride]; 1421 else ll= src[x - 2 + y*stride]; 1422 }*/ 1423 } 1424 if(parent){ 1425 int px= x>>1; 1426 int py= y>>1; 1427 if(px<b->parent->width && py<b->parent->height) 1428 p= parent[px + py*2*stride]; 1429 } 1430 if(/*ll|*/l|lt|t|rt|p){ 1431 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); 1432 1433 put_rac(&s->c, &b->state[0][context], !!v); 1434 }else{ 1435 if(!run){ 1436 run= runs[run_index++]; 1437 1438 if(run_index <= max_index) 1439 put_symbol2(&s->c, b->state[1], run, 3); 1440 assert(v); 1441 }else{ 1442 run--; 1443 assert(!v); 1444 } 1445 } 1446 if(v){ 1447 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); 1448 int l2= 2*FFABS(l) + (l<0); 1449 int t2= 2*FFABS(t) + (t<0); 1450 1451 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); 1452 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); 1453 } 1454 } 1455 } 1456 } 1457 return 0; 1458} 1459 1460static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ 1461// encode_subband_qtree(s, b, src, parent, stride, orientation); 1462// encode_subband_z0run(s, b, src, parent, stride, orientation); 1463 return encode_subband_c0run(s, b, src, parent, stride, orientation); 1464// encode_subband_dzr(s, b, src, parent, stride, orientation); 1465} 1466 1467static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ 1468 const int w= b->width; 1469 const int h= b->height; 1470 int x,y; 1471 1472 if(1){ 1473 int run, runs; 1474 x_and_coeff *xc= b->x_coeff; 1475 x_and_coeff *prev_xc= NULL; 1476 x_and_coeff *prev2_xc= xc; 1477 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; 1478 x_and_coeff *prev_parent_xc= parent_xc; 1479 1480 runs= get_symbol2(&s->c, b->state[30], 0); 1481 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); 1482 else run= INT_MAX; 1483 1484 for(y=0; y<h; y++){ 1485 int v=0; 1486 int lt=0, t=0, rt=0; 1487 1488 if(y && prev_xc->x == 0){ 1489 rt= prev_xc->coeff; 1490 } 1491 for(x=0; x<w; x++){ 1492 int p=0; 1493 const int l= v; 1494 1495 lt= t; t= rt; 1496 1497 if(y){ 1498 if(prev_xc->x <= x) 1499 prev_xc++; 1500 if(prev_xc->x == x + 1) 1501 rt= prev_xc->coeff; 1502 else 1503 rt=0; 1504 } 1505 if(parent_xc){ 1506 if(x>>1 > parent_xc->x){ 1507 parent_xc++; 1508 } 1509 if(x>>1 == parent_xc->x){ 1510 p= parent_xc->coeff; 1511 } 1512 } 1513 if(/*ll|*/l|lt|t|rt|p){ 1514 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); 1515 1516 v=get_rac(&s->c, &b->state[0][context]); 1517 if(v){ 1518 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); 1519 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); 1520 1521 xc->x=x; 1522 (xc++)->coeff= v; 1523 } 1524 }else{ 1525 if(!run){ 1526 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); 1527 else run= INT_MAX; 1528 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); 1529 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); 1530 1531 xc->x=x; 1532 (xc++)->coeff= v; 1533 }else{ 1534 int max_run; 1535 run--; 1536 v=0; 1537 1538 if(y) max_run= FFMIN(run, prev_xc->x - x - 2); 1539 else max_run= FFMIN(run, w-x-1); 1540 if(parent_xc) 1541 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); 1542 x+= max_run; 1543 run-= max_run; 1544 } 1545 } 1546 } 1547 (xc++)->x= w+1; //end marker 1548 prev_xc= prev2_xc; 1549 prev2_xc= xc; 1550 1551 if(parent_xc){ 1552 if(y&1){ 1553 while(parent_xc->x != parent->width+1) 1554 parent_xc++; 1555 parent_xc++; 1556 prev_parent_xc= parent_xc; 1557 }else{ 1558 parent_xc= prev_parent_xc; 1559 } 1560 } 1561 } 1562 1563 (xc++)->x= w+1; //end marker 1564 } 1565} 1566 1567static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ 1568 const int w= b->width; 1569 int y; 1570 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 1571 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 1572 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; 1573 int new_index = 0; 1574 1575 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ 1576 qadd= 0; 1577 qmul= 1<<QEXPSHIFT; 1578 } 1579 1580 /* If we are on the second or later slice, restore our index. */ 1581 if (start_y != 0) 1582 new_index = save_state[0]; 1583 1584 1585 for(y=start_y; y<h; y++){ 1586 int x = 0; 1587 int v; 1588 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; 1589 memset(line, 0, b->width*sizeof(IDWTELEM)); 1590 v = b->x_coeff[new_index].coeff; 1591 x = b->x_coeff[new_index++].x; 1592 while(x < w){ 1593 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; 1594 register int u= -(v&1); 1595 line[x] = (t^u) - u; 1596 1597 v = b->x_coeff[new_index].coeff; 1598 x = b->x_coeff[new_index++].x; 1599 } 1600 } 1601 1602 /* Save our variables for the next slice. */ 1603 save_state[0] = new_index; 1604 1605 return; 1606} 1607 1608static void reset_contexts(SnowContext *s){ //FIXME better initial contexts 1609 int plane_index, level, orientation; 1610 1611 for(plane_index=0; plane_index<3; plane_index++){ 1612 for(level=0; level<MAX_DECOMPOSITIONS; level++){ 1613 for(orientation=level ? 1:0; orientation<4; orientation++){ 1614 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); 1615 } 1616 } 1617 } 1618 memset(s->header_state, MID_STATE, sizeof(s->header_state)); 1619 memset(s->block_state, MID_STATE, sizeof(s->block_state)); 1620} 1621 1622static int alloc_blocks(SnowContext *s){ 1623 int w= -((-s->avctx->width )>>LOG2_MB_SIZE); 1624 int h= -((-s->avctx->height)>>LOG2_MB_SIZE); 1625 1626 s->b_width = w; 1627 s->b_height= h; 1628 1629 av_free(s->block); 1630 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); 1631 return 0; 1632} 1633 1634static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ 1635 uint8_t *bytestream= d->bytestream; 1636 uint8_t *bytestream_start= d->bytestream_start; 1637 *d= *s; 1638 d->bytestream= bytestream; 1639 d->bytestream_start= bytestream_start; 1640} 1641 1642//near copy & paste from dsputil, FIXME 1643static int pix_sum(uint8_t * pix, int line_size, int w) 1644{ 1645 int s, i, j; 1646 1647 s = 0; 1648 for (i = 0; i < w; i++) { 1649 for (j = 0; j < w; j++) { 1650 s += pix[0]; 1651 pix ++; 1652 } 1653 pix += line_size - w; 1654 } 1655 return s; 1656} 1657 1658//near copy & paste from dsputil, FIXME 1659static int pix_norm1(uint8_t * pix, int line_size, int w) 1660{ 1661 int s, i, j; 1662 uint32_t *sq = ff_squareTbl + 256; 1663 1664 s = 0; 1665 for (i = 0; i < w; i++) { 1666 for (j = 0; j < w; j ++) { 1667 s += sq[pix[0]]; 1668 pix ++; 1669 } 1670 pix += line_size - w; 1671 } 1672 return s; 1673} 1674 1675static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ 1676 const int w= s->b_width << s->block_max_depth; 1677 const int rem_depth= s->block_max_depth - level; 1678 const int index= (x + y*w) << rem_depth; 1679 const int block_w= 1<<rem_depth; 1680 BlockNode block; 1681 int i,j; 1682 1683 block.color[0]= l; 1684 block.color[1]= cb; 1685 block.color[2]= cr; 1686 block.mx= mx; 1687 block.my= my; 1688 block.ref= ref; 1689 block.type= type; 1690 block.level= level; 1691 1692 for(j=0; j<block_w; j++){ 1693 for(i=0; i<block_w; i++){ 1694 s->block[index + i + j*w]= block; 1695 } 1696 } 1697} 1698 1699static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ 1700 const int offset[3]= { 1701 y*c-> stride + x, 1702 ((y*c->uvstride + x)>>1), 1703 ((y*c->uvstride + x)>>1), 1704 }; 1705 int i; 1706 for(i=0; i<3; i++){ 1707 c->src[0][i]= src [i]; 1708 c->ref[0][i]= ref [i] + offset[i]; 1709 } 1710 assert(!ref_index); 1711} 1712 1713static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, 1714 const BlockNode *left, const BlockNode *top, const BlockNode *tr){ 1715 if(s->ref_frames == 1){ 1716 *mx = mid_pred(left->mx, top->mx, tr->mx); 1717 *my = mid_pred(left->my, top->my, tr->my); 1718 }else{ 1719 const int *scale = scale_mv_ref[ref]; 1720 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8, 1721 (top ->mx * scale[top ->ref] + 128) >>8, 1722 (tr ->mx * scale[tr ->ref] + 128) >>8); 1723 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, 1724 (top ->my * scale[top ->ref] + 128) >>8, 1725 (tr ->my * scale[tr ->ref] + 128) >>8); 1726 } 1727} 1728 1729//FIXME copy&paste 1730#define P_LEFT P[1] 1731#define P_TOP P[2] 1732#define P_TOPRIGHT P[3] 1733#define P_MEDIAN P[4] 1734#define P_MV1 P[9] 1735#define FLAG_QPEL 1 //must be 1 1736 1737static int encode_q_branch(SnowContext *s, int level, int x, int y){ 1738 uint8_t p_buffer[1024]; 1739 uint8_t i_buffer[1024]; 1740 uint8_t p_state[sizeof(s->block_state)]; 1741 uint8_t i_state[sizeof(s->block_state)]; 1742 RangeCoder pc, ic; 1743 uint8_t *pbbak= s->c.bytestream; 1744 uint8_t *pbbak_start= s->c.bytestream_start; 1745 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits; 1746 const int w= s->b_width << s->block_max_depth; 1747 const int h= s->b_height << s->block_max_depth; 1748 const int rem_depth= s->block_max_depth - level; 1749 const int index= (x + y*w) << rem_depth; 1750 const int block_w= 1<<(LOG2_MB_SIZE - level); 1751 int trx= (x+1)<<rem_depth; 1752 int try= (y+1)<<rem_depth; 1753 const BlockNode *left = x ? &s->block[index-1] : &null_block; 1754 const BlockNode *top = y ? &s->block[index-w] : &null_block; 1755 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block; 1756 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; 1757 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; 1758 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt 1759 int pl = left->color[0]; 1760 int pcb= left->color[1]; 1761 int pcr= left->color[2]; 1762 int pmx, pmy; 1763 int mx=0, my=0; 1764 int l,cr,cb; 1765 const int stride= s->current_picture.linesize[0]; 1766 const int uvstride= s->current_picture.linesize[1]; 1767 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w, 1768 s->input_picture.data[1] + (x + y*uvstride)*block_w/2, 1769 s->input_picture.data[2] + (x + y*uvstride)*block_w/2}; 1770 int P[10][2]; 1771 int16_t last_mv[3][2]; 1772 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused 1773 const int shift= 1+qpel; 1774 MotionEstContext *c= &s->m.me; 1775 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); 1776 int mx_context= av_log2(2*FFABS(left->mx - top->mx)); 1777 int my_context= av_log2(2*FFABS(left->my - top->my)); 1778 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; 1779 int ref, best_ref, ref_score, ref_mx, ref_my; 1780 1781 assert(sizeof(s->block_state) >= 256); 1782 if(s->keyframe){ 1783 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); 1784 return 0; 1785 } 1786 1787// clip predictors / edge ? 1788 1789 P_LEFT[0]= left->mx; 1790 P_LEFT[1]= left->my; 1791 P_TOP [0]= top->mx; 1792 P_TOP [1]= top->my; 1793 P_TOPRIGHT[0]= tr->mx; 1794 P_TOPRIGHT[1]= tr->my; 1795 1796 last_mv[0][0]= s->block[index].mx; 1797 last_mv[0][1]= s->block[index].my; 1798 last_mv[1][0]= right->mx; 1799 last_mv[1][1]= right->my; 1800 last_mv[2][0]= bottom->mx; 1801 last_mv[2][1]= bottom->my; 1802 1803 s->m.mb_stride=2; 1804 s->m.mb_x= 1805 s->m.mb_y= 0; 1806 c->skip= 0; 1807 1808 assert(c-> stride == stride); 1809 assert(c->uvstride == uvstride); 1810 1811 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); 1812 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); 1813 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); 1814 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; 1815 1816 c->xmin = - x*block_w - 16+2; 1817 c->ymin = - y*block_w - 16+2; 1818 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; 1819 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; 1820 1821 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); 1822 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); 1823 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); 1824 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); 1825 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); 1826 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip 1827 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); 1828 1829 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 1830 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 1831 1832 if (!y) { 1833 c->pred_x= P_LEFT[0]; 1834 c->pred_y= P_LEFT[1]; 1835 } else { 1836 c->pred_x = P_MEDIAN[0]; 1837 c->pred_y = P_MEDIAN[1]; 1838 } 1839 1840 score= INT_MAX; 1841 best_ref= 0; 1842 for(ref=0; ref<s->ref_frames; ref++){ 1843 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0); 1844 1845 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, 1846 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); 1847 1848 assert(ref_mx >= c->xmin); 1849 assert(ref_mx <= c->xmax); 1850 assert(ref_my >= c->ymin); 1851 assert(ref_my <= c->ymax); 1852 1853 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); 1854 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); 1855 ref_score+= 2*av_log2(2*ref)*c->penalty_factor; 1856 if(s->ref_mvs[ref]){ 1857 s->ref_mvs[ref][index][0]= ref_mx; 1858 s->ref_mvs[ref][index][1]= ref_my; 1859 s->ref_scores[ref][index]= ref_score; 1860 } 1861 if(score > ref_score){ 1862 score= ref_score; 1863 best_ref= ref; 1864 mx= ref_mx; 1865 my= ref_my; 1866 } 1867 } 1868 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2 1869 1870 // subpel search 1871 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start); 1872 pc= s->c; 1873 pc.bytestream_start= 1874 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo 1875 memcpy(p_state, s->block_state, sizeof(s->block_state)); 1876 1877 if(level!=s->block_max_depth) 1878 put_rac(&pc, &p_state[4 + s_context], 1); 1879 put_rac(&pc, &p_state[1 + left->type + top->type], 0); 1880 if(s->ref_frames > 1) 1881 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); 1882 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); 1883 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); 1884 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); 1885 p_len= pc.bytestream - pc.bytestream_start; 1886 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT; 1887 1888 block_s= block_w*block_w; 1889 sum = pix_sum(current_data[0], stride, block_w); 1890 l= (sum + block_s/2)/block_s; 1891 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; 1892 1893 block_s= block_w*block_w>>2; 1894 sum = pix_sum(current_data[1], uvstride, block_w>>1); 1895 cb= (sum + block_s/2)/block_s; 1896// iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; 1897 sum = pix_sum(current_data[2], uvstride, block_w>>1); 1898 cr= (sum + block_s/2)/block_s; 1899// iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; 1900 1901 ic= s->c; 1902 ic.bytestream_start= 1903 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo 1904 memcpy(i_state, s->block_state, sizeof(s->block_state)); 1905 if(level!=s->block_max_depth) 1906 put_rac(&ic, &i_state[4 + s_context], 1); 1907 put_rac(&ic, &i_state[1 + left->type + top->type], 1); 1908 put_symbol(&ic, &i_state[32], l-pl , 1); 1909 put_symbol(&ic, &i_state[64], cb-pcb, 1); 1910 put_symbol(&ic, &i_state[96], cr-pcr, 1); 1911 i_len= ic.bytestream - ic.bytestream_start; 1912 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT; 1913 1914// assert(score==256*256*256*64-1); 1915 assert(iscore < 255*255*256 + s->lambda2*10); 1916 assert(iscore >= 0); 1917 assert(l>=0 && l<=255); 1918 assert(pl>=0 && pl<=255); 1919 1920 if(level==0){ 1921 int varc= iscore >> 8; 1922 int vard= score >> 8; 1923 if (vard <= 64 || vard < varc) 1924 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); 1925 else 1926 c->scene_change_score+= s->m.qscale; 1927 } 1928 1929 if(level!=s->block_max_depth){ 1930 put_rac(&s->c, &s->block_state[4 + s_context], 0); 1931 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); 1932 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); 1933 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); 1934 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); 1935 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead 1936 1937 if(score2 < score && score2 < iscore) 1938 return score2; 1939 } 1940 1941 if(iscore < score){ 1942 pred_mv(s, &pmx, &pmy, 0, left, top, tr); 1943 memcpy(pbbak, i_buffer, i_len); 1944 s->c= ic; 1945 s->c.bytestream_start= pbbak_start; 1946 s->c.bytestream= pbbak + i_len; 1947 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); 1948 memcpy(s->block_state, i_state, sizeof(s->block_state)); 1949 return iscore; 1950 }else{ 1951 memcpy(pbbak, p_buffer, p_len); 1952 s->c= pc; 1953 s->c.bytestream_start= pbbak_start; 1954 s->c.bytestream= pbbak + p_len; 1955 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); 1956 memcpy(s->block_state, p_state, sizeof(s->block_state)); 1957 return score; 1958 } 1959} 1960 1961static av_always_inline int same_block(BlockNode *a, BlockNode *b){ 1962 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ 1963 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); 1964 }else{ 1965 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); 1966 } 1967} 1968 1969static void encode_q_branch2(SnowContext *s, int level, int x, int y){ 1970 const int w= s->b_width << s->block_max_depth; 1971 const int rem_depth= s->block_max_depth - level; 1972 const int index= (x + y*w) << rem_depth; 1973 int trx= (x+1)<<rem_depth; 1974 BlockNode *b= &s->block[index]; 1975 const BlockNode *left = x ? &s->block[index-1] : &null_block; 1976 const BlockNode *top = y ? &s->block[index-w] : &null_block; 1977 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; 1978 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt 1979 int pl = left->color[0]; 1980 int pcb= left->color[1]; 1981 int pcr= left->color[2]; 1982 int pmx, pmy; 1983 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); 1984 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref; 1985 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref; 1986 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; 1987 1988 if(s->keyframe){ 1989 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); 1990 return; 1991 } 1992 1993 if(level!=s->block_max_depth){ 1994 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ 1995 put_rac(&s->c, &s->block_state[4 + s_context], 1); 1996 }else{ 1997 put_rac(&s->c, &s->block_state[4 + s_context], 0); 1998 encode_q_branch2(s, level+1, 2*x+0, 2*y+0); 1999 encode_q_branch2(s, level+1, 2*x+1, 2*y+0); 2000 encode_q_branch2(s, level+1, 2*x+0, 2*y+1); 2001 encode_q_branch2(s, level+1, 2*x+1, 2*y+1); 2002 return; 2003 } 2004 } 2005 if(b->type & BLOCK_INTRA){ 2006 pred_mv(s, &pmx, &pmy, 0, left, top, tr); 2007 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); 2008 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); 2009 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); 2010 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); 2011 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA); 2012 }else{ 2013 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); 2014 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); 2015 if(s->ref_frames > 1) 2016 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); 2017 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); 2018 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); 2019 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); 2020 } 2021} 2022 2023static void decode_q_branch(SnowContext *s, int level, int x, int y){ 2024 const int w= s->b_width << s->block_max_depth; 2025 const int rem_depth= s->block_max_depth - level; 2026 const int index= (x + y*w) << rem_depth; 2027 int trx= (x+1)<<rem_depth; 2028 const BlockNode *left = x ? &s->block[index-1] : &null_block; 2029 const BlockNode *top = y ? &s->block[index-w] : &null_block; 2030 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; 2031 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt 2032 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; 2033 2034 if(s->keyframe){ 2035 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); 2036 return; 2037 } 2038 2039 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ 2040 int type, mx, my; 2041 int l = left->color[0]; 2042 int cb= left->color[1]; 2043 int cr= left->color[2]; 2044 int ref = 0; 2045 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); 2046 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); 2047 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); 2048 2049 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; 2050 2051 if(type){ 2052 pred_mv(s, &mx, &my, 0, left, top, tr); 2053 l += get_symbol(&s->c, &s->block_state[32], 1); 2054 cb+= get_symbol(&s->c, &s->block_state[64], 1); 2055 cr+= get_symbol(&s->c, &s->block_state[96], 1); 2056 }else{ 2057 if(s->ref_frames > 1) 2058 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); 2059 pred_mv(s, &mx, &my, ref, left, top, tr); 2060 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); 2061 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); 2062 } 2063 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); 2064 }else{ 2065 decode_q_branch(s, level+1, 2*x+0, 2*y+0); 2066 decode_q_branch(s, level+1, 2*x+1, 2*y+0); 2067 decode_q_branch(s, level+1, 2*x+0, 2*y+1); 2068 decode_q_branch(s, level+1, 2*x+1, 2*y+1); 2069 } 2070} 2071 2072static void encode_blocks(SnowContext *s, int search){ 2073 int x, y; 2074 int w= s->b_width; 2075 int h= s->b_height; 2076 2077 if(s->avctx->me_method == ME_ITER && !s->keyframe && search) 2078 iterative_me(s); 2079 2080 for(y=0; y<h; y++){ 2081 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit 2082 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); 2083 return; 2084 } 2085 for(x=0; x<w; x++){ 2086 if(s->avctx->me_method == ME_ITER || !search) 2087 encode_q_branch2(s, 0, x, y); 2088 else 2089 encode_q_branch (s, 0, x, y); 2090 } 2091 } 2092} 2093 2094static void decode_blocks(SnowContext *s){ 2095 int x, y; 2096 int w= s->b_width; 2097 int h= s->b_height; 2098 2099 for(y=0; y<h; y++){ 2100 for(x=0; x<w; x++){ 2101 decode_q_branch(s, 0, x, y); 2102 } 2103 } 2104} 2105 2106static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ 2107 static const uint8_t weight[64]={ 2108 8,7,6,5,4,3,2,1, 2109 7,7,0,0,0,0,0,1, 2110 6,0,6,0,0,0,2,0, 2111 5,0,0,5,0,3,0,0, 2112 4,0,0,0,4,0,0,0, 2113 3,0,0,5,0,3,0,0, 2114 2,0,6,0,0,0,2,0, 2115 1,7,0,0,0,0,0,1, 2116 }; 2117 2118 static const uint8_t brane[256]={ 2119 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 2120 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, 2121 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, 2122 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, 2123 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, 2124 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, 2125 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, 2126 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, 2127 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, 2128 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, 2129 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, 2130 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, 2131 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, 2132 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, 2133 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, 2134 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, 2135 }; 2136 2137 static const uint8_t needs[16]={ 2138 0,1,0,0, 2139 2,4,2,0, 2140 0,1,0,0, 2141 15 2142 }; 2143 2144 int x, y, b, r, l; 2145 int16_t tmpIt [64*(32+HTAPS_MAX)]; 2146 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; 2147 int16_t *tmpI= tmpIt; 2148 uint8_t *tmp2= tmp2t[0]; 2149 const uint8_t *hpel[11]; 2150 assert(dx<16 && dy<16); 2151 r= brane[dx + 16*dy]&15; 2152 l= brane[dx + 16*dy]>>4; 2153 2154 b= needs[l] | needs[r]; 2155 if(p && !p->diag_mc) 2156 b= 15; 2157 2158 if(b&5){ 2159 for(y=0; y < b_h+HTAPS_MAX-1; y++){ 2160 for(x=0; x < b_w; x++){ 2161 int a_1=src[x + HTAPS_MAX/2-4]; 2162 int a0= src[x + HTAPS_MAX/2-3]; 2163 int a1= src[x + HTAPS_MAX/2-2]; 2164 int a2= src[x + HTAPS_MAX/2-1]; 2165 int a3= src[x + HTAPS_MAX/2+0]; 2166 int a4= src[x + HTAPS_MAX/2+1]; 2167 int a5= src[x + HTAPS_MAX/2+2]; 2168 int a6= src[x + HTAPS_MAX/2+3]; 2169 int am=0; 2170 if(!p || p->fast_mc){ 2171 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); 2172 tmpI[x]= am; 2173 am= (am+16)>>5; 2174 }else{ 2175 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); 2176 tmpI[x]= am; 2177 am= (am+32)>>6; 2178 } 2179 2180 if(am&(~255)) am= ~(am>>31); 2181 tmp2[x]= am; 2182 } 2183 tmpI+= 64; 2184 tmp2+= stride; 2185 src += stride; 2186 } 2187 src -= stride*y; 2188 } 2189 src += HTAPS_MAX/2 - 1; 2190 tmp2= tmp2t[1]; 2191 2192 if(b&2){ 2193 for(y=0; y < b_h; y++){ 2194 for(x=0; x < b_w+1; x++){ 2195 int a_1=src[x + (HTAPS_MAX/2-4)*stride]; 2196 int a0= src[x + (HTAPS_MAX/2-3)*stride]; 2197 int a1= src[x + (HTAPS_MAX/2-2)*stride]; 2198 int a2= src[x + (HTAPS_MAX/2-1)*stride]; 2199 int a3= src[x + (HTAPS_MAX/2+0)*stride]; 2200 int a4= src[x + (HTAPS_MAX/2+1)*stride]; 2201 int a5= src[x + (HTAPS_MAX/2+2)*stride]; 2202 int a6= src[x + (HTAPS_MAX/2+3)*stride]; 2203 int am=0; 2204 if(!p || p->fast_mc) 2205 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; 2206 else 2207 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; 2208 2209 if(am&(~255)) am= ~(am>>31); 2210 tmp2[x]= am; 2211 } 2212 src += stride; 2213 tmp2+= stride; 2214 } 2215 src -= stride*y; 2216 } 2217 src += stride*(HTAPS_MAX/2 - 1); 2218 tmp2= tmp2t[2]; 2219 tmpI= tmpIt; 2220 if(b&4){ 2221 for(y=0; y < b_h; y++){ 2222 for(x=0; x < b_w; x++){ 2223 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64]; 2224 int a0= tmpI[x + (HTAPS_MAX/2-3)*64]; 2225 int a1= tmpI[x + (HTAPS_MAX/2-2)*64]; 2226 int a2= tmpI[x + (HTAPS_MAX/2-1)*64]; 2227 int a3= tmpI[x + (HTAPS_MAX/2+0)*64]; 2228 int a4= tmpI[x + (HTAPS_MAX/2+1)*64]; 2229 int a5= tmpI[x + (HTAPS_MAX/2+2)*64]; 2230 int a6= tmpI[x + (HTAPS_MAX/2+3)*64]; 2231 int am=0; 2232 if(!p || p->fast_mc) 2233 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; 2234 else 2235 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; 2236 if(am&(~255)) am= ~(am>>31); 2237 tmp2[x]= am; 2238 } 2239 tmpI+= 64; 2240 tmp2+= stride; 2241 } 2242 } 2243 2244 hpel[ 0]= src; 2245 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1); 2246 hpel[ 2]= src + 1; 2247 2248 hpel[ 4]= tmp2t[1]; 2249 hpel[ 5]= tmp2t[2]; 2250 hpel[ 6]= tmp2t[1] + 1; 2251 2252 hpel[ 8]= src + stride; 2253 hpel[ 9]= hpel[1] + stride; 2254 hpel[10]= hpel[8] + 1; 2255 2256 if(b==15){ 2257 const uint8_t *src1= hpel[dx/8 + dy/8*4 ]; 2258 const uint8_t *src2= hpel[dx/8 + dy/8*4+1]; 2259 const uint8_t *src3= hpel[dx/8 + dy/8*4+4]; 2260 const uint8_t *src4= hpel[dx/8 + dy/8*4+5]; 2261 dx&=7; 2262 dy&=7; 2263 for(y=0; y < b_h; y++){ 2264 for(x=0; x < b_w; x++){ 2265 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ 2266 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; 2267 } 2268 src1+=stride; 2269 src2+=stride; 2270 src3+=stride; 2271 src4+=stride; 2272 dst +=stride; 2273 } 2274 }else{ 2275 const uint8_t *src1= hpel[l]; 2276 const uint8_t *src2= hpel[r]; 2277 int a= weight[((dx&7) + (8*(dy&7)))]; 2278 int b= 8-a; 2279 for(y=0; y < b_h; y++){ 2280 for(x=0; x < b_w; x++){ 2281 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; 2282 } 2283 src1+=stride; 2284 src2+=stride; 2285 dst +=stride; 2286 } 2287 } 2288} 2289 2290#define mca(dx,dy,b_w)\ 2291static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ 2292 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ 2293 assert(h==b_w);\ 2294 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ 2295} 2296 2297mca( 0, 0,16) 2298mca( 8, 0,16) 2299mca( 0, 8,16) 2300mca( 8, 8,16) 2301mca( 0, 0,8) 2302mca( 8, 0,8) 2303mca( 0, 8,8) 2304mca( 8, 8,8) 2305 2306static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ 2307 if(block->type & BLOCK_INTRA){ 2308 int x, y; 2309 const int color = block->color[plane_index]; 2310 const int color4= color*0x01010101; 2311 if(b_w==32){ 2312 for(y=0; y < b_h; y++){ 2313 *(uint32_t*)&dst[0 + y*stride]= color4; 2314 *(uint32_t*)&dst[4 + y*stride]= color4; 2315 *(uint32_t*)&dst[8 + y*stride]= color4; 2316 *(uint32_t*)&dst[12+ y*stride]= color4; 2317 *(uint32_t*)&dst[16+ y*stride]= color4; 2318 *(uint32_t*)&dst[20+ y*stride]= color4; 2319 *(uint32_t*)&dst[24+ y*stride]= color4; 2320 *(uint32_t*)&dst[28+ y*stride]= color4; 2321 } 2322 }else if(b_w==16){ 2323 for(y=0; y < b_h; y++){ 2324 *(uint32_t*)&dst[0 + y*stride]= color4; 2325 *(uint32_t*)&dst[4 + y*stride]= color4; 2326 *(uint32_t*)&dst[8 + y*stride]= color4; 2327 *(uint32_t*)&dst[12+ y*stride]= color4; 2328 } 2329 }else if(b_w==8){ 2330 for(y=0; y < b_h; y++){ 2331 *(uint32_t*)&dst[0 + y*stride]= color4; 2332 *(uint32_t*)&dst[4 + y*stride]= color4; 2333 } 2334 }else if(b_w==4){ 2335 for(y=0; y < b_h; y++){ 2336 *(uint32_t*)&dst[0 + y*stride]= color4; 2337 } 2338 }else{ 2339 for(y=0; y < b_h; y++){ 2340 for(x=0; x < b_w; x++){ 2341 dst[x + y*stride]= color; 2342 } 2343 } 2344 } 2345 }else{ 2346 uint8_t *src= s->last_picture[block->ref].data[plane_index]; 2347 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; 2348 int mx= block->mx*scale; 2349 int my= block->my*scale; 2350 const int dx= mx&15; 2351 const int dy= my&15; 2352 const int tab_index= 3 - (b_w>>2) + (b_w>>4); 2353 sx += (mx>>4) - (HTAPS_MAX/2-1); 2354 sy += (my>>4) - (HTAPS_MAX/2-1); 2355 src += sx + sy*stride; 2356 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2) 2357 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ 2358 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h); 2359 src= tmp + MB_SIZE; 2360 } 2361// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); 2362// assert(!(b_w&(b_w-1))); 2363 assert(b_w>1 && b_h>1); 2364 assert((tab_index>=0 && tab_index<4) || b_w==32); 2365 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) 2366 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); 2367 else if(b_w==32){ 2368 int y; 2369 for(y=0; y<b_h; y+=16){ 2370 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); 2371 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); 2372 } 2373 }else if(b_w==b_h) 2374 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); 2375 else if(b_w==2*b_h){ 2376 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); 2377 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); 2378 }else{ 2379 assert(2*b_w==b_h); 2380 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); 2381 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); 2382 } 2383 } 2384} 2385 2386void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, 2387 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ 2388 int y, x; 2389 IDWTELEM * dst; 2390 for(y=0; y<b_h; y++){ 2391 //FIXME ugly misuse of obmc_stride 2392 const uint8_t *obmc1= obmc + y*obmc_stride; 2393 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); 2394 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); 2395 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); 2396 dst = slice_buffer_get_line(sb, src_y + y); 2397 for(x=0; x<b_w; x++){ 2398 int v= obmc1[x] * block[3][x + y*src_stride] 2399 +obmc2[x] * block[2][x + y*src_stride] 2400 +obmc3[x] * block[1][x + y*src_stride] 2401 +obmc4[x] * block[0][x + y*src_stride]; 2402 2403 v <<= 8 - LOG2_OBMC_MAX; 2404 if(FRAC_BITS != 8){ 2405 v >>= 8 - FRAC_BITS; 2406 } 2407 if(add){ 2408 v += dst[x + src_x]; 2409 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; 2410 if(v&(~255)) v= ~(v>>31); 2411 dst8[x + y*src_stride] = v; 2412 }else{ 2413 dst[x + src_x] -= v; 2414 } 2415 } 2416 } 2417} 2418 2419//FIXME name cleanup (b_w, block_w, b_width stuff) 2420static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ 2421 const int b_width = s->b_width << s->block_max_depth; 2422 const int b_height= s->b_height << s->block_max_depth; 2423 const int b_stride= b_width; 2424 BlockNode *lt= &s->block[b_x + b_y*b_stride]; 2425 BlockNode *rt= lt+1; 2426 BlockNode *lb= lt+b_stride; 2427 BlockNode *rb= lb+1; 2428 uint8_t *block[4]; 2429 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; 2430 uint8_t *tmp = s->scratchbuf; 2431 uint8_t *ptmp; 2432 int x,y; 2433 2434 if(b_x<0){ 2435 lt= rt; 2436 lb= rb; 2437 }else if(b_x + 1 >= b_width){ 2438 rt= lt; 2439 rb= lb; 2440 } 2441 if(b_y<0){ 2442 lt= lb; 2443 rt= rb; 2444 }else if(b_y + 1 >= b_height){ 2445 lb= lt; 2446 rb= rt; 2447 } 2448 2449 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16 2450 obmc -= src_x; 2451 b_w += src_x; 2452 if(!sliced && !offset_dst) 2453 dst -= src_x; 2454 src_x=0; 2455 }else if(src_x + b_w > w){ 2456 b_w = w - src_x; 2457 } 2458 if(src_y<0){ 2459 obmc -= src_y*obmc_stride; 2460 b_h += src_y; 2461 if(!sliced && !offset_dst) 2462 dst -= src_y*dst_stride; 2463 src_y=0; 2464 }else if(src_y + b_h> h){ 2465 b_h = h - src_y; 2466 } 2467 2468 if(b_w<=0 || b_h<=0) return; 2469 2470 assert(src_stride > 2*MB_SIZE + 5); 2471 2472 if(!sliced && offset_dst) 2473 dst += src_x + src_y*dst_stride; 2474 dst8+= src_x + src_y*src_stride; 2475// src += src_x + src_y*src_stride; 2476 2477 ptmp= tmp + 3*tmp_step; 2478 block[0]= ptmp; 2479 ptmp+=tmp_step; 2480 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); 2481 2482 if(same_block(lt, rt)){ 2483 block[1]= block[0]; 2484 }else{ 2485 block[1]= ptmp; 2486 ptmp+=tmp_step; 2487 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); 2488 } 2489 2490 if(same_block(lt, lb)){ 2491 block[2]= block[0]; 2492 }else if(same_block(rt, lb)){ 2493 block[2]= block[1]; 2494 }else{ 2495 block[2]= ptmp; 2496 ptmp+=tmp_step; 2497 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); 2498 } 2499 2500 if(same_block(lt, rb) ){ 2501 block[3]= block[0]; 2502 }else if(same_block(rt, rb)){ 2503 block[3]= block[1]; 2504 }else if(same_block(lb, rb)){ 2505 block[3]= block[2]; 2506 }else{ 2507 block[3]= ptmp; 2508 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); 2509 } 2510#if 0 2511 for(y=0; y<b_h; y++){ 2512 for(x=0; x<b_w; x++){ 2513 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); 2514 if(add) dst[x + y*dst_stride] += v; 2515 else dst[x + y*dst_stride] -= v; 2516 } 2517 } 2518 for(y=0; y<b_h; y++){ 2519 uint8_t *obmc2= obmc + (obmc_stride>>1); 2520 for(x=0; x<b_w; x++){ 2521 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); 2522 if(add) dst[x + y*dst_stride] += v; 2523 else dst[x + y*dst_stride] -= v; 2524 } 2525 } 2526 for(y=0; y<b_h; y++){ 2527 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); 2528 for(x=0; x<b_w; x++){ 2529 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); 2530 if(add) dst[x + y*dst_stride] += v; 2531 else dst[x + y*dst_stride] -= v; 2532 } 2533 } 2534 for(y=0; y<b_h; y++){ 2535 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); 2536 uint8_t *obmc4= obmc3+ (obmc_stride>>1); 2537 for(x=0; x<b_w; x++){ 2538 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); 2539 if(add) dst[x + y*dst_stride] += v; 2540 else dst[x + y*dst_stride] -= v; 2541 } 2542 } 2543#else 2544 if(sliced){ 2545 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); 2546 }else{ 2547 for(y=0; y<b_h; y++){ 2548 //FIXME ugly misuse of obmc_stride 2549 const uint8_t *obmc1= obmc + y*obmc_stride; 2550 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); 2551 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); 2552 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); 2553 for(x=0; x<b_w; x++){ 2554 int v= obmc1[x] * block[3][x + y*src_stride] 2555 +obmc2[x] * block[2][x + y*src_stride] 2556 +obmc3[x] * block[1][x + y*src_stride] 2557 +obmc4[x] * block[0][x + y*src_stride]; 2558 2559 v <<= 8 - LOG2_OBMC_MAX; 2560 if(FRAC_BITS != 8){ 2561 v >>= 8 - FRAC_BITS; 2562 } 2563 if(add){ 2564 v += dst[x + y*dst_stride]; 2565 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; 2566 if(v&(~255)) v= ~(v>>31); 2567 dst8[x + y*src_stride] = v; 2568 }else{ 2569 dst[x + y*dst_stride] -= v; 2570 } 2571 } 2572 } 2573 } 2574#endif /* 0 */ 2575} 2576 2577static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ 2578 Plane *p= &s->plane[plane_index]; 2579 const int mb_w= s->b_width << s->block_max_depth; 2580 const int mb_h= s->b_height << s->block_max_depth; 2581 int x, y, mb_x; 2582 int block_size = MB_SIZE >> s->block_max_depth; 2583 int block_w = plane_index ? block_size/2 : block_size; 2584 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; 2585 int obmc_stride= plane_index ? block_size : 2*block_size; 2586 int ref_stride= s->current_picture.linesize[plane_index]; 2587 uint8_t *dst8= s->current_picture.data[plane_index]; 2588 int w= p->width; 2589 int h= p->height; 2590 2591 if(s->keyframe || (s->avctx->debug&512)){ 2592 if(mb_y==mb_h) 2593 return; 2594 2595 if(add){ 2596 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ 2597// DWTELEM * line = slice_buffer_get_line(sb, y); 2598 IDWTELEM * line = sb->line[y]; 2599 for(x=0; x<w; x++){ 2600// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); 2601 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); 2602 v >>= FRAC_BITS; 2603 if(v&(~255)) v= ~(v>>31); 2604 dst8[x + y*ref_stride]= v; 2605 } 2606 } 2607 }else{ 2608 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ 2609// DWTELEM * line = slice_buffer_get_line(sb, y); 2610 IDWTELEM * line = sb->line[y]; 2611 for(x=0; x<w; x++){ 2612 line[x] -= 128 << FRAC_BITS; 2613// buf[x + y*w]-= 128<<FRAC_BITS; 2614 } 2615 } 2616 } 2617 2618 return; 2619 } 2620 2621 for(mb_x=0; mb_x<=mb_w; mb_x++){ 2622 add_yblock(s, 1, sb, old_buffer, dst8, obmc, 2623 block_w*mb_x - block_w/2, 2624 block_w*mb_y - block_w/2, 2625 block_w, block_w, 2626 w, h, 2627 w, ref_stride, obmc_stride, 2628 mb_x - 1, mb_y - 1, 2629 add, 0, plane_index); 2630 } 2631} 2632 2633static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ 2634 Plane *p= &s->plane[plane_index]; 2635 const int mb_w= s->b_width << s->block_max_depth; 2636 const int mb_h= s->b_height << s->block_max_depth; 2637 int x, y, mb_x; 2638 int block_size = MB_SIZE >> s->block_max_depth; 2639 int block_w = plane_index ? block_size/2 : block_size; 2640 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; 2641 const int obmc_stride= plane_index ? block_size : 2*block_size; 2642 int ref_stride= s->current_picture.linesize[plane_index]; 2643 uint8_t *dst8= s->current_picture.data[plane_index]; 2644 int w= p->width; 2645 int h= p->height; 2646 2647 if(s->keyframe || (s->avctx->debug&512)){ 2648 if(mb_y==mb_h) 2649 return; 2650 2651 if(add){ 2652 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ 2653 for(x=0; x<w; x++){ 2654 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); 2655 v >>= FRAC_BITS; 2656 if(v&(~255)) v= ~(v>>31); 2657 dst8[x + y*ref_stride]= v; 2658 } 2659 } 2660 }else{ 2661 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ 2662 for(x=0; x<w; x++){ 2663 buf[x + y*w]-= 128<<FRAC_BITS; 2664 } 2665 } 2666 } 2667 2668 return; 2669 } 2670 2671 for(mb_x=0; mb_x<=mb_w; mb_x++){ 2672 add_yblock(s, 0, NULL, buf, dst8, obmc, 2673 block_w*mb_x - block_w/2, 2674 block_w*mb_y - block_w/2, 2675 block_w, block_w, 2676 w, h, 2677 w, ref_stride, obmc_stride, 2678 mb_x - 1, mb_y - 1, 2679 add, 1, plane_index); 2680 } 2681} 2682 2683static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ 2684 const int mb_h= s->b_height << s->block_max_depth; 2685 int mb_y; 2686 for(mb_y=0; mb_y<=mb_h; mb_y++) 2687 predict_slice(s, buf, plane_index, add, mb_y); 2688} 2689 2690static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ 2691 int i, x2, y2; 2692 Plane *p= &s->plane[plane_index]; 2693 const int block_size = MB_SIZE >> s->block_max_depth; 2694 const int block_w = plane_index ? block_size/2 : block_size; 2695 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; 2696 const int obmc_stride= plane_index ? block_size : 2*block_size; 2697 const int ref_stride= s->current_picture.linesize[plane_index]; 2698 uint8_t *src= s-> input_picture.data[plane_index]; 2699 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned 2700 const int b_stride = s->b_width << s->block_max_depth; 2701 const int w= p->width; 2702 const int h= p->height; 2703 int index= mb_x + mb_y*b_stride; 2704 BlockNode *b= &s->block[index]; 2705 BlockNode backup= *b; 2706 int ab=0; 2707 int aa=0; 2708 2709 b->type|= BLOCK_INTRA; 2710 b->color[plane_index]= 0; 2711 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM)); 2712 2713 for(i=0; i<4; i++){ 2714 int mb_x2= mb_x + (i &1) - 1; 2715 int mb_y2= mb_y + (i>>1) - 1; 2716 int x= block_w*mb_x2 + block_w/2; 2717 int y= block_w*mb_y2 + block_w/2; 2718 2719 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, 2720 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); 2721 2722 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ 2723 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ 2724 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; 2725 int obmc_v= obmc[index]; 2726 int d; 2727 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; 2728 if(x<0) obmc_v += obmc[index + block_w]; 2729 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; 2730 if(x+block_w>w) obmc_v += obmc[index - block_w]; 2731 //FIXME precalculate this or simplify it somehow else 2732 2733 d = -dst[index] + (1<<(FRAC_BITS-1)); 2734 dst[index] = d; 2735 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; 2736 aa += obmc_v * obmc_v; //FIXME precalculate this 2737 } 2738 } 2739 } 2740 *b= backup; 2741 2742 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping 2743} 2744 2745static inline int get_block_bits(SnowContext *s, int x, int y, int w){ 2746 const int b_stride = s->b_width << s->block_max_depth; 2747 const int b_height = s->b_height<< s->block_max_depth; 2748 int index= x + y*b_stride; 2749 const BlockNode *b = &s->block[index]; 2750 const BlockNode *left = x ? &s->block[index-1] : &null_block; 2751 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block; 2752 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; 2753 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; 2754 int dmx, dmy; 2755// int mx_context= av_log2(2*FFABS(left->mx - top->mx)); 2756// int my_context= av_log2(2*FFABS(left->my - top->my)); 2757 2758 if(x<0 || x>=b_stride || y>=b_height) 2759 return 0; 2760/* 27611 0 0 276201X 1-2 1 2763001XX 3-6 2-3 27640001XXX 7-14 4-7 276500001XXXX 15-30 8-15 2766*/ 2767//FIXME try accurate rate 2768//FIXME intra and inter predictors if surrounding blocks are not the same type 2769 if(b->type & BLOCK_INTRA){ 2770 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) 2771 + av_log2(2*FFABS(left->color[1] - b->color[1])) 2772 + av_log2(2*FFABS(left->color[2] - b->color[2]))); 2773 }else{ 2774 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); 2775 dmx-= b->mx; 2776 dmy-= b->my; 2777 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda 2778 + av_log2(2*FFABS(dmy)) 2779 + av_log2(2*b->ref)); 2780 } 2781} 2782 2783static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ 2784 Plane *p= &s->plane[plane_index]; 2785 const int block_size = MB_SIZE >> s->block_max_depth; 2786 const int block_w = plane_index ? block_size/2 : block_size; 2787 const int obmc_stride= plane_index ? block_size : 2*block_size; 2788 const int ref_stride= s->current_picture.linesize[plane_index]; 2789 uint8_t *dst= s->current_picture.data[plane_index]; 2790 uint8_t *src= s-> input_picture.data[plane_index]; 2791 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; 2792 uint8_t *cur = s->scratchbuf; 2793 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)]; 2794 const int b_stride = s->b_width << s->block_max_depth; 2795 const int b_height = s->b_height<< s->block_max_depth; 2796 const int w= p->width; 2797 const int h= p->height; 2798 int distortion; 2799 int rate= 0; 2800 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); 2801 int sx= block_w*mb_x - block_w/2; 2802 int sy= block_w*mb_y - block_w/2; 2803 int x0= FFMAX(0,-sx); 2804 int y0= FFMAX(0,-sy); 2805 int x1= FFMIN(block_w*2, w-sx); 2806 int y1= FFMIN(block_w*2, h-sy); 2807 int i,x,y; 2808 2809 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); 2810 2811 for(y=y0; y<y1; y++){ 2812 const uint8_t *obmc1= obmc_edged + y*obmc_stride; 2813 const IDWTELEM *pred1 = pred + y*obmc_stride; 2814 uint8_t *cur1 = cur + y*ref_stride; 2815 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; 2816 for(x=x0; x<x1; x++){ 2817#if FRAC_BITS >= LOG2_OBMC_MAX 2818 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); 2819#else 2820 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS); 2821#endif 2822 v = (v + pred1[x]) >> FRAC_BITS; 2823 if(v&(~255)) v= ~(v>>31); 2824 dst1[x] = v; 2825 } 2826 } 2827 2828 /* copy the regions where obmc[] = (uint8_t)256 */ 2829 if(LOG2_OBMC_MAX == 8 2830 && (mb_x == 0 || mb_x == b_stride-1) 2831 && (mb_y == 0 || mb_y == b_height-1)){ 2832 if(mb_x == 0) 2833 x1 = block_w; 2834 else 2835 x0 = block_w; 2836 if(mb_y == 0) 2837 y1 = block_w; 2838 else 2839 y0 = block_w; 2840 for(y=y0; y<y1; y++) 2841 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); 2842 } 2843 2844 if(block_w==16){ 2845 /* FIXME rearrange dsputil to fit 32x32 cmp functions */ 2846 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ 2847 /* FIXME cmps overlap but do not cover the wavelet's whole support. 2848 * So improving the score of one block is not strictly guaranteed 2849 * to improve the score of the whole frame, thus iterative motion 2850 * estimation does not always converge. */ 2851 if(s->avctx->me_cmp == FF_CMP_W97) 2852 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); 2853 else if(s->avctx->me_cmp == FF_CMP_W53) 2854 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); 2855 else{ 2856 distortion = 0; 2857 for(i=0; i<4; i++){ 2858 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; 2859 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); 2860 } 2861 } 2862 }else{ 2863 assert(block_w==8); 2864 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); 2865 } 2866 2867 if(plane_index==0){ 2868 for(i=0; i<4; i++){ 2869/* ..RRr 2870 * .RXx. 2871 * rxx.. 2872 */ 2873 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1); 2874 } 2875 if(mb_x == b_stride-2) 2876 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1); 2877 } 2878 return distortion + rate*penalty_factor; 2879} 2880 2881static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ 2882 int i, y2; 2883 Plane *p= &s->plane[plane_index]; 2884 const int block_size = MB_SIZE >> s->block_max_depth; 2885 const int block_w = plane_index ? block_size/2 : block_size; 2886 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; 2887 const int obmc_stride= plane_index ? block_size : 2*block_size; 2888 const int ref_stride= s->current_picture.linesize[plane_index]; 2889 uint8_t *dst= s->current_picture.data[plane_index]; 2890 uint8_t *src= s-> input_picture.data[plane_index]; 2891 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst 2892 // const has only been removed from zero_dst to suppress a warning 2893 static IDWTELEM zero_dst[4096]; //FIXME 2894 const int b_stride = s->b_width << s->block_max_depth; 2895 const int w= p->width; 2896 const int h= p->height; 2897 int distortion= 0; 2898 int rate= 0; 2899 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); 2900 2901 for(i=0; i<9; i++){ 2902 int mb_x2= mb_x + (i%3) - 1; 2903 int mb_y2= mb_y + (i/3) - 1; 2904 int x= block_w*mb_x2 + block_w/2; 2905 int y= block_w*mb_y2 + block_w/2; 2906 2907 add_yblock(s, 0, NULL, zero_dst, dst, obmc, 2908 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); 2909 2910 //FIXME find a cleaner/simpler way to skip the outside stuff 2911 for(y2= y; y2<0; y2++) 2912 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); 2913 for(y2= h; y2<y+block_w; y2++) 2914 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); 2915 if(x<0){ 2916 for(y2= y; y2<y+block_w; y2++) 2917 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); 2918 } 2919 if(x+block_w > w){ 2920 for(y2= y; y2<y+block_w; y2++) 2921 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); 2922 } 2923 2924 assert(block_w== 8 || block_w==16); 2925 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); 2926 } 2927 2928 if(plane_index==0){ 2929 BlockNode *b= &s->block[mb_x+mb_y*b_stride]; 2930 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); 2931 2932/* ..RRRr 2933 * .RXXx. 2934 * .RXXx. 2935 * rxxx. 2936 */ 2937 if(merged) 2938 rate = get_block_bits(s, mb_x, mb_y, 2); 2939 for(i=merged?4:0; i<9; i++){ 2940 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}}; 2941 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1); 2942 } 2943 } 2944 return distortion + rate*penalty_factor; 2945} 2946 2947static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ 2948 const int b_stride= s->b_width << s->block_max_depth; 2949 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; 2950 BlockNode backup= *block; 2951 int rd, index, value; 2952 2953 assert(mb_x>=0 && mb_y>=0); 2954 assert(mb_x<b_stride); 2955 2956 if(intra){ 2957 block->color[0] = p[0]; 2958 block->color[1] = p[1]; 2959 block->color[2] = p[2]; 2960 block->type |= BLOCK_INTRA; 2961 }else{ 2962 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); 2963 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); 2964 if(s->me_cache[index] == value) 2965 return 0; 2966 s->me_cache[index]= value; 2967 2968 block->mx= p[0]; 2969 block->my= p[1]; 2970 block->type &= ~BLOCK_INTRA; 2971 } 2972 2973 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); 2974 2975//FIXME chroma 2976 if(rd < *best_rd){ 2977 *best_rd= rd; 2978 return 1; 2979 }else{ 2980 *block= backup; 2981 return 0; 2982 } 2983} 2984 2985/* special case for int[2] args we discard afterwards, 2986 * fixes compilation problem with gcc 2.95 */ 2987static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ 2988 int p[2] = {p0, p1}; 2989 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); 2990} 2991 2992static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ 2993 const int b_stride= s->b_width << s->block_max_depth; 2994 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; 2995 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; 2996 int rd, index, value; 2997 2998 assert(mb_x>=0 && mb_y>=0); 2999 assert(mb_x<b_stride); 3000 assert(((mb_x|mb_y)&1) == 0); 3001 3002 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); 3003 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); 3004 if(s->me_cache[index] == value) 3005 return 0; 3006 s->me_cache[index]= value; 3007 3008 block->mx= p0; 3009 block->my= p1; 3010 block->ref= ref; 3011 block->type &= ~BLOCK_INTRA; 3012 block[1]= block[b_stride]= block[b_stride+1]= *block; 3013 3014 rd= get_4block_rd(s, mb_x, mb_y, 0); 3015 3016//FIXME chroma 3017 if(rd < *best_rd){ 3018 *best_rd= rd; 3019 return 1; 3020 }else{ 3021 block[0]= backup[0]; 3022 block[1]= backup[1]; 3023 block[b_stride]= backup[2]; 3024 block[b_stride+1]= backup[3]; 3025 return 0; 3026 } 3027} 3028 3029static void iterative_me(SnowContext *s){ 3030 int pass, mb_x, mb_y; 3031 const int b_width = s->b_width << s->block_max_depth; 3032 const int b_height= s->b_height << s->block_max_depth; 3033 const int b_stride= b_width; 3034 int color[3]; 3035 3036 { 3037 RangeCoder r = s->c; 3038 uint8_t state[sizeof(s->block_state)]; 3039 memcpy(state, s->block_state, sizeof(s->block_state)); 3040 for(mb_y= 0; mb_y<s->b_height; mb_y++) 3041 for(mb_x= 0; mb_x<s->b_width; mb_x++) 3042 encode_q_branch(s, 0, mb_x, mb_y); 3043 s->c = r; 3044 memcpy(s->block_state, state, sizeof(s->block_state)); 3045 } 3046 3047 for(pass=0; pass<25; pass++){ 3048 int change= 0; 3049 3050 for(mb_y= 0; mb_y<b_height; mb_y++){ 3051 for(mb_x= 0; mb_x<b_width; mb_x++){ 3052 int dia_change, i, j, ref; 3053 int best_rd= INT_MAX, ref_rd; 3054 BlockNode backup, ref_b; 3055 const int index= mb_x + mb_y * b_stride; 3056 BlockNode *block= &s->block[index]; 3057 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL; 3058 BlockNode *lb = mb_x ? &s->block[index -1] : NULL; 3059 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL; 3060 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL; 3061 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL; 3062 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL; 3063 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL; 3064 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL; 3065 const int b_w= (MB_SIZE >> s->block_max_depth); 3066 uint8_t obmc_edged[b_w*2][b_w*2]; 3067 3068 if(pass && (block->type & BLOCK_OPT)) 3069 continue; 3070 block->type |= BLOCK_OPT; 3071 3072 backup= *block; 3073 3074 if(!s->me_cache_generation) 3075 memset(s->me_cache, 0, sizeof(s->me_cache)); 3076 s->me_cache_generation += 1<<22; 3077 3078 //FIXME precalculate 3079 { 3080 int x, y; 3081 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); 3082 if(mb_x==0) 3083 for(y=0; y<b_w*2; y++) 3084 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); 3085 if(mb_x==b_stride-1) 3086 for(y=0; y<b_w*2; y++) 3087 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); 3088 if(mb_y==0){ 3089 for(x=0; x<b_w*2; x++) 3090 obmc_edged[0][x] += obmc_edged[b_w-1][x]; 3091 for(y=1; y<b_w; y++) 3092 memcpy(obmc_edged[y], obmc_edged[0], b_w*2); 3093 } 3094 if(mb_y==b_height-1){ 3095 for(x=0; x<b_w*2; x++) 3096 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; 3097 for(y=b_w; y<b_w*2-1; y++) 3098 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); 3099 } 3100 } 3101 3102 //skip stuff outside the picture 3103 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){ 3104 uint8_t *src= s-> input_picture.data[0]; 3105 uint8_t *dst= s->current_picture.data[0]; 3106 const int stride= s->current_picture.linesize[0]; 3107 const int block_w= MB_SIZE >> s->block_max_depth; 3108 const int sx= block_w*mb_x - block_w/2; 3109 const int sy= block_w*mb_y - block_w/2; 3110 const int w= s->plane[0].width; 3111 const int h= s->plane[0].height; 3112 int y; 3113 3114 for(y=sy; y<0; y++) 3115 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); 3116 for(y=h; y<sy+block_w*2; y++) 3117 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); 3118 if(sx<0){ 3119 for(y=sy; y<sy+block_w*2; y++) 3120 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); 3121 } 3122 if(sx+block_w*2 > w){ 3123 for(y=sy; y<sy+block_w*2; y++) 3124 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); 3125 } 3126 } 3127 3128 // intra(black) = neighbors' contribution to the current block 3129 for(i=0; i<3; i++) 3130 color[i]= get_dc(s, mb_x, mb_y, i); 3131 3132 // get previous score (cannot be cached due to OBMC) 3133 if(pass > 0 && (block->type&BLOCK_INTRA)){ 3134 int color0[3]= {block->color[0], block->color[1], block->color[2]}; 3135 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd); 3136 }else 3137 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd); 3138 3139 ref_b= *block; 3140 ref_rd= best_rd; 3141 for(ref=0; ref < s->ref_frames; ref++){ 3142 int16_t (*mvr)[2]= &s->ref_mvs[ref][index]; 3143 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold 3144 continue; 3145 block->ref= ref; 3146 best_rd= INT_MAX; 3147 3148 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd); 3149 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd); 3150 if(tb) 3151 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd); 3152 if(lb) 3153 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd); 3154 if(rb) 3155 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd); 3156 if(bb) 3157 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd); 3158 3159 /* fullpel ME */ 3160 //FIXME avoid subpel interpolation / round to nearest integer 3161 do{ 3162 dia_change=0; 3163 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ 3164 for(j=0; j<i; j++){ 3165 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); 3166 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); 3167 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); 3168 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); 3169 } 3170 } 3171 }while(dia_change); 3172 /* subpel ME */ 3173 do{ 3174 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; 3175 dia_change=0; 3176 for(i=0; i<8; i++) 3177 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd); 3178 }while(dia_change); 3179 //FIXME or try the standard 2 pass qpel or similar 3180 3181 mvr[0][0]= block->mx; 3182 mvr[0][1]= block->my; 3183 if(ref_rd > best_rd){ 3184 ref_rd= best_rd; 3185 ref_b= *block; 3186 } 3187 } 3188 best_rd= ref_rd; 3189 *block= ref_b; 3190#if 1 3191 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); 3192 //FIXME RD style color selection 3193#endif 3194 if(!same_block(block, &backup)){ 3195 if(tb ) tb ->type &= ~BLOCK_OPT; 3196 if(lb ) lb ->type &= ~BLOCK_OPT; 3197 if(rb ) rb ->type &= ~BLOCK_OPT; 3198 if(bb ) bb ->type &= ~BLOCK_OPT; 3199 if(tlb) tlb->type &= ~BLOCK_OPT; 3200 if(trb) trb->type &= ~BLOCK_OPT; 3201 if(blb) blb->type &= ~BLOCK_OPT; 3202 if(brb) brb->type &= ~BLOCK_OPT; 3203 change ++; 3204 } 3205 } 3206 } 3207 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); 3208 if(!change) 3209 break; 3210 } 3211 3212 if(s->block_max_depth == 1){ 3213 int change= 0; 3214 for(mb_y= 0; mb_y<b_height; mb_y+=2){ 3215 for(mb_x= 0; mb_x<b_width; mb_x+=2){ 3216 int i; 3217 int best_rd, init_rd; 3218 const int index= mb_x + mb_y * b_stride; 3219 BlockNode *b[4]; 3220 3221 b[0]= &s->block[index]; 3222 b[1]= b[0]+1; 3223 b[2]= b[0]+b_stride; 3224 b[3]= b[2]+1; 3225 if(same_block(b[0], b[1]) && 3226 same_block(b[0], b[2]) && 3227 same_block(b[0], b[3])) 3228 continue; 3229 3230 if(!s->me_cache_generation) 3231 memset(s->me_cache, 0, sizeof(s->me_cache)); 3232 s->me_cache_generation += 1<<22; 3233 3234 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); 3235 3236 //FIXME more multiref search? 3237 check_4block_inter(s, mb_x, mb_y, 3238 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, 3239 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); 3240 3241 for(i=0; i<4; i++) 3242 if(!(b[i]->type&BLOCK_INTRA)) 3243 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd); 3244 3245 if(init_rd != best_rd) 3246 change++; 3247 } 3248 } 3249 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); 3250 } 3251} 3252 3253static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ 3254 const int w= b->width; 3255 const int h= b->height; 3256 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 3257 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); 3258 int x,y, thres1, thres2; 3259 3260 if(s->qlog == LOSSLESS_QLOG){ 3261 for(y=0; y<h; y++) 3262 for(x=0; x<w; x++) 3263 dst[x + y*stride]= src[x + y*stride]; 3264 return; 3265 } 3266 3267 bias= bias ? 0 : (3*qmul)>>3; 3268 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; 3269 thres2= 2*thres1; 3270 3271 if(!bias){ 3272 for(y=0; y<h; y++){ 3273 for(x=0; x<w; x++){ 3274 int i= src[x + y*stride]; 3275 3276 if((unsigned)(i+thres1) > thres2){ 3277 if(i>=0){ 3278 i<<= QEXPSHIFT; 3279 i/= qmul; //FIXME optimize 3280 dst[x + y*stride]= i; 3281 }else{ 3282 i= -i; 3283 i<<= QEXPSHIFT; 3284 i/= qmul; //FIXME optimize 3285 dst[x + y*stride]= -i; 3286 } 3287 }else 3288 dst[x + y*stride]= 0; 3289 } 3290 } 3291 }else{ 3292 for(y=0; y<h; y++){ 3293 for(x=0; x<w; x++){ 3294 int i= src[x + y*stride]; 3295 3296 if((unsigned)(i+thres1) > thres2){ 3297 if(i>=0){ 3298 i<<= QEXPSHIFT; 3299 i= (i + bias) / qmul; //FIXME optimize 3300 dst[x + y*stride]= i; 3301 }else{ 3302 i= -i; 3303 i<<= QEXPSHIFT; 3304 i= (i + bias) / qmul; //FIXME optimize 3305 dst[x + y*stride]= -i; 3306 } 3307 }else 3308 dst[x + y*stride]= 0; 3309 } 3310 } 3311 } 3312} 3313 3314static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ 3315 const int w= b->width; 3316 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 3317 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 3318 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; 3319 int x,y; 3320 3321 if(s->qlog == LOSSLESS_QLOG) return; 3322 3323 for(y=start_y; y<end_y; y++){ 3324// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); 3325 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; 3326 for(x=0; x<w; x++){ 3327 int i= line[x]; 3328 if(i<0){ 3329 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias 3330 }else if(i>0){ 3331 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); 3332 } 3333 } 3334 } 3335} 3336 3337static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ 3338 const int w= b->width; 3339 const int h= b->height; 3340 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 3341 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 3342 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; 3343 int x,y; 3344 3345 if(s->qlog == LOSSLESS_QLOG) return; 3346 3347 for(y=0; y<h; y++){ 3348 for(x=0; x<w; x++){ 3349 int i= src[x + y*stride]; 3350 if(i<0){ 3351 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias 3352 }else if(i>0){ 3353 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); 3354 } 3355 } 3356 } 3357} 3358 3359static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ 3360 const int w= b->width; 3361 const int h= b->height; 3362 int x,y; 3363 3364 for(y=h-1; y>=0; y--){ 3365 for(x=w-1; x>=0; x--){ 3366 int i= x + y*stride; 3367 3368 if(x){ 3369 if(use_median){ 3370 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); 3371 else src[i] -= src[i - 1]; 3372 }else{ 3373 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); 3374 else src[i] -= src[i - 1]; 3375 } 3376 }else{ 3377 if(y) src[i] -= src[i - stride]; 3378 } 3379 } 3380 } 3381} 3382 3383static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ 3384 const int w= b->width; 3385 int x,y; 3386 3387 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning 3388 IDWTELEM * prev; 3389 3390 if (start_y != 0) 3391 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; 3392 3393 for(y=start_y; y<end_y; y++){ 3394 prev = line; 3395// line = slice_buffer_get_line_from_address(sb, src + (y * stride)); 3396 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; 3397 for(x=0; x<w; x++){ 3398 if(x){ 3399 if(use_median){ 3400 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); 3401 else line[x] += line[x - 1]; 3402 }else{ 3403 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); 3404 else line[x] += line[x - 1]; 3405 } 3406 }else{ 3407 if(y) line[x] += prev[x]; 3408 } 3409 } 3410 } 3411} 3412 3413static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ 3414 const int w= b->width; 3415 const int h= b->height; 3416 int x,y; 3417 3418 for(y=0; y<h; y++){ 3419 for(x=0; x<w; x++){ 3420 int i= x + y*stride; 3421 3422 if(x){ 3423 if(use_median){ 3424 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); 3425 else src[i] += src[i - 1]; 3426 }else{ 3427 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); 3428 else src[i] += src[i - 1]; 3429 } 3430 }else{ 3431 if(y) src[i] += src[i - stride]; 3432 } 3433 } 3434 } 3435} 3436 3437static void encode_qlogs(SnowContext *s){ 3438 int plane_index, level, orientation; 3439 3440 for(plane_index=0; plane_index<2; plane_index++){ 3441 for(level=0; level<s->spatial_decomposition_count; level++){ 3442 for(orientation=level ? 1:0; orientation<4; orientation++){ 3443 if(orientation==2) continue; 3444 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); 3445 } 3446 } 3447 } 3448} 3449 3450static void encode_header(SnowContext *s){ 3451 int plane_index, i; 3452 uint8_t kstate[32]; 3453 3454 memset(kstate, MID_STATE, sizeof(kstate)); 3455 3456 put_rac(&s->c, kstate, s->keyframe); 3457 if(s->keyframe || s->always_reset){ 3458 reset_contexts(s); 3459 s->last_spatial_decomposition_type= 3460 s->last_qlog= 3461 s->last_qbias= 3462 s->last_mv_scale= 3463 s->last_block_max_depth= 0; 3464 for(plane_index=0; plane_index<2; plane_index++){ 3465 Plane *p= &s->plane[plane_index]; 3466 p->last_htaps=0; 3467 p->last_diag_mc=0; 3468 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff)); 3469 } 3470 } 3471 if(s->keyframe){ 3472 put_symbol(&s->c, s->header_state, s->version, 0); 3473 put_rac(&s->c, s->header_state, s->always_reset); 3474 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0); 3475 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0); 3476 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); 3477 put_symbol(&s->c, s->header_state, s->colorspace_type, 0); 3478 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0); 3479 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); 3480 put_rac(&s->c, s->header_state, s->spatial_scalability); 3481// put_rac(&s->c, s->header_state, s->rate_scalability); 3482 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0); 3483 3484 encode_qlogs(s); 3485 } 3486 3487 if(!s->keyframe){ 3488 int update_mc=0; 3489 for(plane_index=0; plane_index<2; plane_index++){ 3490 Plane *p= &s->plane[plane_index]; 3491 update_mc |= p->last_htaps != p->htaps; 3492 update_mc |= p->last_diag_mc != p->diag_mc; 3493 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); 3494 } 3495 put_rac(&s->c, s->header_state, update_mc); 3496 if(update_mc){ 3497 for(plane_index=0; plane_index<2; plane_index++){ 3498 Plane *p= &s->plane[plane_index]; 3499 put_rac(&s->c, s->header_state, p->diag_mc); 3500 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); 3501 for(i= p->htaps/2; i; i--) 3502 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); 3503 } 3504 } 3505 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ 3506 put_rac(&s->c, s->header_state, 1); 3507 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); 3508 encode_qlogs(s); 3509 }else 3510 put_rac(&s->c, s->header_state, 0); 3511 } 3512 3513 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); 3514 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); 3515 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); 3516 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); 3517 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); 3518 3519} 3520 3521static void update_last_header_values(SnowContext *s){ 3522 int plane_index; 3523 3524 if(!s->keyframe){ 3525 for(plane_index=0; plane_index<2; plane_index++){ 3526 Plane *p= &s->plane[plane_index]; 3527 p->last_diag_mc= p->diag_mc; 3528 p->last_htaps = p->htaps; 3529 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); 3530 } 3531 } 3532 3533 s->last_spatial_decomposition_type = s->spatial_decomposition_type; 3534 s->last_qlog = s->qlog; 3535 s->last_qbias = s->qbias; 3536 s->last_mv_scale = s->mv_scale; 3537 s->last_block_max_depth = s->block_max_depth; 3538 s->last_spatial_decomposition_count = s->spatial_decomposition_count; 3539} 3540 3541static void decode_qlogs(SnowContext *s){ 3542 int plane_index, level, orientation; 3543 3544 for(plane_index=0; plane_index<3; plane_index++){ 3545 for(level=0; level<s->spatial_decomposition_count; level++){ 3546 for(orientation=level ? 1:0; orientation<4; orientation++){ 3547 int q; 3548 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; 3549 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; 3550 else q= get_symbol(&s->c, s->header_state, 1); 3551 s->plane[plane_index].band[level][orientation].qlog= q; 3552 } 3553 } 3554 } 3555} 3556 3557static int decode_header(SnowContext *s){ 3558 int plane_index, tmp; 3559 uint8_t kstate[32]; 3560 3561 memset(kstate, MID_STATE, sizeof(kstate)); 3562 3563 s->keyframe= get_rac(&s->c, kstate); 3564 if(s->keyframe || s->always_reset){ 3565 reset_contexts(s); 3566 s->spatial_decomposition_type= 3567 s->qlog= 3568 s->qbias= 3569 s->mv_scale= 3570 s->block_max_depth= 0; 3571 } 3572 if(s->keyframe){ 3573 s->version= get_symbol(&s->c, s->header_state, 0); 3574 if(s->version>0){ 3575 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version); 3576 return -1; 3577 } 3578 s->always_reset= get_rac(&s->c, s->header_state); 3579 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); 3580 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); 3581 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0); 3582 s->colorspace_type= get_symbol(&s->c, s->header_state, 0); 3583 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); 3584 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); 3585 s->spatial_scalability= get_rac(&s->c, s->header_state); 3586// s->rate_scalability= get_rac(&s->c, s->header_state); 3587 tmp= get_symbol(&s->c, s->header_state, 0)+1; 3588 if(tmp < 1 || tmp > MAX_REF_FRAMES){ 3589 av_log(s->avctx, AV_LOG_ERROR, "reference frame count is %d\n", tmp); 3590 return -1; 3591 } 3592 s->max_ref_frames= tmp; 3593 3594 decode_qlogs(s); 3595 } 3596 3597 if(!s->keyframe){ 3598 if(get_rac(&s->c, s->header_state)){ 3599 for(plane_index=0; plane_index<2; plane_index++){ 3600 int htaps, i, sum=0; 3601 Plane *p= &s->plane[plane_index]; 3602 p->diag_mc= get_rac(&s->c, s->header_state); 3603 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; 3604 if((unsigned)htaps > HTAPS_MAX || htaps==0) 3605 return -1; 3606 p->htaps= htaps; 3607 for(i= htaps/2; i; i--){ 3608 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); 3609 sum += p->hcoeff[i]; 3610 } 3611 p->hcoeff[0]= 32-sum; 3612 } 3613 s->plane[2].diag_mc= s->plane[1].diag_mc; 3614 s->plane[2].htaps = s->plane[1].htaps; 3615 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); 3616 } 3617 if(get_rac(&s->c, s->header_state)){ 3618 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0); 3619 decode_qlogs(s); 3620 } 3621 } 3622 3623 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); 3624 if(s->spatial_decomposition_type > 1){ 3625 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); 3626 return -1; 3627 } 3628 3629 s->qlog += get_symbol(&s->c, s->header_state, 1); 3630 s->mv_scale += get_symbol(&s->c, s->header_state, 1); 3631 s->qbias += get_symbol(&s->c, s->header_state, 1); 3632 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); 3633 if(s->block_max_depth > 1 || s->block_max_depth < 0){ 3634 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); 3635 s->block_max_depth= 0; 3636 return -1; 3637 } 3638 3639 return 0; 3640} 3641 3642static void init_qexp(void){ 3643 int i; 3644 double v=128; 3645 3646 for(i=0; i<QROOT; i++){ 3647 qexp[i]= lrintf(v); 3648 v *= pow(2, 1.0 / QROOT); 3649 } 3650} 3651 3652static av_cold int common_init(AVCodecContext *avctx){ 3653 SnowContext *s = avctx->priv_data; 3654 int width, height; 3655 int i, j; 3656 3657 s->avctx= avctx; 3658 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe 3659 3660 dsputil_init(&s->dsp, avctx); 3661 3662#define mcf(dx,dy)\ 3663 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ 3664 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ 3665 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ 3666 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ 3667 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ 3668 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; 3669 3670 mcf( 0, 0) 3671 mcf( 4, 0) 3672 mcf( 8, 0) 3673 mcf(12, 0) 3674 mcf( 0, 4) 3675 mcf( 4, 4) 3676 mcf( 8, 4) 3677 mcf(12, 4) 3678 mcf( 0, 8) 3679 mcf( 4, 8) 3680 mcf( 8, 8) 3681 mcf(12, 8) 3682 mcf( 0,12) 3683 mcf( 4,12) 3684 mcf( 8,12) 3685 mcf(12,12) 3686 3687#define mcfh(dx,dy)\ 3688 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ 3689 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ 3690 mc_block_hpel ## dx ## dy ## 16;\ 3691 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ 3692 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ 3693 mc_block_hpel ## dx ## dy ## 8; 3694 3695 mcfh(0, 0) 3696 mcfh(8, 0) 3697 mcfh(0, 8) 3698 mcfh(8, 8) 3699 3700 if(!qexp[0]) 3701 init_qexp(); 3702 3703// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); 3704 3705 width= s->avctx->width; 3706 height= s->avctx->height; 3707 3708 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); 3709 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here 3710 3711 for(i=0; i<MAX_REF_FRAMES; i++) 3712 for(j=0; j<MAX_REF_FRAMES; j++) 3713 scale_mv_ref[i][j] = 256*(i+1)/(j+1); 3714 3715 s->avctx->get_buffer(s->avctx, &s->mconly_picture); 3716 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE); 3717 3718 return 0; 3719} 3720 3721static int common_init_after_header(AVCodecContext *avctx){ 3722 SnowContext *s = avctx->priv_data; 3723 int plane_index, level, orientation; 3724 3725 for(plane_index=0; plane_index<3; plane_index++){ 3726 int w= s->avctx->width; 3727 int h= s->avctx->height; 3728 3729 if(plane_index){ 3730 w>>= s->chroma_h_shift; 3731 h>>= s->chroma_v_shift; 3732 } 3733 s->plane[plane_index].width = w; 3734 s->plane[plane_index].height= h; 3735 3736 for(level=s->spatial_decomposition_count-1; level>=0; level--){ 3737 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 3738 SubBand *b= &s->plane[plane_index].band[level][orientation]; 3739 3740 b->buf= s->spatial_dwt_buffer; 3741 b->level= level; 3742 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); 3743 b->width = (w + !(orientation&1))>>1; 3744 b->height= (h + !(orientation>1))>>1; 3745 3746 b->stride_line = 1 << (s->spatial_decomposition_count - level); 3747 b->buf_x_offset = 0; 3748 b->buf_y_offset = 0; 3749 3750 if(orientation&1){ 3751 b->buf += (w+1)>>1; 3752 b->buf_x_offset = (w+1)>>1; 3753 } 3754 if(orientation>1){ 3755 b->buf += b->stride>>1; 3756 b->buf_y_offset = b->stride_line >> 1; 3757 } 3758 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); 3759 3760 if(level) 3761 b->parent= &s->plane[plane_index].band[level-1][orientation]; 3762 //FIXME avoid this realloc 3763 av_freep(&b->x_coeff); 3764 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); 3765 } 3766 w= (w+1)>>1; 3767 h= (h+1)>>1; 3768 } 3769 } 3770 3771 return 0; 3772} 3773 3774static int qscale2qlog(int qscale){ 3775 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) 3776 + 61*QROOT/8; //<64 >60 3777} 3778 3779static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) 3780{ 3781 /* Estimate the frame's complexity as a sum of weighted dwt coefficients. 3782 * FIXME we know exact mv bits at this point, 3783 * but ratecontrol isn't set up to include them. */ 3784 uint32_t coef_sum= 0; 3785 int level, orientation, delta_qlog; 3786 3787 for(level=0; level<s->spatial_decomposition_count; level++){ 3788 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 3789 SubBand *b= &s->plane[0].band[level][orientation]; 3790 IDWTELEM *buf= b->ibuf; 3791 const int w= b->width; 3792 const int h= b->height; 3793 const int stride= b->stride; 3794 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); 3795 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 3796 const int qdiv= (1<<16)/qmul; 3797 int x, y; 3798 //FIXME this is ugly 3799 for(y=0; y<h; y++) 3800 for(x=0; x<w; x++) 3801 buf[x+y*stride]= b->buf[x+y*stride]; 3802 if(orientation==0) 3803 decorrelate(s, b, buf, stride, 1, 0); 3804 for(y=0; y<h; y++) 3805 for(x=0; x<w; x++) 3806 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; 3807 } 3808 } 3809 3810 /* ugly, ratecontrol just takes a sqrt again */ 3811 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; 3812 assert(coef_sum < INT_MAX); 3813 3814 if(pict->pict_type == FF_I_TYPE){ 3815 s->m.current_picture.mb_var_sum= coef_sum; 3816 s->m.current_picture.mc_mb_var_sum= 0; 3817 }else{ 3818 s->m.current_picture.mc_mb_var_sum= coef_sum; 3819 s->m.current_picture.mb_var_sum= 0; 3820 } 3821 3822 pict->quality= ff_rate_estimate_qscale(&s->m, 1); 3823 if (pict->quality < 0) 3824 return INT_MIN; 3825 s->lambda= pict->quality * 3/2; 3826 delta_qlog= qscale2qlog(pict->quality) - s->qlog; 3827 s->qlog+= delta_qlog; 3828 return delta_qlog; 3829} 3830 3831static void calculate_visual_weight(SnowContext *s, Plane *p){ 3832 int width = p->width; 3833 int height= p->height; 3834 int level, orientation, x, y; 3835 3836 for(level=0; level<s->spatial_decomposition_count; level++){ 3837 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 3838 SubBand *b= &p->band[level][orientation]; 3839 IDWTELEM *ibuf= b->ibuf; 3840 int64_t error=0; 3841 3842 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height); 3843 ibuf[b->width/2 + b->height/2*b->stride]= 256*16; 3844 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); 3845 for(y=0; y<height; y++){ 3846 for(x=0; x<width; x++){ 3847 int64_t d= s->spatial_idwt_buffer[x + y*width]*16; 3848 error += d*d; 3849 } 3850 } 3851 3852 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); 3853 } 3854 } 3855} 3856 3857#define QUANTIZE2 0 3858 3859#if QUANTIZE2==1 3860#define Q2_STEP 8 3861 3862static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){ 3863 SubBand *b= &p->band[level][orientation]; 3864 int x, y; 3865 int xo=0; 3866 int yo=0; 3867 int step= 1 << (s->spatial_decomposition_count - level); 3868 3869 if(orientation&1) 3870 xo= step>>1; 3871 if(orientation&2) 3872 yo= step>>1; 3873 3874 //FIXME bias for nonzero ? 3875 //FIXME optimize 3876 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP)); 3877 for(y=0; y<p->height; y++){ 3878 for(x=0; x<p->width; x++){ 3879 int sx= (x-xo + step/2) / step / Q2_STEP; 3880 int sy= (y-yo + step/2) / step / Q2_STEP; 3881 int v= r0[x + y*p->width] - r1[x + y*p->width]; 3882 assert(sx>=0 && sy>=0 && sx < score_stride); 3883 v= ((v+8)>>4)<<4; 3884 score[sx + sy*score_stride] += v*v; 3885 assert(score[sx + sy*score_stride] >= 0); 3886 } 3887 } 3888} 3889 3890static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){ 3891 int level, orientation; 3892 3893 for(level=0; level<s->spatial_decomposition_count; level++){ 3894 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 3895 SubBand *b= &p->band[level][orientation]; 3896 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer); 3897 3898 dequantize(s, b, dst, b->stride); 3899 } 3900 } 3901} 3902 3903static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){ 3904 int level, orientation, ys, xs, x, y, pass; 3905 IDWTELEM best_dequant[height * stride]; 3906 IDWTELEM idwt2_buffer[height * stride]; 3907 const int score_stride= (width + 10)/Q2_STEP; 3908 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size 3909 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size 3910 int threshold= (s->m.lambda * s->m.lambda) >> 6; 3911 3912 //FIXME pass the copy cleanly ? 3913 3914// memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM)); 3915 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count); 3916 3917 for(level=0; level<s->spatial_decomposition_count; level++){ 3918 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 3919 SubBand *b= &p->band[level][orientation]; 3920 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); 3921 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer); 3922 assert(src == b->buf); // code does not depend on this but it is true currently 3923 3924 quantize(s, b, dst, src, b->stride, s->qbias); 3925 } 3926 } 3927 for(pass=0; pass<1; pass++){ 3928 if(s->qbias == 0) //keyframe 3929 continue; 3930 for(level=0; level<s->spatial_decomposition_count; level++){ 3931 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 3932 SubBand *b= &p->band[level][orientation]; 3933 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer); 3934 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); 3935 3936 for(ys= 0; ys<Q2_STEP; ys++){ 3937 for(xs= 0; xs<Q2_STEP; xs++){ 3938 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); 3939 dequantize_all(s, p, idwt2_buffer, width, height); 3940 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); 3941 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); 3942 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); 3943 for(y=ys; y<b->height; y+= Q2_STEP){ 3944 for(x=xs; x<b->width; x+= Q2_STEP){ 3945 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++; 3946 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--; 3947 //FIXME try more than just -- 3948 } 3949 } 3950 dequantize_all(s, p, idwt2_buffer, width, height); 3951 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); 3952 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); 3953 for(y=ys; y<b->height; y+= Q2_STEP){ 3954 for(x=xs; x<b->width; x+= Q2_STEP){ 3955 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride; 3956 if(score[score_idx] <= best_score[score_idx] + threshold){ 3957 best_score[score_idx]= score[score_idx]; 3958 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++; 3959 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--; 3960 //FIXME copy instead 3961 } 3962 } 3963 } 3964 } 3965 } 3966 } 3967 } 3968 } 3969 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end 3970} 3971 3972#endif /* QUANTIZE2==1 */ 3973 3974static av_cold int encode_init(AVCodecContext *avctx) 3975{ 3976 SnowContext *s = avctx->priv_data; 3977 int plane_index; 3978 3979 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ 3980 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n" 3981 "Use vstrict=-2 / -strict -2 to use it anyway.\n"); 3982 return -1; 3983 } 3984 3985 if(avctx->prediction_method == DWT_97 3986 && (avctx->flags & CODEC_FLAG_QSCALE) 3987 && avctx->global_quality == 0){ 3988 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n"); 3989 return -1; 3990 } 3991 3992 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type 3993 3994 s->chroma_h_shift= 1; //FIXME XXX 3995 s->chroma_v_shift= 1; 3996 3997 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; 3998 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0; 3999 4000 for(plane_index=0; plane_index<3; plane_index++){ 4001 s->plane[plane_index].diag_mc= 1; 4002 s->plane[plane_index].htaps= 6; 4003 s->plane[plane_index].hcoeff[0]= 40; 4004 s->plane[plane_index].hcoeff[1]= -10; 4005 s->plane[plane_index].hcoeff[2]= 2; 4006 s->plane[plane_index].fast_mc= 1; 4007 } 4008 4009 common_init(avctx); 4010 alloc_blocks(s); 4011 4012 s->version=0; 4013 4014 s->m.avctx = avctx; 4015 s->m.flags = avctx->flags; 4016 s->m.bit_rate= avctx->bit_rate; 4017 4018 s->m.me.temp = 4019 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); 4020 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); 4021 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); 4022 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); 4023 h263_encode_init(&s->m); //mv_penalty 4024 4025 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); 4026 4027 if(avctx->flags&CODEC_FLAG_PASS1){ 4028 if(!avctx->stats_out) 4029 avctx->stats_out = av_mallocz(256); 4030 } 4031 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ 4032 if(ff_rate_control_init(&s->m) < 0) 4033 return -1; 4034 } 4035 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); 4036 4037 avctx->coded_frame= &s->current_picture; 4038 switch(avctx->pix_fmt){ 4039// case PIX_FMT_YUV444P: 4040// case PIX_FMT_YUV422P: 4041 case PIX_FMT_YUV420P: 4042 case PIX_FMT_GRAY8: 4043// case PIX_FMT_YUV411P: 4044// case PIX_FMT_YUV410P: 4045 s->colorspace_type= 0; 4046 break; 4047/* case PIX_FMT_RGB32: 4048 s->colorspace= 1; 4049 break;*/ 4050 default: 4051 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n"); 4052 return -1; 4053 } 4054// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); 4055 s->chroma_h_shift= 1; 4056 s->chroma_v_shift= 1; 4057 4058 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); 4059 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); 4060 4061 s->avctx->get_buffer(s->avctx, &s->input_picture); 4062 4063 if(s->avctx->me_method == ME_ITER){ 4064 int i; 4065 int size= s->b_width * s->b_height << 2*s->block_max_depth; 4066 for(i=0; i<s->max_ref_frames; i++){ 4067 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); 4068 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); 4069 } 4070 } 4071 4072 return 0; 4073} 4074 4075#define USE_HALFPEL_PLANE 0 4076 4077static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ 4078 int p,x,y; 4079 4080 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE)); 4081 4082 for(p=0; p<3; p++){ 4083 int is_chroma= !!p; 4084 int w= s->avctx->width >>is_chroma; 4085 int h= s->avctx->height >>is_chroma; 4086 int ls= frame->linesize[p]; 4087 uint8_t *src= frame->data[p]; 4088 4089 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); 4090 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); 4091 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); 4092 4093 halfpel[0][p]= src; 4094 for(y=0; y<h; y++){ 4095 for(x=0; x<w; x++){ 4096 int i= y*ls + x; 4097 4098 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5; 4099 } 4100 } 4101 for(y=0; y<h; y++){ 4102 for(x=0; x<w; x++){ 4103 int i= y*ls + x; 4104 4105 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; 4106 } 4107 } 4108 src= halfpel[1][p]; 4109 for(y=0; y<h; y++){ 4110 for(x=0; x<w; x++){ 4111 int i= y*ls + x; 4112 4113 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; 4114 } 4115 } 4116 4117//FIXME border! 4118 } 4119} 4120 4121static int frame_start(SnowContext *s){ 4122 AVFrame tmp; 4123 int w= s->avctx->width; //FIXME round up to x16 ? 4124 int h= s->avctx->height; 4125 4126 if(s->current_picture.data[0]){ 4127 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); 4128 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); 4129 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); 4130 } 4131 4132 tmp= s->last_picture[s->max_ref_frames-1]; 4133 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); 4134 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); 4135 if(USE_HALFPEL_PLANE && s->current_picture.data[0]) 4136 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture); 4137 s->last_picture[0]= s->current_picture; 4138 s->current_picture= tmp; 4139 4140 if(s->keyframe){ 4141 s->ref_frames= 0; 4142 }else{ 4143 int i; 4144 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) 4145 if(i && s->last_picture[i-1].key_frame) 4146 break; 4147 s->ref_frames= i; 4148 } 4149 4150 s->current_picture.reference= 1; 4151 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ 4152 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); 4153 return -1; 4154 } 4155 4156 s->current_picture.key_frame= s->keyframe; 4157 4158 return 0; 4159} 4160 4161static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ 4162 SnowContext *s = avctx->priv_data; 4163 RangeCoder * const c= &s->c; 4164 AVFrame *pict = data; 4165 const int width= s->avctx->width; 4166 const int height= s->avctx->height; 4167 int level, orientation, plane_index, i, y; 4168 uint8_t rc_header_bak[sizeof(s->header_state)]; 4169 uint8_t rc_block_bak[sizeof(s->block_state)]; 4170 4171 ff_init_range_encoder(c, buf, buf_size); 4172 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); 4173 4174 for(i=0; i<3; i++){ 4175 int shift= !!i; 4176 for(y=0; y<(height>>shift); y++) 4177 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]], 4178 &pict->data[i][y * pict->linesize[i]], 4179 width>>shift); 4180 } 4181 s->new_picture = *pict; 4182 4183 s->m.picture_number= avctx->frame_number; 4184 if(avctx->flags&CODEC_FLAG_PASS2){ 4185 s->m.pict_type = 4186 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; 4187 s->keyframe= pict->pict_type==FF_I_TYPE; 4188 if(!(avctx->flags&CODEC_FLAG_QSCALE)) { 4189 pict->quality= ff_rate_estimate_qscale(&s->m, 0); 4190 if (pict->quality < 0) 4191 return -1; 4192 } 4193 }else{ 4194 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; 4195 s->m.pict_type= 4196 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; 4197 } 4198 4199 if(s->pass1_rc && avctx->frame_number == 0) 4200 pict->quality= 2*FF_QP2LAMBDA; 4201 if(pict->quality){ 4202 s->qlog= qscale2qlog(pict->quality); 4203 s->lambda = pict->quality * 3/2; 4204 } 4205 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){ 4206 s->qlog= LOSSLESS_QLOG; 4207 s->lambda = 0; 4208 }//else keep previous frame's qlog until after motion estimation 4209 4210 frame_start(s); 4211 4212 s->m.current_picture_ptr= &s->m.current_picture; 4213 if(pict->pict_type == FF_P_TYPE){ 4214 int block_width = (width +15)>>4; 4215 int block_height= (height+15)>>4; 4216 int stride= s->current_picture.linesize[0]; 4217 4218 assert(s->current_picture.data[0]); 4219 assert(s->last_picture[0].data[0]); 4220 4221 s->m.avctx= s->avctx; 4222 s->m.current_picture.data[0]= s->current_picture.data[0]; 4223 s->m. last_picture.data[0]= s->last_picture[0].data[0]; 4224 s->m. new_picture.data[0]= s-> input_picture.data[0]; 4225 s->m. last_picture_ptr= &s->m. last_picture; 4226 s->m.linesize= 4227 s->m. last_picture.linesize[0]= 4228 s->m. new_picture.linesize[0]= 4229 s->m.current_picture.linesize[0]= stride; 4230 s->m.uvlinesize= s->current_picture.linesize[1]; 4231 s->m.width = width; 4232 s->m.height= height; 4233 s->m.mb_width = block_width; 4234 s->m.mb_height= block_height; 4235 s->m.mb_stride= s->m.mb_width+1; 4236 s->m.b8_stride= 2*s->m.mb_width+1; 4237 s->m.f_code=1; 4238 s->m.pict_type= pict->pict_type; 4239 s->m.me_method= s->avctx->me_method; 4240 s->m.me.scene_change_score=0; 4241 s->m.flags= s->avctx->flags; 4242 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0; 4243 s->m.out_format= FMT_H263; 4244 s->m.unrestricted_mv= 1; 4245 4246 s->m.lambda = s->lambda; 4247 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); 4248 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; 4249 4250 s->m.dsp= s->dsp; //move 4251 ff_init_me(&s->m); 4252 s->dsp= s->m.dsp; 4253 } 4254 4255 if(s->pass1_rc){ 4256 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state)); 4257 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state)); 4258 } 4259 4260redo_frame: 4261 4262 if(pict->pict_type == FF_I_TYPE) 4263 s->spatial_decomposition_count= 5; 4264 else 4265 s->spatial_decomposition_count= 5; 4266 4267 s->m.pict_type = pict->pict_type; 4268 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0; 4269 4270 common_init_after_header(avctx); 4271 4272 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ 4273 for(plane_index=0; plane_index<3; plane_index++){ 4274 calculate_visual_weight(s, &s->plane[plane_index]); 4275 } 4276 } 4277 4278 encode_header(s); 4279 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); 4280 encode_blocks(s, 1); 4281 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; 4282 4283 for(plane_index=0; plane_index<3; plane_index++){ 4284 Plane *p= &s->plane[plane_index]; 4285 int w= p->width; 4286 int h= p->height; 4287 int x, y; 4288// int bits= put_bits_count(&s->c.pb); 4289 4290 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ 4291 //FIXME optimize 4292 if(pict->data[plane_index]) //FIXME gray hack 4293 for(y=0; y<h; y++){ 4294 for(x=0; x<w; x++){ 4295 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; 4296 } 4297 } 4298 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); 4299 4300 if( plane_index==0 4301 && pict->pict_type == FF_P_TYPE 4302 && !(avctx->flags&CODEC_FLAG_PASS2) 4303 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ 4304 ff_init_range_encoder(c, buf, buf_size); 4305 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); 4306 pict->pict_type= FF_I_TYPE; 4307 s->keyframe=1; 4308 s->current_picture.key_frame=1; 4309 goto redo_frame; 4310 } 4311 4312 if(s->qlog == LOSSLESS_QLOG){ 4313 for(y=0; y<h; y++){ 4314 for(x=0; x<w; x++){ 4315 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; 4316 } 4317 } 4318 }else{ 4319 for(y=0; y<h; y++){ 4320 for(x=0; x<w; x++){ 4321 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS; 4322 } 4323 } 4324 } 4325 4326 /* if(QUANTIZE2) 4327 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type); 4328 else*/ 4329 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); 4330 4331 if(s->pass1_rc && plane_index==0){ 4332 int delta_qlog = ratecontrol_1pass(s, pict); 4333 if (delta_qlog <= INT_MIN) 4334 return -1; 4335 if(delta_qlog){ 4336 //reordering qlog in the bitstream would eliminate this reset 4337 ff_init_range_encoder(c, buf, buf_size); 4338 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); 4339 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); 4340 encode_header(s); 4341 encode_blocks(s, 0); 4342 } 4343 } 4344 4345 for(level=0; level<s->spatial_decomposition_count; level++){ 4346 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 4347 SubBand *b= &p->band[level][orientation]; 4348 4349 if(!QUANTIZE2) 4350 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); 4351 if(orientation==0) 4352 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0); 4353 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); 4354 assert(b->parent==NULL || b->parent->stride == b->stride*2); 4355 if(orientation==0) 4356 correlate(s, b, b->ibuf, b->stride, 1, 0); 4357 } 4358 } 4359 4360 for(level=0; level<s->spatial_decomposition_count; level++){ 4361 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 4362 SubBand *b= &p->band[level][orientation]; 4363 4364 dequantize(s, b, b->ibuf, b->stride); 4365 } 4366 } 4367 4368 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); 4369 if(s->qlog == LOSSLESS_QLOG){ 4370 for(y=0; y<h; y++){ 4371 for(x=0; x<w; x++){ 4372 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; 4373 } 4374 } 4375 } 4376 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); 4377 }else{ 4378 //ME/MC only 4379 if(pict->pict_type == FF_I_TYPE){ 4380 for(y=0; y<h; y++){ 4381 for(x=0; x<w; x++){ 4382 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= 4383 pict->data[plane_index][y*pict->linesize[plane_index] + x]; 4384 } 4385 } 4386 }else{ 4387 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); 4388 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); 4389 } 4390 } 4391 if(s->avctx->flags&CODEC_FLAG_PSNR){ 4392 int64_t error= 0; 4393 4394 if(pict->data[plane_index]) //FIXME gray hack 4395 for(y=0; y<h; y++){ 4396 for(x=0; x<w; x++){ 4397 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; 4398 error += d*d; 4399 } 4400 } 4401 s->avctx->error[plane_index] += error; 4402 s->current_picture.error[plane_index] = error; 4403 } 4404 4405 } 4406 4407 update_last_header_values(s); 4408 4409 if(s->last_picture[s->max_ref_frames-1].data[0]){ 4410 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); 4411 for(i=0; i<9; i++) 4412 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) 4413 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); 4414 } 4415 4416 s->current_picture.coded_picture_number = avctx->frame_number; 4417 s->current_picture.pict_type = pict->pict_type; 4418 s->current_picture.quality = pict->quality; 4419 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start); 4420 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits; 4421 s->m.current_picture.display_picture_number = 4422 s->m.current_picture.coded_picture_number = avctx->frame_number; 4423 s->m.current_picture.quality = pict->quality; 4424 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); 4425 if(s->pass1_rc) 4426 if (ff_rate_estimate_qscale(&s->m, 0) < 0) 4427 return -1; 4428 if(avctx->flags&CODEC_FLAG_PASS1) 4429 ff_write_pass1_stats(&s->m); 4430 s->m.last_pict_type = s->m.pict_type; 4431 avctx->frame_bits = s->m.frame_bits; 4432 avctx->mv_bits = s->m.mv_bits; 4433 avctx->misc_bits = s->m.misc_bits; 4434 avctx->p_tex_bits = s->m.p_tex_bits; 4435 4436 emms_c(); 4437 4438 return ff_rac_terminate(c); 4439} 4440 4441static av_cold void common_end(SnowContext *s){ 4442 int plane_index, level, orientation, i; 4443 4444 av_freep(&s->spatial_dwt_buffer); 4445 av_freep(&s->spatial_idwt_buffer); 4446 4447 s->m.me.temp= NULL; 4448 av_freep(&s->m.me.scratchpad); 4449 av_freep(&s->m.me.map); 4450 av_freep(&s->m.me.score_map); 4451 av_freep(&s->m.obmc_scratchpad); 4452 4453 av_freep(&s->block); 4454 av_freep(&s->scratchbuf); 4455 4456 for(i=0; i<MAX_REF_FRAMES; i++){ 4457 av_freep(&s->ref_mvs[i]); 4458 av_freep(&s->ref_scores[i]); 4459 if(s->last_picture[i].data[0]) 4460 s->avctx->release_buffer(s->avctx, &s->last_picture[i]); 4461 } 4462 4463 for(plane_index=0; plane_index<3; plane_index++){ 4464 for(level=s->spatial_decomposition_count-1; level>=0; level--){ 4465 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 4466 SubBand *b= &s->plane[plane_index].band[level][orientation]; 4467 4468 av_freep(&b->x_coeff); 4469 } 4470 } 4471 } 4472} 4473 4474static av_cold int encode_end(AVCodecContext *avctx) 4475{ 4476 SnowContext *s = avctx->priv_data; 4477 4478 common_end(s); 4479 av_free(avctx->stats_out); 4480 4481 return 0; 4482} 4483 4484static av_cold int decode_init(AVCodecContext *avctx) 4485{ 4486 avctx->pix_fmt= PIX_FMT_YUV420P; 4487 4488 common_init(avctx); 4489 4490 return 0; 4491} 4492 4493static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){ 4494 SnowContext *s = avctx->priv_data; 4495 RangeCoder * const c= &s->c; 4496 int bytes_read; 4497 AVFrame *picture = data; 4498 int level, orientation, plane_index, i; 4499 4500 ff_init_range_decoder(c, buf, buf_size); 4501 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); 4502 4503 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P 4504 if(decode_header(s)<0) 4505 return -1; 4506 common_init_after_header(avctx); 4507 4508 // realloc slice buffer for the case that spatial_decomposition_count changed 4509 slice_buffer_destroy(&s->sb); 4510 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer); 4511 4512 for(plane_index=0; plane_index<3; plane_index++){ 4513 Plane *p= &s->plane[plane_index]; 4514 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 4515 && p->hcoeff[1]==-10 4516 && p->hcoeff[2]==2; 4517 } 4518 4519 alloc_blocks(s); 4520 4521 frame_start(s); 4522 //keyframe flag duplication mess FIXME 4523 if(avctx->debug&FF_DEBUG_PICT_INFO) 4524 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); 4525 4526 decode_blocks(s); 4527 4528 for(plane_index=0; plane_index<3; plane_index++){ 4529 Plane *p= &s->plane[plane_index]; 4530 int w= p->width; 4531 int h= p->height; 4532 int x, y; 4533 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ 4534 4535 if(s->avctx->debug&2048){ 4536 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); 4537 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); 4538 4539 for(y=0; y<h; y++){ 4540 for(x=0; x<w; x++){ 4541 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; 4542 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; 4543 } 4544 } 4545 } 4546 4547 { 4548 for(level=0; level<s->spatial_decomposition_count; level++){ 4549 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 4550 SubBand *b= &p->band[level][orientation]; 4551 unpack_coeffs(s, b, b->parent, orientation); 4552 } 4553 } 4554 } 4555 4556 { 4557 const int mb_h= s->b_height << s->block_max_depth; 4558 const int block_size = MB_SIZE >> s->block_max_depth; 4559 const int block_w = plane_index ? block_size/2 : block_size; 4560 int mb_y; 4561 DWTCompose cs[MAX_DECOMPOSITIONS]; 4562 int yd=0, yq=0; 4563 int y; 4564 int end_y; 4565 4566 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); 4567 for(mb_y=0; mb_y<=mb_h; mb_y++){ 4568 4569 int slice_starty = block_w*mb_y; 4570 int slice_h = block_w*(mb_y+1); 4571 if (!(s->keyframe || s->avctx->debug&512)){ 4572 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); 4573 slice_h -= (block_w >> 1); 4574 } 4575 4576 for(level=0; level<s->spatial_decomposition_count; level++){ 4577 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 4578 SubBand *b= &p->band[level][orientation]; 4579 int start_y; 4580 int end_y; 4581 int our_mb_start = mb_y; 4582 int our_mb_end = (mb_y + 1); 4583 const int extra= 3; 4584 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); 4585 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); 4586 if (!(s->keyframe || s->avctx->debug&512)){ 4587 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); 4588 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); 4589 } 4590 start_y = FFMIN(b->height, start_y); 4591 end_y = FFMIN(b->height, end_y); 4592 4593 if (start_y != end_y){ 4594 if (orientation == 0){ 4595 SubBand * correlate_band = &p->band[0][0]; 4596 int correlate_end_y = FFMIN(b->height, end_y + 1); 4597 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); 4598 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); 4599 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); 4600 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); 4601 } 4602 else 4603 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); 4604 } 4605 } 4606 } 4607 4608 for(; yd<slice_h; yd+=4){ 4609 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); 4610 } 4611 4612 if(s->qlog == LOSSLESS_QLOG){ 4613 for(; yq<slice_h && yq<h; yq++){ 4614 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); 4615 for(x=0; x<w; x++){ 4616 line[x] <<= FRAC_BITS; 4617 } 4618 } 4619 } 4620 4621 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); 4622 4623 y = FFMIN(p->height, slice_starty); 4624 end_y = FFMIN(p->height, slice_h); 4625 while(y < end_y) 4626 slice_buffer_release(&s->sb, y++); 4627 } 4628 4629 slice_buffer_flush(&s->sb); 4630 } 4631 4632 } 4633 4634 emms_c(); 4635 4636 if(s->last_picture[s->max_ref_frames-1].data[0]){ 4637 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); 4638 for(i=0; i<9; i++) 4639 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) 4640 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); 4641 } 4642 4643 if(!(s->avctx->debug&2048)) 4644 *picture= s->current_picture; 4645 else 4646 *picture= s->mconly_picture; 4647 4648 *data_size = sizeof(AVFrame); 4649 4650 bytes_read= c->bytestream - c->bytestream_start; 4651 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME 4652 4653 return bytes_read; 4654} 4655 4656static av_cold int decode_end(AVCodecContext *avctx) 4657{ 4658 SnowContext *s = avctx->priv_data; 4659 4660 slice_buffer_destroy(&s->sb); 4661 4662 common_end(s); 4663 4664 return 0; 4665} 4666 4667AVCodec snow_decoder = { 4668 "snow", 4669 CODEC_TYPE_VIDEO, 4670 CODEC_ID_SNOW, 4671 sizeof(SnowContext), 4672 decode_init, 4673 NULL, 4674 decode_end, 4675 decode_frame, 4676 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/, 4677 NULL, 4678 .long_name = NULL_IF_CONFIG_SMALL("Snow"), 4679}; 4680 4681#if CONFIG_SNOW_ENCODER 4682AVCodec snow_encoder = { 4683 "snow", 4684 CODEC_TYPE_VIDEO, 4685 CODEC_ID_SNOW, 4686 sizeof(SnowContext), 4687 encode_init, 4688 encode_frame, 4689 encode_end, 4690 .long_name = NULL_IF_CONFIG_SMALL("Snow"), 4691}; 4692#endif 4693 4694 4695#ifdef TEST 4696#undef malloc 4697#undef free 4698#undef printf 4699#undef random 4700 4701int main(void){ 4702 int width=256; 4703 int height=256; 4704 int buffer[2][width*height]; 4705 SnowContext s; 4706 int i; 4707 s.spatial_decomposition_count=6; 4708 s.spatial_decomposition_type=1; 4709 4710 printf("testing 5/3 DWT\n"); 4711 for(i=0; i<width*height; i++) 4712 buffer[0][i]= buffer[1][i]= random()%54321 - 12345; 4713 4714 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4715 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4716 4717 for(i=0; i<width*height; i++) 4718 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); 4719 4720 printf("testing 9/7 DWT\n"); 4721 s.spatial_decomposition_type=0; 4722 for(i=0; i<width*height; i++) 4723 buffer[0][i]= buffer[1][i]= random()%54321 - 12345; 4724 4725 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4726 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4727 4728 for(i=0; i<width*height; i++) 4729 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); 4730 4731#if 0 4732 printf("testing AC coder\n"); 4733 memset(s.header_state, 0, sizeof(s.header_state)); 4734 ff_init_range_encoder(&s.c, buffer[0], 256*256); 4735 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); 4736 4737 for(i=-256; i<256; i++){ 4738 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); 4739 } 4740 ff_rac_terminate(&s.c); 4741 4742 memset(s.header_state, 0, sizeof(s.header_state)); 4743 ff_init_range_decoder(&s.c, buffer[0], 256*256); 4744 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); 4745 4746 for(i=-256; i<256; i++){ 4747 int j; 4748 j= get_symbol(&s.c, s.header_state, 1); 4749 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); 4750 } 4751#endif 4752 { 4753 int level, orientation, x, y; 4754 int64_t errors[8][4]; 4755 int64_t g=0; 4756 4757 memset(errors, 0, sizeof(errors)); 4758 s.spatial_decomposition_count=3; 4759 s.spatial_decomposition_type=0; 4760 for(level=0; level<s.spatial_decomposition_count; level++){ 4761 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 4762 int w= width >> (s.spatial_decomposition_count-level); 4763 int h= height >> (s.spatial_decomposition_count-level); 4764 int stride= width << (s.spatial_decomposition_count-level); 4765 DWTELEM *buf= buffer[0]; 4766 int64_t error=0; 4767 4768 if(orientation&1) buf+=w; 4769 if(orientation>1) buf+=stride>>1; 4770 4771 memset(buffer[0], 0, sizeof(int)*width*height); 4772 buf[w/2 + h/2*stride]= 256*256; 4773 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4774 for(y=0; y<height; y++){ 4775 for(x=0; x<width; x++){ 4776 int64_t d= buffer[0][x + y*width]; 4777 error += d*d; 4778 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d); 4779 } 4780 if(FFABS(height/2-y)<9 && level==2) printf("\n"); 4781 } 4782 error= (int)(sqrt(error)+0.5); 4783 errors[level][orientation]= error; 4784 if(g) g=av_gcd(g, error); 4785 else g= error; 4786 } 4787 } 4788 printf("static int const visual_weight[][4]={\n"); 4789 for(level=0; level<s.spatial_decomposition_count; level++){ 4790 printf(" {"); 4791 for(orientation=0; orientation<4; orientation++){ 4792 printf("%8"PRId64",", errors[level][orientation]/g); 4793 } 4794 printf("},\n"); 4795 } 4796 printf("};\n"); 4797 { 4798 int level=2; 4799 int w= width >> (s.spatial_decomposition_count-level); 4800 //int h= height >> (s.spatial_decomposition_count-level); 4801 int stride= width << (s.spatial_decomposition_count-level); 4802 DWTELEM *buf= buffer[0]; 4803 int64_t error=0; 4804 4805 buf+=w; 4806 buf+=stride>>1; 4807 4808 memset(buffer[0], 0, sizeof(int)*width*height); 4809#if 1 4810 for(y=0; y<height; y++){ 4811 for(x=0; x<width; x++){ 4812 int tab[4]={0,2,3,1}; 4813 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)]; 4814 } 4815 } 4816 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4817#else 4818 for(y=0; y<h; y++){ 4819 for(x=0; x<w; x++){ 4820 buf[x + y*stride ]=169; 4821 buf[x + y*stride-w]=64; 4822 } 4823 } 4824 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4825#endif 4826 for(y=0; y<height; y++){ 4827 for(x=0; x<width; x++){ 4828 int64_t d= buffer[0][x + y*width]; 4829 error += d*d; 4830 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d); 4831 } 4832 if(FFABS(height/2-y)<9) printf("\n"); 4833 } 4834 } 4835 4836 } 4837 return 0; 4838} 4839#endif /* TEST */ 4840