1/* 2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include "libavutil/intmath.h" 22#include "avcodec.h" 23#include "dsputil.h" 24#include "dwt.h" 25#include "snow.h" 26 27#include "rangecoder.h" 28#include "mathops.h" 29 30#include "mpegvideo.h" 31#include "h263.h" 32 33#undef NDEBUG 34#include <assert.h> 35 36static const int8_t quant3[256]={ 37 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 50-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 51-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 52-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 53}; 54static const int8_t quant3b[256]={ 55 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 68-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 69-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 70-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 71}; 72static const int8_t quant3bA[256]={ 73 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 89}; 90static const int8_t quant5[256]={ 91 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 99-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 100-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 101-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 103-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 104-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 105-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 106-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, 107}; 108static const int8_t quant7[256]={ 109 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 110 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 111 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 114 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 117-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, 118-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, 119-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, 120-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, 121-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, 122-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, 123-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, 124-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, 125}; 126static const int8_t quant9[256]={ 127 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 128 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 135-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 136-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 137-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 138-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 139-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 140-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 141-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, 142-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, 143}; 144static const int8_t quant11[256]={ 145 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 146 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 147 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 150 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 153-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 154-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 155-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 156-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 157-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 158-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, 159-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 160-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, 161}; 162static const int8_t quant13[256]={ 163 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 164 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 165 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 166 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 168 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 169 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 171-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, 172-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, 173-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, 174-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, 175-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, 176-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 177-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 178-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, 179}; 180 181#if 0 //64*cubic 182static const uint8_t obmc32[1024]={ 183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 185 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, 186 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, 187 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, 188 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, 189 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, 190 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, 191 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, 192 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, 193 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, 194 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, 195 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, 196 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, 197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, 198 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, 199 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, 200 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, 201 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, 202 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, 203 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, 204 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, 205 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, 206 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, 207 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, 208 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, 209 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, 210 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, 211 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, 212 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, 213 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 214 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 215//error:0.000022 216}; 217static const uint8_t obmc16[256]={ 218 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 219 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, 220 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, 221 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, 222 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, 223 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, 224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, 225 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, 226 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, 227 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, 228 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, 229 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, 230 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, 231 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, 232 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, 233 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 234//error:0.000033 235}; 236#elif 1 // 64*linear 237static const uint8_t obmc32[1024]={ 238 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 239 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, 240 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, 241 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, 242 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, 243 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, 244 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, 245 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, 246 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, 247 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, 248 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, 249 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, 250 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, 251 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, 252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, 253 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, 254 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, 255 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, 256 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, 257 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, 258 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, 259 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, 260 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, 261 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, 262 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, 263 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, 264 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, 265 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, 266 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, 267 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, 268 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, 269 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 270 //error:0.000020 271}; 272static const uint8_t obmc16[256]={ 273 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, 274 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, 275 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, 276 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, 277 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, 278 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, 279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, 280 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, 281 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, 282 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, 283 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, 284 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, 285 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, 286 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, 287 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, 288 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, 289//error:0.000015 290}; 291#else //64*cos 292static const uint8_t obmc32[1024]={ 293 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 294 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 295 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, 296 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, 297 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, 298 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, 299 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, 300 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, 301 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, 302 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, 303 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, 304 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, 305 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, 306 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, 307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, 308 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, 309 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, 310 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, 311 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, 312 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, 313 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, 314 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, 315 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, 316 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, 317 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, 318 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, 319 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, 320 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, 321 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, 322 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, 323 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 324 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 325//error:0.000022 326}; 327static const uint8_t obmc16[256]={ 328 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 329 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, 330 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, 331 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, 332 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, 333 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, 334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, 335 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, 336 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, 337 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, 338 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, 339 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, 340 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, 341 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, 342 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, 343 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 344//error:0.000022 345}; 346#endif /* 0 */ 347 348//linear *64 349static const uint8_t obmc8[64]={ 350 4, 12, 20, 28, 28, 20, 12, 4, 351 12, 36, 60, 84, 84, 60, 36, 12, 352 20, 60,100,140,140,100, 60, 20, 353 28, 84,140,196,196,140, 84, 28, 354 28, 84,140,196,196,140, 84, 28, 355 20, 60,100,140,140,100, 60, 20, 356 12, 36, 60, 84, 84, 60, 36, 12, 357 4, 12, 20, 28, 28, 20, 12, 4, 358//error:0.000000 359}; 360 361//linear *64 362static const uint8_t obmc4[16]={ 363 16, 48, 48, 16, 364 48,144,144, 48, 365 48,144,144, 48, 366 16, 48, 48, 16, 367//error:0.000000 368}; 369 370static const uint8_t * const obmc_tab[4]={ 371 obmc32, obmc16, obmc8, obmc4 372}; 373 374static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; 375 376typedef struct BlockNode{ 377 int16_t mx; 378 int16_t my; 379 uint8_t ref; 380 uint8_t color[3]; 381 uint8_t type; 382//#define TYPE_SPLIT 1 383#define BLOCK_INTRA 1 384#define BLOCK_OPT 2 385//#define TYPE_NOCOLOR 4 386 uint8_t level; //FIXME merge into type? 387}BlockNode; 388 389static const BlockNode null_block= { //FIXME add border maybe 390 .color= {128,128,128}, 391 .mx= 0, 392 .my= 0, 393 .ref= 0, 394 .type= 0, 395 .level= 0, 396}; 397 398#define LOG2_MB_SIZE 4 399#define MB_SIZE (1<<LOG2_MB_SIZE) 400#define ENCODER_EXTRA_BITS 4 401#define HTAPS_MAX 8 402 403typedef struct x_and_coeff{ 404 int16_t x; 405 uint16_t coeff; 406} x_and_coeff; 407 408typedef struct SubBand{ 409 int level; 410 int stride; 411 int width; 412 int height; 413 int qlog; ///< log(qscale)/log[2^(1/6)] 414 DWTELEM *buf; 415 IDWTELEM *ibuf; 416 int buf_x_offset; 417 int buf_y_offset; 418 int stride_line; ///< Stride measured in lines, not pixels. 419 x_and_coeff * x_coeff; 420 struct SubBand *parent; 421 uint8_t state[/*7*2*/ 7 + 512][32]; 422}SubBand; 423 424typedef struct Plane{ 425 int width; 426 int height; 427 SubBand band[MAX_DECOMPOSITIONS][4]; 428 429 int htaps; 430 int8_t hcoeff[HTAPS_MAX/2]; 431 int diag_mc; 432 int fast_mc; 433 434 int last_htaps; 435 int8_t last_hcoeff[HTAPS_MAX/2]; 436 int last_diag_mc; 437}Plane; 438 439typedef struct SnowContext{ 440 441 AVCodecContext *avctx; 442 RangeCoder c; 443 DSPContext dsp; 444 DWTContext dwt; 445 AVFrame new_picture; 446 AVFrame input_picture; ///< new_picture with the internal linesizes 447 AVFrame current_picture; 448 AVFrame last_picture[MAX_REF_FRAMES]; 449 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4]; 450 AVFrame mconly_picture; 451// uint8_t q_context[16]; 452 uint8_t header_state[32]; 453 uint8_t block_state[128 + 32*128]; 454 int keyframe; 455 int always_reset; 456 int version; 457 int spatial_decomposition_type; 458 int last_spatial_decomposition_type; 459 int temporal_decomposition_type; 460 int spatial_decomposition_count; 461 int last_spatial_decomposition_count; 462 int temporal_decomposition_count; 463 int max_ref_frames; 464 int ref_frames; 465 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; 466 uint32_t *ref_scores[MAX_REF_FRAMES]; 467 DWTELEM *spatial_dwt_buffer; 468 IDWTELEM *spatial_idwt_buffer; 469 int colorspace_type; 470 int chroma_h_shift; 471 int chroma_v_shift; 472 int spatial_scalability; 473 int qlog; 474 int last_qlog; 475 int lambda; 476 int lambda2; 477 int pass1_rc; 478 int mv_scale; 479 int last_mv_scale; 480 int qbias; 481 int last_qbias; 482#define QBIAS_SHIFT 3 483 int b_width; 484 int b_height; 485 int block_max_depth; 486 int last_block_max_depth; 487 Plane plane[MAX_PLANES]; 488 BlockNode *block; 489#define ME_CACHE_SIZE 1024 490 int me_cache[ME_CACHE_SIZE]; 491 int me_cache_generation; 492 slice_buffer sb; 493 494 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) 495 496 uint8_t *scratchbuf; 497}SnowContext; 498 499#ifdef __sgi 500// Avoid a name clash on SGI IRIX 501#undef qexp 502#endif 503#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 504static uint8_t qexp[QROOT]; 505 506static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ 507 int i; 508 509 if(v){ 510 const int a= FFABS(v); 511 const int e= av_log2(a); 512#if 1 513 const int el= FFMIN(e, 10); 514 put_rac(c, state+0, 0); 515 516 for(i=0; i<el; i++){ 517 put_rac(c, state+1+i, 1); //1..10 518 } 519 for(; i<e; i++){ 520 put_rac(c, state+1+9, 1); //1..10 521 } 522 put_rac(c, state+1+FFMIN(i,9), 0); 523 524 for(i=e-1; i>=el; i--){ 525 put_rac(c, state+22+9, (a>>i)&1); //22..31 526 } 527 for(; i>=0; i--){ 528 put_rac(c, state+22+i, (a>>i)&1); //22..31 529 } 530 531 if(is_signed) 532 put_rac(c, state+11 + el, v < 0); //11..21 533#else 534 535 put_rac(c, state+0, 0); 536 if(e<=9){ 537 for(i=0; i<e; i++){ 538 put_rac(c, state+1+i, 1); //1..10 539 } 540 put_rac(c, state+1+i, 0); 541 542 for(i=e-1; i>=0; i--){ 543 put_rac(c, state+22+i, (a>>i)&1); //22..31 544 } 545 546 if(is_signed) 547 put_rac(c, state+11 + e, v < 0); //11..21 548 }else{ 549 for(i=0; i<e; i++){ 550 put_rac(c, state+1+FFMIN(i,9), 1); //1..10 551 } 552 put_rac(c, state+1+9, 0); 553 554 for(i=e-1; i>=0; i--){ 555 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 556 } 557 558 if(is_signed) 559 put_rac(c, state+11 + 10, v < 0); //11..21 560 } 561#endif /* 1 */ 562 }else{ 563 put_rac(c, state+0, 1); 564 } 565} 566 567static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ 568 if(get_rac(c, state+0)) 569 return 0; 570 else{ 571 int i, e, a; 572 e= 0; 573 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 574 e++; 575 } 576 577 a= 1; 578 for(i=e-1; i>=0; i--){ 579 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 580 } 581 582 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21 583 return (a^e)-e; 584 } 585} 586 587static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ 588 int i; 589 int r= log2>=0 ? 1<<log2 : 1; 590 591 assert(v>=0); 592 assert(log2>=-4); 593 594 while(v >= r){ 595 put_rac(c, state+4+log2, 1); 596 v -= r; 597 log2++; 598 if(log2>0) r+=r; 599 } 600 put_rac(c, state+4+log2, 0); 601 602 for(i=log2-1; i>=0; i--){ 603 put_rac(c, state+31-i, (v>>i)&1); 604 } 605} 606 607static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ 608 int i; 609 int r= log2>=0 ? 1<<log2 : 1; 610 int v=0; 611 612 assert(log2>=-4); 613 614 while(get_rac(c, state+4+log2)){ 615 v+= r; 616 log2++; 617 if(log2>0) r+=r; 618 } 619 620 for(i=log2-1; i>=0; i--){ 621 v+= get_rac(c, state+31-i)<<i; 622 } 623 624 return v; 625} 626 627static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ 628 const int w= b->width; 629 const int h= b->height; 630 int x,y; 631 632 int run, runs; 633 x_and_coeff *xc= b->x_coeff; 634 x_and_coeff *prev_xc= NULL; 635 x_and_coeff *prev2_xc= xc; 636 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; 637 x_and_coeff *prev_parent_xc= parent_xc; 638 639 runs= get_symbol2(&s->c, b->state[30], 0); 640 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); 641 else run= INT_MAX; 642 643 for(y=0; y<h; y++){ 644 int v=0; 645 int lt=0, t=0, rt=0; 646 647 if(y && prev_xc->x == 0){ 648 rt= prev_xc->coeff; 649 } 650 for(x=0; x<w; x++){ 651 int p=0; 652 const int l= v; 653 654 lt= t; t= rt; 655 656 if(y){ 657 if(prev_xc->x <= x) 658 prev_xc++; 659 if(prev_xc->x == x + 1) 660 rt= prev_xc->coeff; 661 else 662 rt=0; 663 } 664 if(parent_xc){ 665 if(x>>1 > parent_xc->x){ 666 parent_xc++; 667 } 668 if(x>>1 == parent_xc->x){ 669 p= parent_xc->coeff; 670 } 671 } 672 if(/*ll|*/l|lt|t|rt|p){ 673 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); 674 675 v=get_rac(&s->c, &b->state[0][context]); 676 if(v){ 677 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); 678 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); 679 680 xc->x=x; 681 (xc++)->coeff= v; 682 } 683 }else{ 684 if(!run){ 685 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); 686 else run= INT_MAX; 687 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); 688 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); 689 690 xc->x=x; 691 (xc++)->coeff= v; 692 }else{ 693 int max_run; 694 run--; 695 v=0; 696 697 if(y) max_run= FFMIN(run, prev_xc->x - x - 2); 698 else max_run= FFMIN(run, w-x-1); 699 if(parent_xc) 700 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); 701 x+= max_run; 702 run-= max_run; 703 } 704 } 705 } 706 (xc++)->x= w+1; //end marker 707 prev_xc= prev2_xc; 708 prev2_xc= xc; 709 710 if(parent_xc){ 711 if(y&1){ 712 while(parent_xc->x != parent->width+1) 713 parent_xc++; 714 parent_xc++; 715 prev_parent_xc= parent_xc; 716 }else{ 717 parent_xc= prev_parent_xc; 718 } 719 } 720 } 721 722 (xc++)->x= w+1; //end marker 723} 724 725static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ 726 const int w= b->width; 727 int y; 728 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 729 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 730 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; 731 int new_index = 0; 732 733 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ 734 qadd= 0; 735 qmul= 1<<QEXPSHIFT; 736 } 737 738 /* If we are on the second or later slice, restore our index. */ 739 if (start_y != 0) 740 new_index = save_state[0]; 741 742 743 for(y=start_y; y<h; y++){ 744 int x = 0; 745 int v; 746 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; 747 memset(line, 0, b->width*sizeof(IDWTELEM)); 748 v = b->x_coeff[new_index].coeff; 749 x = b->x_coeff[new_index++].x; 750 while(x < w){ 751 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; 752 register int u= -(v&1); 753 line[x] = (t^u) - u; 754 755 v = b->x_coeff[new_index].coeff; 756 x = b->x_coeff[new_index++].x; 757 } 758 } 759 760 /* Save our variables for the next slice. */ 761 save_state[0] = new_index; 762 763 return; 764} 765 766static void reset_contexts(SnowContext *s){ //FIXME better initial contexts 767 int plane_index, level, orientation; 768 769 for(plane_index=0; plane_index<3; plane_index++){ 770 for(level=0; level<MAX_DECOMPOSITIONS; level++){ 771 for(orientation=level ? 1:0; orientation<4; orientation++){ 772 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); 773 } 774 } 775 } 776 memset(s->header_state, MID_STATE, sizeof(s->header_state)); 777 memset(s->block_state, MID_STATE, sizeof(s->block_state)); 778} 779 780static int alloc_blocks(SnowContext *s){ 781 int w= -((-s->avctx->width )>>LOG2_MB_SIZE); 782 int h= -((-s->avctx->height)>>LOG2_MB_SIZE); 783 784 s->b_width = w; 785 s->b_height= h; 786 787 av_free(s->block); 788 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); 789 return 0; 790} 791 792static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ 793 uint8_t *bytestream= d->bytestream; 794 uint8_t *bytestream_start= d->bytestream_start; 795 *d= *s; 796 d->bytestream= bytestream; 797 d->bytestream_start= bytestream_start; 798} 799 800static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ 801 const int w= s->b_width << s->block_max_depth; 802 const int rem_depth= s->block_max_depth - level; 803 const int index= (x + y*w) << rem_depth; 804 const int block_w= 1<<rem_depth; 805 BlockNode block; 806 int i,j; 807 808 block.color[0]= l; 809 block.color[1]= cb; 810 block.color[2]= cr; 811 block.mx= mx; 812 block.my= my; 813 block.ref= ref; 814 block.type= type; 815 block.level= level; 816 817 for(j=0; j<block_w; j++){ 818 for(i=0; i<block_w; i++){ 819 s->block[index + i + j*w]= block; 820 } 821 } 822} 823 824static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ 825 const int offset[3]= { 826 y*c-> stride + x, 827 ((y*c->uvstride + x)>>1), 828 ((y*c->uvstride + x)>>1), 829 }; 830 int i; 831 for(i=0; i<3; i++){ 832 c->src[0][i]= src [i]; 833 c->ref[0][i]= ref [i] + offset[i]; 834 } 835 assert(!ref_index); 836} 837 838static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, 839 const BlockNode *left, const BlockNode *top, const BlockNode *tr){ 840 if(s->ref_frames == 1){ 841 *mx = mid_pred(left->mx, top->mx, tr->mx); 842 *my = mid_pred(left->my, top->my, tr->my); 843 }else{ 844 const int *scale = scale_mv_ref[ref]; 845 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8, 846 (top ->mx * scale[top ->ref] + 128) >>8, 847 (tr ->mx * scale[tr ->ref] + 128) >>8); 848 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, 849 (top ->my * scale[top ->ref] + 128) >>8, 850 (tr ->my * scale[tr ->ref] + 128) >>8); 851 } 852} 853 854static av_always_inline int same_block(BlockNode *a, BlockNode *b){ 855 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ 856 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); 857 }else{ 858 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); 859 } 860} 861 862static void decode_q_branch(SnowContext *s, int level, int x, int y){ 863 const int w= s->b_width << s->block_max_depth; 864 const int rem_depth= s->block_max_depth - level; 865 const int index= (x + y*w) << rem_depth; 866 int trx= (x+1)<<rem_depth; 867 const BlockNode *left = x ? &s->block[index-1] : &null_block; 868 const BlockNode *top = y ? &s->block[index-w] : &null_block; 869 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; 870 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt 871 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; 872 873 if(s->keyframe){ 874 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); 875 return; 876 } 877 878 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ 879 int type, mx, my; 880 int l = left->color[0]; 881 int cb= left->color[1]; 882 int cr= left->color[2]; 883 int ref = 0; 884 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); 885 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); 886 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); 887 888 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; 889 890 if(type){ 891 pred_mv(s, &mx, &my, 0, left, top, tr); 892 l += get_symbol(&s->c, &s->block_state[32], 1); 893 cb+= get_symbol(&s->c, &s->block_state[64], 1); 894 cr+= get_symbol(&s->c, &s->block_state[96], 1); 895 }else{ 896 if(s->ref_frames > 1) 897 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); 898 pred_mv(s, &mx, &my, ref, left, top, tr); 899 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); 900 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); 901 } 902 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); 903 }else{ 904 decode_q_branch(s, level+1, 2*x+0, 2*y+0); 905 decode_q_branch(s, level+1, 2*x+1, 2*y+0); 906 decode_q_branch(s, level+1, 2*x+0, 2*y+1); 907 decode_q_branch(s, level+1, 2*x+1, 2*y+1); 908 } 909} 910 911static void decode_blocks(SnowContext *s){ 912 int x, y; 913 int w= s->b_width; 914 int h= s->b_height; 915 916 for(y=0; y<h; y++){ 917 for(x=0; x<w; x++){ 918 decode_q_branch(s, 0, x, y); 919 } 920 } 921} 922 923static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ 924 static const uint8_t weight[64]={ 925 8,7,6,5,4,3,2,1, 926 7,7,0,0,0,0,0,1, 927 6,0,6,0,0,0,2,0, 928 5,0,0,5,0,3,0,0, 929 4,0,0,0,4,0,0,0, 930 3,0,0,5,0,3,0,0, 931 2,0,6,0,0,0,2,0, 932 1,7,0,0,0,0,0,1, 933 }; 934 935 static const uint8_t brane[256]={ 936 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 937 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, 938 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, 939 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, 940 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, 941 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, 942 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, 943 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, 944 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, 945 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, 946 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, 947 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, 948 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, 949 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, 950 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, 951 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, 952 }; 953 954 static const uint8_t needs[16]={ 955 0,1,0,0, 956 2,4,2,0, 957 0,1,0,0, 958 15 959 }; 960 961 int x, y, b, r, l; 962 int16_t tmpIt [64*(32+HTAPS_MAX)]; 963 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; 964 int16_t *tmpI= tmpIt; 965 uint8_t *tmp2= tmp2t[0]; 966 const uint8_t *hpel[11]; 967 assert(dx<16 && dy<16); 968 r= brane[dx + 16*dy]&15; 969 l= brane[dx + 16*dy]>>4; 970 971 b= needs[l] | needs[r]; 972 if(p && !p->diag_mc) 973 b= 15; 974 975 if(b&5){ 976 for(y=0; y < b_h+HTAPS_MAX-1; y++){ 977 for(x=0; x < b_w; x++){ 978 int a_1=src[x + HTAPS_MAX/2-4]; 979 int a0= src[x + HTAPS_MAX/2-3]; 980 int a1= src[x + HTAPS_MAX/2-2]; 981 int a2= src[x + HTAPS_MAX/2-1]; 982 int a3= src[x + HTAPS_MAX/2+0]; 983 int a4= src[x + HTAPS_MAX/2+1]; 984 int a5= src[x + HTAPS_MAX/2+2]; 985 int a6= src[x + HTAPS_MAX/2+3]; 986 int am=0; 987 if(!p || p->fast_mc){ 988 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); 989 tmpI[x]= am; 990 am= (am+16)>>5; 991 }else{ 992 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); 993 tmpI[x]= am; 994 am= (am+32)>>6; 995 } 996 997 if(am&(~255)) am= ~(am>>31); 998 tmp2[x]= am; 999 } 1000 tmpI+= 64; 1001 tmp2+= stride; 1002 src += stride; 1003 } 1004 src -= stride*y; 1005 } 1006 src += HTAPS_MAX/2 - 1; 1007 tmp2= tmp2t[1]; 1008 1009 if(b&2){ 1010 for(y=0; y < b_h; y++){ 1011 for(x=0; x < b_w+1; x++){ 1012 int a_1=src[x + (HTAPS_MAX/2-4)*stride]; 1013 int a0= src[x + (HTAPS_MAX/2-3)*stride]; 1014 int a1= src[x + (HTAPS_MAX/2-2)*stride]; 1015 int a2= src[x + (HTAPS_MAX/2-1)*stride]; 1016 int a3= src[x + (HTAPS_MAX/2+0)*stride]; 1017 int a4= src[x + (HTAPS_MAX/2+1)*stride]; 1018 int a5= src[x + (HTAPS_MAX/2+2)*stride]; 1019 int a6= src[x + (HTAPS_MAX/2+3)*stride]; 1020 int am=0; 1021 if(!p || p->fast_mc) 1022 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; 1023 else 1024 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; 1025 1026 if(am&(~255)) am= ~(am>>31); 1027 tmp2[x]= am; 1028 } 1029 src += stride; 1030 tmp2+= stride; 1031 } 1032 src -= stride*y; 1033 } 1034 src += stride*(HTAPS_MAX/2 - 1); 1035 tmp2= tmp2t[2]; 1036 tmpI= tmpIt; 1037 if(b&4){ 1038 for(y=0; y < b_h; y++){ 1039 for(x=0; x < b_w; x++){ 1040 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64]; 1041 int a0= tmpI[x + (HTAPS_MAX/2-3)*64]; 1042 int a1= tmpI[x + (HTAPS_MAX/2-2)*64]; 1043 int a2= tmpI[x + (HTAPS_MAX/2-1)*64]; 1044 int a3= tmpI[x + (HTAPS_MAX/2+0)*64]; 1045 int a4= tmpI[x + (HTAPS_MAX/2+1)*64]; 1046 int a5= tmpI[x + (HTAPS_MAX/2+2)*64]; 1047 int a6= tmpI[x + (HTAPS_MAX/2+3)*64]; 1048 int am=0; 1049 if(!p || p->fast_mc) 1050 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; 1051 else 1052 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; 1053 if(am&(~255)) am= ~(am>>31); 1054 tmp2[x]= am; 1055 } 1056 tmpI+= 64; 1057 tmp2+= stride; 1058 } 1059 } 1060 1061 hpel[ 0]= src; 1062 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1); 1063 hpel[ 2]= src + 1; 1064 1065 hpel[ 4]= tmp2t[1]; 1066 hpel[ 5]= tmp2t[2]; 1067 hpel[ 6]= tmp2t[1] + 1; 1068 1069 hpel[ 8]= src + stride; 1070 hpel[ 9]= hpel[1] + stride; 1071 hpel[10]= hpel[8] + 1; 1072 1073 if(b==15){ 1074 const uint8_t *src1= hpel[dx/8 + dy/8*4 ]; 1075 const uint8_t *src2= hpel[dx/8 + dy/8*4+1]; 1076 const uint8_t *src3= hpel[dx/8 + dy/8*4+4]; 1077 const uint8_t *src4= hpel[dx/8 + dy/8*4+5]; 1078 dx&=7; 1079 dy&=7; 1080 for(y=0; y < b_h; y++){ 1081 for(x=0; x < b_w; x++){ 1082 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ 1083 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; 1084 } 1085 src1+=stride; 1086 src2+=stride; 1087 src3+=stride; 1088 src4+=stride; 1089 dst +=stride; 1090 } 1091 }else{ 1092 const uint8_t *src1= hpel[l]; 1093 const uint8_t *src2= hpel[r]; 1094 int a= weight[((dx&7) + (8*(dy&7)))]; 1095 int b= 8-a; 1096 for(y=0; y < b_h; y++){ 1097 for(x=0; x < b_w; x++){ 1098 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; 1099 } 1100 src1+=stride; 1101 src2+=stride; 1102 dst +=stride; 1103 } 1104 } 1105} 1106 1107#define mca(dx,dy,b_w)\ 1108static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ 1109 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ 1110 assert(h==b_w);\ 1111 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ 1112} 1113 1114mca( 0, 0,16) 1115mca( 8, 0,16) 1116mca( 0, 8,16) 1117mca( 8, 8,16) 1118mca( 0, 0,8) 1119mca( 8, 0,8) 1120mca( 0, 8,8) 1121mca( 8, 8,8) 1122 1123static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ 1124 if(block->type & BLOCK_INTRA){ 1125 int x, y; 1126 const int color = block->color[plane_index]; 1127 const int color4= color*0x01010101; 1128 if(b_w==32){ 1129 for(y=0; y < b_h; y++){ 1130 *(uint32_t*)&dst[0 + y*stride]= color4; 1131 *(uint32_t*)&dst[4 + y*stride]= color4; 1132 *(uint32_t*)&dst[8 + y*stride]= color4; 1133 *(uint32_t*)&dst[12+ y*stride]= color4; 1134 *(uint32_t*)&dst[16+ y*stride]= color4; 1135 *(uint32_t*)&dst[20+ y*stride]= color4; 1136 *(uint32_t*)&dst[24+ y*stride]= color4; 1137 *(uint32_t*)&dst[28+ y*stride]= color4; 1138 } 1139 }else if(b_w==16){ 1140 for(y=0; y < b_h; y++){ 1141 *(uint32_t*)&dst[0 + y*stride]= color4; 1142 *(uint32_t*)&dst[4 + y*stride]= color4; 1143 *(uint32_t*)&dst[8 + y*stride]= color4; 1144 *(uint32_t*)&dst[12+ y*stride]= color4; 1145 } 1146 }else if(b_w==8){ 1147 for(y=0; y < b_h; y++){ 1148 *(uint32_t*)&dst[0 + y*stride]= color4; 1149 *(uint32_t*)&dst[4 + y*stride]= color4; 1150 } 1151 }else if(b_w==4){ 1152 for(y=0; y < b_h; y++){ 1153 *(uint32_t*)&dst[0 + y*stride]= color4; 1154 } 1155 }else{ 1156 for(y=0; y < b_h; y++){ 1157 for(x=0; x < b_w; x++){ 1158 dst[x + y*stride]= color; 1159 } 1160 } 1161 } 1162 }else{ 1163 uint8_t *src= s->last_picture[block->ref].data[plane_index]; 1164 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; 1165 int mx= block->mx*scale; 1166 int my= block->my*scale; 1167 const int dx= mx&15; 1168 const int dy= my&15; 1169 const int tab_index= 3 - (b_w>>2) + (b_w>>4); 1170 sx += (mx>>4) - (HTAPS_MAX/2-1); 1171 sy += (my>>4) - (HTAPS_MAX/2-1); 1172 src += sx + sy*stride; 1173 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2) 1174 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ 1175 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h); 1176 src= tmp + MB_SIZE; 1177 } 1178// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); 1179// assert(!(b_w&(b_w-1))); 1180 assert(b_w>1 && b_h>1); 1181 assert((tab_index>=0 && tab_index<4) || b_w==32); 1182 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) 1183 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); 1184 else if(b_w==32){ 1185 int y; 1186 for(y=0; y<b_h; y+=16){ 1187 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); 1188 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); 1189 } 1190 }else if(b_w==b_h) 1191 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); 1192 else if(b_w==2*b_h){ 1193 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); 1194 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); 1195 }else{ 1196 assert(2*b_w==b_h); 1197 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); 1198 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); 1199 } 1200 } 1201} 1202 1203void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, 1204 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ 1205 int y, x; 1206 IDWTELEM * dst; 1207 for(y=0; y<b_h; y++){ 1208 //FIXME ugly misuse of obmc_stride 1209 const uint8_t *obmc1= obmc + y*obmc_stride; 1210 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); 1211 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); 1212 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); 1213 dst = slice_buffer_get_line(sb, src_y + y); 1214 for(x=0; x<b_w; x++){ 1215 int v= obmc1[x] * block[3][x + y*src_stride] 1216 +obmc2[x] * block[2][x + y*src_stride] 1217 +obmc3[x] * block[1][x + y*src_stride] 1218 +obmc4[x] * block[0][x + y*src_stride]; 1219 1220 v <<= 8 - LOG2_OBMC_MAX; 1221 if(FRAC_BITS != 8){ 1222 v >>= 8 - FRAC_BITS; 1223 } 1224 if(add){ 1225 v += dst[x + src_x]; 1226 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; 1227 if(v&(~255)) v= ~(v>>31); 1228 dst8[x + y*src_stride] = v; 1229 }else{ 1230 dst[x + src_x] -= v; 1231 } 1232 } 1233 } 1234} 1235 1236//FIXME name cleanup (b_w, block_w, b_width stuff) 1237static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ 1238 const int b_width = s->b_width << s->block_max_depth; 1239 const int b_height= s->b_height << s->block_max_depth; 1240 const int b_stride= b_width; 1241 BlockNode *lt= &s->block[b_x + b_y*b_stride]; 1242 BlockNode *rt= lt+1; 1243 BlockNode *lb= lt+b_stride; 1244 BlockNode *rb= lb+1; 1245 uint8_t *block[4]; 1246 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; 1247 uint8_t *tmp = s->scratchbuf; 1248 uint8_t *ptmp; 1249 int x,y; 1250 1251 if(b_x<0){ 1252 lt= rt; 1253 lb= rb; 1254 }else if(b_x + 1 >= b_width){ 1255 rt= lt; 1256 rb= lb; 1257 } 1258 if(b_y<0){ 1259 lt= lb; 1260 rt= rb; 1261 }else if(b_y + 1 >= b_height){ 1262 lb= lt; 1263 rb= rt; 1264 } 1265 1266 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16 1267 obmc -= src_x; 1268 b_w += src_x; 1269 if(!sliced && !offset_dst) 1270 dst -= src_x; 1271 src_x=0; 1272 }else if(src_x + b_w > w){ 1273 b_w = w - src_x; 1274 } 1275 if(src_y<0){ 1276 obmc -= src_y*obmc_stride; 1277 b_h += src_y; 1278 if(!sliced && !offset_dst) 1279 dst -= src_y*dst_stride; 1280 src_y=0; 1281 }else if(src_y + b_h> h){ 1282 b_h = h - src_y; 1283 } 1284 1285 if(b_w<=0 || b_h<=0) return; 1286 1287 assert(src_stride > 2*MB_SIZE + 5); 1288 1289 if(!sliced && offset_dst) 1290 dst += src_x + src_y*dst_stride; 1291 dst8+= src_x + src_y*src_stride; 1292// src += src_x + src_y*src_stride; 1293 1294 ptmp= tmp + 3*tmp_step; 1295 block[0]= ptmp; 1296 ptmp+=tmp_step; 1297 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); 1298 1299 if(same_block(lt, rt)){ 1300 block[1]= block[0]; 1301 }else{ 1302 block[1]= ptmp; 1303 ptmp+=tmp_step; 1304 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); 1305 } 1306 1307 if(same_block(lt, lb)){ 1308 block[2]= block[0]; 1309 }else if(same_block(rt, lb)){ 1310 block[2]= block[1]; 1311 }else{ 1312 block[2]= ptmp; 1313 ptmp+=tmp_step; 1314 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); 1315 } 1316 1317 if(same_block(lt, rb) ){ 1318 block[3]= block[0]; 1319 }else if(same_block(rt, rb)){ 1320 block[3]= block[1]; 1321 }else if(same_block(lb, rb)){ 1322 block[3]= block[2]; 1323 }else{ 1324 block[3]= ptmp; 1325 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); 1326 } 1327#if 0 1328 for(y=0; y<b_h; y++){ 1329 for(x=0; x<b_w; x++){ 1330 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); 1331 if(add) dst[x + y*dst_stride] += v; 1332 else dst[x + y*dst_stride] -= v; 1333 } 1334 } 1335 for(y=0; y<b_h; y++){ 1336 uint8_t *obmc2= obmc + (obmc_stride>>1); 1337 for(x=0; x<b_w; x++){ 1338 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); 1339 if(add) dst[x + y*dst_stride] += v; 1340 else dst[x + y*dst_stride] -= v; 1341 } 1342 } 1343 for(y=0; y<b_h; y++){ 1344 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); 1345 for(x=0; x<b_w; x++){ 1346 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); 1347 if(add) dst[x + y*dst_stride] += v; 1348 else dst[x + y*dst_stride] -= v; 1349 } 1350 } 1351 for(y=0; y<b_h; y++){ 1352 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); 1353 uint8_t *obmc4= obmc3+ (obmc_stride>>1); 1354 for(x=0; x<b_w; x++){ 1355 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); 1356 if(add) dst[x + y*dst_stride] += v; 1357 else dst[x + y*dst_stride] -= v; 1358 } 1359 } 1360#else 1361 if(sliced){ 1362 s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); 1363 }else{ 1364 for(y=0; y<b_h; y++){ 1365 //FIXME ugly misuse of obmc_stride 1366 const uint8_t *obmc1= obmc + y*obmc_stride; 1367 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); 1368 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); 1369 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); 1370 for(x=0; x<b_w; x++){ 1371 int v= obmc1[x] * block[3][x + y*src_stride] 1372 +obmc2[x] * block[2][x + y*src_stride] 1373 +obmc3[x] * block[1][x + y*src_stride] 1374 +obmc4[x] * block[0][x + y*src_stride]; 1375 1376 v <<= 8 - LOG2_OBMC_MAX; 1377 if(FRAC_BITS != 8){ 1378 v >>= 8 - FRAC_BITS; 1379 } 1380 if(add){ 1381 v += dst[x + y*dst_stride]; 1382 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; 1383 if(v&(~255)) v= ~(v>>31); 1384 dst8[x + y*src_stride] = v; 1385 }else{ 1386 dst[x + y*dst_stride] -= v; 1387 } 1388 } 1389 } 1390 } 1391#endif /* 0 */ 1392} 1393 1394static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ 1395 Plane *p= &s->plane[plane_index]; 1396 const int mb_w= s->b_width << s->block_max_depth; 1397 const int mb_h= s->b_height << s->block_max_depth; 1398 int x, y, mb_x; 1399 int block_size = MB_SIZE >> s->block_max_depth; 1400 int block_w = plane_index ? block_size/2 : block_size; 1401 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; 1402 int obmc_stride= plane_index ? block_size : 2*block_size; 1403 int ref_stride= s->current_picture.linesize[plane_index]; 1404 uint8_t *dst8= s->current_picture.data[plane_index]; 1405 int w= p->width; 1406 int h= p->height; 1407 1408 if(s->keyframe || (s->avctx->debug&512)){ 1409 if(mb_y==mb_h) 1410 return; 1411 1412 if(add){ 1413 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ 1414// DWTELEM * line = slice_buffer_get_line(sb, y); 1415 IDWTELEM * line = sb->line[y]; 1416 for(x=0; x<w; x++){ 1417// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); 1418 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); 1419 v >>= FRAC_BITS; 1420 if(v&(~255)) v= ~(v>>31); 1421 dst8[x + y*ref_stride]= v; 1422 } 1423 } 1424 }else{ 1425 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ 1426// DWTELEM * line = slice_buffer_get_line(sb, y); 1427 IDWTELEM * line = sb->line[y]; 1428 for(x=0; x<w; x++){ 1429 line[x] -= 128 << FRAC_BITS; 1430// buf[x + y*w]-= 128<<FRAC_BITS; 1431 } 1432 } 1433 } 1434 1435 return; 1436 } 1437 1438 for(mb_x=0; mb_x<=mb_w; mb_x++){ 1439 add_yblock(s, 1, sb, old_buffer, dst8, obmc, 1440 block_w*mb_x - block_w/2, 1441 block_w*mb_y - block_w/2, 1442 block_w, block_w, 1443 w, h, 1444 w, ref_stride, obmc_stride, 1445 mb_x - 1, mb_y - 1, 1446 add, 0, plane_index); 1447 } 1448} 1449 1450static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ 1451 Plane *p= &s->plane[plane_index]; 1452 const int mb_w= s->b_width << s->block_max_depth; 1453 const int mb_h= s->b_height << s->block_max_depth; 1454 int x, y, mb_x; 1455 int block_size = MB_SIZE >> s->block_max_depth; 1456 int block_w = plane_index ? block_size/2 : block_size; 1457 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; 1458 const int obmc_stride= plane_index ? block_size : 2*block_size; 1459 int ref_stride= s->current_picture.linesize[plane_index]; 1460 uint8_t *dst8= s->current_picture.data[plane_index]; 1461 int w= p->width; 1462 int h= p->height; 1463 1464 if(s->keyframe || (s->avctx->debug&512)){ 1465 if(mb_y==mb_h) 1466 return; 1467 1468 if(add){ 1469 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ 1470 for(x=0; x<w; x++){ 1471 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); 1472 v >>= FRAC_BITS; 1473 if(v&(~255)) v= ~(v>>31); 1474 dst8[x + y*ref_stride]= v; 1475 } 1476 } 1477 }else{ 1478 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ 1479 for(x=0; x<w; x++){ 1480 buf[x + y*w]-= 128<<FRAC_BITS; 1481 } 1482 } 1483 } 1484 1485 return; 1486 } 1487 1488 for(mb_x=0; mb_x<=mb_w; mb_x++){ 1489 add_yblock(s, 0, NULL, buf, dst8, obmc, 1490 block_w*mb_x - block_w/2, 1491 block_w*mb_y - block_w/2, 1492 block_w, block_w, 1493 w, h, 1494 w, ref_stride, obmc_stride, 1495 mb_x - 1, mb_y - 1, 1496 add, 1, plane_index); 1497 } 1498} 1499 1500static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ 1501 const int mb_h= s->b_height << s->block_max_depth; 1502 int mb_y; 1503 for(mb_y=0; mb_y<=mb_h; mb_y++) 1504 predict_slice(s, buf, plane_index, add, mb_y); 1505} 1506 1507static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ 1508 const int w= b->width; 1509 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 1510 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 1511 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; 1512 int x,y; 1513 1514 if(s->qlog == LOSSLESS_QLOG) return; 1515 1516 for(y=start_y; y<end_y; y++){ 1517// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); 1518 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; 1519 for(x=0; x<w; x++){ 1520 int i= line[x]; 1521 if(i<0){ 1522 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias 1523 }else if(i>0){ 1524 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); 1525 } 1526 } 1527 } 1528} 1529 1530static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ 1531 const int w= b->width; 1532 int x,y; 1533 1534 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning 1535 IDWTELEM * prev; 1536 1537 if (start_y != 0) 1538 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; 1539 1540 for(y=start_y; y<end_y; y++){ 1541 prev = line; 1542// line = slice_buffer_get_line_from_address(sb, src + (y * stride)); 1543 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; 1544 for(x=0; x<w; x++){ 1545 if(x){ 1546 if(use_median){ 1547 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); 1548 else line[x] += line[x - 1]; 1549 }else{ 1550 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); 1551 else line[x] += line[x - 1]; 1552 } 1553 }else{ 1554 if(y) line[x] += prev[x]; 1555 } 1556 } 1557 } 1558} 1559 1560static void decode_qlogs(SnowContext *s){ 1561 int plane_index, level, orientation; 1562 1563 for(plane_index=0; plane_index<3; plane_index++){ 1564 for(level=0; level<s->spatial_decomposition_count; level++){ 1565 for(orientation=level ? 1:0; orientation<4; orientation++){ 1566 int q; 1567 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; 1568 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; 1569 else q= get_symbol(&s->c, s->header_state, 1); 1570 s->plane[plane_index].band[level][orientation].qlog= q; 1571 } 1572 } 1573 } 1574} 1575 1576#define GET_S(dst, check) \ 1577 tmp= get_symbol(&s->c, s->header_state, 0);\ 1578 if(!(check)){\ 1579 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\ 1580 return -1;\ 1581 }\ 1582 dst= tmp; 1583 1584static int decode_header(SnowContext *s){ 1585 int plane_index, tmp; 1586 uint8_t kstate[32]; 1587 1588 memset(kstate, MID_STATE, sizeof(kstate)); 1589 1590 s->keyframe= get_rac(&s->c, kstate); 1591 if(s->keyframe || s->always_reset){ 1592 reset_contexts(s); 1593 s->spatial_decomposition_type= 1594 s->qlog= 1595 s->qbias= 1596 s->mv_scale= 1597 s->block_max_depth= 0; 1598 } 1599 if(s->keyframe){ 1600 GET_S(s->version, tmp <= 0U) 1601 s->always_reset= get_rac(&s->c, s->header_state); 1602 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); 1603 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); 1604 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) 1605 s->colorspace_type= get_symbol(&s->c, s->header_state, 0); 1606 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); 1607 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); 1608 s->spatial_scalability= get_rac(&s->c, s->header_state); 1609// s->rate_scalability= get_rac(&s->c, s->header_state); 1610 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES) 1611 s->max_ref_frames++; 1612 1613 decode_qlogs(s); 1614 } 1615 1616 if(!s->keyframe){ 1617 if(get_rac(&s->c, s->header_state)){ 1618 for(plane_index=0; plane_index<2; plane_index++){ 1619 int htaps, i, sum=0; 1620 Plane *p= &s->plane[plane_index]; 1621 p->diag_mc= get_rac(&s->c, s->header_state); 1622 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; 1623 if((unsigned)htaps > HTAPS_MAX || htaps==0) 1624 return -1; 1625 p->htaps= htaps; 1626 for(i= htaps/2; i; i--){ 1627 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); 1628 sum += p->hcoeff[i]; 1629 } 1630 p->hcoeff[0]= 32-sum; 1631 } 1632 s->plane[2].diag_mc= s->plane[1].diag_mc; 1633 s->plane[2].htaps = s->plane[1].htaps; 1634 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); 1635 } 1636 if(get_rac(&s->c, s->header_state)){ 1637 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) 1638 decode_qlogs(s); 1639 } 1640 } 1641 1642 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); 1643 if(s->spatial_decomposition_type > 1U){ 1644 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); 1645 return -1; 1646 } 1647 if(FFMIN(s->avctx-> width>>s->chroma_h_shift, 1648 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){ 1649 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count); 1650 return -1; 1651 } 1652 1653 s->qlog += get_symbol(&s->c, s->header_state, 1); 1654 s->mv_scale += get_symbol(&s->c, s->header_state, 1); 1655 s->qbias += get_symbol(&s->c, s->header_state, 1); 1656 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); 1657 if(s->block_max_depth > 1 || s->block_max_depth < 0){ 1658 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); 1659 s->block_max_depth= 0; 1660 return -1; 1661 } 1662 1663 return 0; 1664} 1665 1666static void init_qexp(void){ 1667 int i; 1668 double v=128; 1669 1670 for(i=0; i<QROOT; i++){ 1671 qexp[i]= lrintf(v); 1672 v *= pow(2, 1.0 / QROOT); 1673 } 1674} 1675 1676static av_cold int common_init(AVCodecContext *avctx){ 1677 SnowContext *s = avctx->priv_data; 1678 int width, height; 1679 int i, j; 1680 1681 s->avctx= avctx; 1682 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe 1683 1684 dsputil_init(&s->dsp, avctx); 1685 ff_dwt_init(&s->dwt); 1686 1687#define mcf(dx,dy)\ 1688 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ 1689 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ 1690 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ 1691 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ 1692 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ 1693 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; 1694 1695 mcf( 0, 0) 1696 mcf( 4, 0) 1697 mcf( 8, 0) 1698 mcf(12, 0) 1699 mcf( 0, 4) 1700 mcf( 4, 4) 1701 mcf( 8, 4) 1702 mcf(12, 4) 1703 mcf( 0, 8) 1704 mcf( 4, 8) 1705 mcf( 8, 8) 1706 mcf(12, 8) 1707 mcf( 0,12) 1708 mcf( 4,12) 1709 mcf( 8,12) 1710 mcf(12,12) 1711 1712#define mcfh(dx,dy)\ 1713 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ 1714 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ 1715 mc_block_hpel ## dx ## dy ## 16;\ 1716 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ 1717 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ 1718 mc_block_hpel ## dx ## dy ## 8; 1719 1720 mcfh(0, 0) 1721 mcfh(8, 0) 1722 mcfh(0, 8) 1723 mcfh(8, 8) 1724 1725 if(!qexp[0]) 1726 init_qexp(); 1727 1728// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); 1729 1730 width= s->avctx->width; 1731 height= s->avctx->height; 1732 1733 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); 1734 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here 1735 1736 for(i=0; i<MAX_REF_FRAMES; i++) 1737 for(j=0; j<MAX_REF_FRAMES; j++) 1738 scale_mv_ref[i][j] = 256*(i+1)/(j+1); 1739 1740 s->avctx->get_buffer(s->avctx, &s->mconly_picture); 1741 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE); 1742 1743 return 0; 1744} 1745 1746static int common_init_after_header(AVCodecContext *avctx){ 1747 SnowContext *s = avctx->priv_data; 1748 int plane_index, level, orientation; 1749 1750 for(plane_index=0; plane_index<3; plane_index++){ 1751 int w= s->avctx->width; 1752 int h= s->avctx->height; 1753 1754 if(plane_index){ 1755 w>>= s->chroma_h_shift; 1756 h>>= s->chroma_v_shift; 1757 } 1758 s->plane[plane_index].width = w; 1759 s->plane[plane_index].height= h; 1760 1761 for(level=s->spatial_decomposition_count-1; level>=0; level--){ 1762 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 1763 SubBand *b= &s->plane[plane_index].band[level][orientation]; 1764 1765 b->buf= s->spatial_dwt_buffer; 1766 b->level= level; 1767 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); 1768 b->width = (w + !(orientation&1))>>1; 1769 b->height= (h + !(orientation>1))>>1; 1770 1771 b->stride_line = 1 << (s->spatial_decomposition_count - level); 1772 b->buf_x_offset = 0; 1773 b->buf_y_offset = 0; 1774 1775 if(orientation&1){ 1776 b->buf += (w+1)>>1; 1777 b->buf_x_offset = (w+1)>>1; 1778 } 1779 if(orientation>1){ 1780 b->buf += b->stride>>1; 1781 b->buf_y_offset = b->stride_line >> 1; 1782 } 1783 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); 1784 1785 if(level) 1786 b->parent= &s->plane[plane_index].band[level-1][orientation]; 1787 //FIXME avoid this realloc 1788 av_freep(&b->x_coeff); 1789 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); 1790 } 1791 w= (w+1)>>1; 1792 h= (h+1)>>1; 1793 } 1794 } 1795 1796 return 0; 1797} 1798 1799#define QUANTIZE2 0 1800 1801#if QUANTIZE2==1 1802#define Q2_STEP 8 1803 1804static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){ 1805 SubBand *b= &p->band[level][orientation]; 1806 int x, y; 1807 int xo=0; 1808 int yo=0; 1809 int step= 1 << (s->spatial_decomposition_count - level); 1810 1811 if(orientation&1) 1812 xo= step>>1; 1813 if(orientation&2) 1814 yo= step>>1; 1815 1816 //FIXME bias for nonzero ? 1817 //FIXME optimize 1818 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP)); 1819 for(y=0; y<p->height; y++){ 1820 for(x=0; x<p->width; x++){ 1821 int sx= (x-xo + step/2) / step / Q2_STEP; 1822 int sy= (y-yo + step/2) / step / Q2_STEP; 1823 int v= r0[x + y*p->width] - r1[x + y*p->width]; 1824 assert(sx>=0 && sy>=0 && sx < score_stride); 1825 v= ((v+8)>>4)<<4; 1826 score[sx + sy*score_stride] += v*v; 1827 assert(score[sx + sy*score_stride] >= 0); 1828 } 1829 } 1830} 1831 1832static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){ 1833 int level, orientation; 1834 1835 for(level=0; level<s->spatial_decomposition_count; level++){ 1836 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 1837 SubBand *b= &p->band[level][orientation]; 1838 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer); 1839 1840 dequantize(s, b, dst, b->stride); 1841 } 1842 } 1843} 1844 1845static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){ 1846 int level, orientation, ys, xs, x, y, pass; 1847 IDWTELEM best_dequant[height * stride]; 1848 IDWTELEM idwt2_buffer[height * stride]; 1849 const int score_stride= (width + 10)/Q2_STEP; 1850 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size 1851 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size 1852 int threshold= (s->m.lambda * s->m.lambda) >> 6; 1853 1854 //FIXME pass the copy cleanly ? 1855 1856// memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM)); 1857 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count); 1858 1859 for(level=0; level<s->spatial_decomposition_count; level++){ 1860 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 1861 SubBand *b= &p->band[level][orientation]; 1862 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); 1863 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer); 1864 assert(src == b->buf); // code does not depend on this but it is true currently 1865 1866 quantize(s, b, dst, src, b->stride, s->qbias); 1867 } 1868 } 1869 for(pass=0; pass<1; pass++){ 1870 if(s->qbias == 0) //keyframe 1871 continue; 1872 for(level=0; level<s->spatial_decomposition_count; level++){ 1873 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 1874 SubBand *b= &p->band[level][orientation]; 1875 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer); 1876 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); 1877 1878 for(ys= 0; ys<Q2_STEP; ys++){ 1879 for(xs= 0; xs<Q2_STEP; xs++){ 1880 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); 1881 dequantize_all(s, p, idwt2_buffer, width, height); 1882 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); 1883 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); 1884 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); 1885 for(y=ys; y<b->height; y+= Q2_STEP){ 1886 for(x=xs; x<b->width; x+= Q2_STEP){ 1887 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++; 1888 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--; 1889 //FIXME try more than just -- 1890 } 1891 } 1892 dequantize_all(s, p, idwt2_buffer, width, height); 1893 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); 1894 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); 1895 for(y=ys; y<b->height; y+= Q2_STEP){ 1896 for(x=xs; x<b->width; x+= Q2_STEP){ 1897 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride; 1898 if(score[score_idx] <= best_score[score_idx] + threshold){ 1899 best_score[score_idx]= score[score_idx]; 1900 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++; 1901 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--; 1902 //FIXME copy instead 1903 } 1904 } 1905 } 1906 } 1907 } 1908 } 1909 } 1910 } 1911 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end 1912} 1913 1914#endif /* QUANTIZE2==1 */ 1915 1916#define USE_HALFPEL_PLANE 0 1917 1918static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ 1919 int p,x,y; 1920 1921 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE)); 1922 1923 for(p=0; p<3; p++){ 1924 int is_chroma= !!p; 1925 int w= s->avctx->width >>is_chroma; 1926 int h= s->avctx->height >>is_chroma; 1927 int ls= frame->linesize[p]; 1928 uint8_t *src= frame->data[p]; 1929 1930 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); 1931 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); 1932 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); 1933 1934 halfpel[0][p]= src; 1935 for(y=0; y<h; y++){ 1936 for(x=0; x<w; x++){ 1937 int i= y*ls + x; 1938 1939 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5; 1940 } 1941 } 1942 for(y=0; y<h; y++){ 1943 for(x=0; x<w; x++){ 1944 int i= y*ls + x; 1945 1946 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; 1947 } 1948 } 1949 src= halfpel[1][p]; 1950 for(y=0; y<h; y++){ 1951 for(x=0; x<w; x++){ 1952 int i= y*ls + x; 1953 1954 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; 1955 } 1956 } 1957 1958//FIXME border! 1959 } 1960} 1961 1962static void release_buffer(AVCodecContext *avctx){ 1963 SnowContext *s = avctx->priv_data; 1964 int i; 1965 1966 if(s->last_picture[s->max_ref_frames-1].data[0]){ 1967 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); 1968 for(i=0; i<9; i++) 1969 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) 1970 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); 1971 } 1972} 1973 1974static int frame_start(SnowContext *s){ 1975 AVFrame tmp; 1976 int w= s->avctx->width; //FIXME round up to x16 ? 1977 int h= s->avctx->height; 1978 1979 if(s->current_picture.data[0]){ 1980 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); 1981 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); 1982 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); 1983 } 1984 1985 release_buffer(s->avctx); 1986 1987 tmp= s->last_picture[s->max_ref_frames-1]; 1988 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); 1989 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); 1990 if(USE_HALFPEL_PLANE && s->current_picture.data[0]) 1991 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture); 1992 s->last_picture[0]= s->current_picture; 1993 s->current_picture= tmp; 1994 1995 if(s->keyframe){ 1996 s->ref_frames= 0; 1997 }else{ 1998 int i; 1999 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) 2000 if(i && s->last_picture[i-1].key_frame) 2001 break; 2002 s->ref_frames= i; 2003 if(s->ref_frames==0){ 2004 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n"); 2005 return -1; 2006 } 2007 } 2008 2009 s->current_picture.reference= 1; 2010 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ 2011 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); 2012 return -1; 2013 } 2014 2015 s->current_picture.key_frame= s->keyframe; 2016 2017 return 0; 2018} 2019 2020static av_cold void common_end(SnowContext *s){ 2021 int plane_index, level, orientation, i; 2022 2023 av_freep(&s->spatial_dwt_buffer); 2024 av_freep(&s->spatial_idwt_buffer); 2025 2026 s->m.me.temp= NULL; 2027 av_freep(&s->m.me.scratchpad); 2028 av_freep(&s->m.me.map); 2029 av_freep(&s->m.me.score_map); 2030 av_freep(&s->m.obmc_scratchpad); 2031 2032 av_freep(&s->block); 2033 av_freep(&s->scratchbuf); 2034 2035 for(i=0; i<MAX_REF_FRAMES; i++){ 2036 av_freep(&s->ref_mvs[i]); 2037 av_freep(&s->ref_scores[i]); 2038 if(s->last_picture[i].data[0]) 2039 s->avctx->release_buffer(s->avctx, &s->last_picture[i]); 2040 } 2041 2042 for(plane_index=0; plane_index<3; plane_index++){ 2043 for(level=s->spatial_decomposition_count-1; level>=0; level--){ 2044 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 2045 SubBand *b= &s->plane[plane_index].band[level][orientation]; 2046 2047 av_freep(&b->x_coeff); 2048 } 2049 } 2050 } 2051 if (s->mconly_picture.data[0]) 2052 s->avctx->release_buffer(s->avctx, &s->mconly_picture); 2053 if (s->current_picture.data[0]) 2054 s->avctx->release_buffer(s->avctx, &s->current_picture); 2055} 2056 2057static av_cold int decode_init(AVCodecContext *avctx) 2058{ 2059 avctx->pix_fmt= PIX_FMT_YUV420P; 2060 2061 common_init(avctx); 2062 2063 return 0; 2064} 2065 2066static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){ 2067 const uint8_t *buf = avpkt->data; 2068 int buf_size = avpkt->size; 2069 SnowContext *s = avctx->priv_data; 2070 RangeCoder * const c= &s->c; 2071 int bytes_read; 2072 AVFrame *picture = data; 2073 int level, orientation, plane_index; 2074 2075 ff_init_range_decoder(c, buf, buf_size); 2076 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); 2077 2078 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P 2079 if(decode_header(s)<0) 2080 return -1; 2081 common_init_after_header(avctx); 2082 2083 // realloc slice buffer for the case that spatial_decomposition_count changed 2084 ff_slice_buffer_destroy(&s->sb); 2085 ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer); 2086 2087 for(plane_index=0; plane_index<3; plane_index++){ 2088 Plane *p= &s->plane[plane_index]; 2089 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 2090 && p->hcoeff[1]==-10 2091 && p->hcoeff[2]==2; 2092 } 2093 2094 alloc_blocks(s); 2095 2096 if(frame_start(s) < 0) 2097 return -1; 2098 //keyframe flag duplication mess FIXME 2099 if(avctx->debug&FF_DEBUG_PICT_INFO) 2100 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); 2101 2102 decode_blocks(s); 2103 2104 for(plane_index=0; plane_index<3; plane_index++){ 2105 Plane *p= &s->plane[plane_index]; 2106 int w= p->width; 2107 int h= p->height; 2108 int x, y; 2109 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ 2110 2111 if(s->avctx->debug&2048){ 2112 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); 2113 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); 2114 2115 for(y=0; y<h; y++){ 2116 for(x=0; x<w; x++){ 2117 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; 2118 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; 2119 } 2120 } 2121 } 2122 2123 { 2124 for(level=0; level<s->spatial_decomposition_count; level++){ 2125 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 2126 SubBand *b= &p->band[level][orientation]; 2127 unpack_coeffs(s, b, b->parent, orientation); 2128 } 2129 } 2130 } 2131 2132 { 2133 const int mb_h= s->b_height << s->block_max_depth; 2134 const int block_size = MB_SIZE >> s->block_max_depth; 2135 const int block_w = plane_index ? block_size/2 : block_size; 2136 int mb_y; 2137 DWTCompose cs[MAX_DECOMPOSITIONS]; 2138 int yd=0, yq=0; 2139 int y; 2140 int end_y; 2141 2142 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); 2143 for(mb_y=0; mb_y<=mb_h; mb_y++){ 2144 2145 int slice_starty = block_w*mb_y; 2146 int slice_h = block_w*(mb_y+1); 2147 if (!(s->keyframe || s->avctx->debug&512)){ 2148 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); 2149 slice_h -= (block_w >> 1); 2150 } 2151 2152 for(level=0; level<s->spatial_decomposition_count; level++){ 2153 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 2154 SubBand *b= &p->band[level][orientation]; 2155 int start_y; 2156 int end_y; 2157 int our_mb_start = mb_y; 2158 int our_mb_end = (mb_y + 1); 2159 const int extra= 3; 2160 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); 2161 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); 2162 if (!(s->keyframe || s->avctx->debug&512)){ 2163 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); 2164 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); 2165 } 2166 start_y = FFMIN(b->height, start_y); 2167 end_y = FFMIN(b->height, end_y); 2168 2169 if (start_y != end_y){ 2170 if (orientation == 0){ 2171 SubBand * correlate_band = &p->band[0][0]; 2172 int correlate_end_y = FFMIN(b->height, end_y + 1); 2173 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); 2174 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); 2175 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); 2176 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); 2177 } 2178 else 2179 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); 2180 } 2181 } 2182 } 2183 2184 for(; yd<slice_h; yd+=4){ 2185 ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); 2186 } 2187 2188 if(s->qlog == LOSSLESS_QLOG){ 2189 for(; yq<slice_h && yq<h; yq++){ 2190 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); 2191 for(x=0; x<w; x++){ 2192 line[x] <<= FRAC_BITS; 2193 } 2194 } 2195 } 2196 2197 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); 2198 2199 y = FFMIN(p->height, slice_starty); 2200 end_y = FFMIN(p->height, slice_h); 2201 while(y < end_y) 2202 ff_slice_buffer_release(&s->sb, y++); 2203 } 2204 2205 ff_slice_buffer_flush(&s->sb); 2206 } 2207 2208 } 2209 2210 emms_c(); 2211 2212 release_buffer(avctx); 2213 2214 if(!(s->avctx->debug&2048)) 2215 *picture= s->current_picture; 2216 else 2217 *picture= s->mconly_picture; 2218 2219 *data_size = sizeof(AVFrame); 2220 2221 bytes_read= c->bytestream - c->bytestream_start; 2222 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME 2223 2224 return bytes_read; 2225} 2226 2227static av_cold int decode_end(AVCodecContext *avctx) 2228{ 2229 SnowContext *s = avctx->priv_data; 2230 2231 ff_slice_buffer_destroy(&s->sb); 2232 2233 common_end(s); 2234 2235 return 0; 2236} 2237 2238AVCodec snow_decoder = { 2239 "snow", 2240 AVMEDIA_TYPE_VIDEO, 2241 CODEC_ID_SNOW, 2242 sizeof(SnowContext), 2243 decode_init, 2244 NULL, 2245 decode_end, 2246 decode_frame, 2247 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/, 2248 NULL, 2249 .long_name = NULL_IF_CONFIG_SMALL("Snow"), 2250}; 2251 2252#if CONFIG_SNOW_ENCODER 2253static av_cold int encode_init(AVCodecContext *avctx) 2254{ 2255 SnowContext *s = avctx->priv_data; 2256 int plane_index; 2257 2258 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ 2259 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n" 2260 "Use vstrict=-2 / -strict -2 to use it anyway.\n"); 2261 return -1; 2262 } 2263 2264 if(avctx->prediction_method == DWT_97 2265 && (avctx->flags & CODEC_FLAG_QSCALE) 2266 && avctx->global_quality == 0){ 2267 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n"); 2268 return -1; 2269 } 2270 2271 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type 2272 2273 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; 2274 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0; 2275 2276 for(plane_index=0; plane_index<3; plane_index++){ 2277 s->plane[plane_index].diag_mc= 1; 2278 s->plane[plane_index].htaps= 6; 2279 s->plane[plane_index].hcoeff[0]= 40; 2280 s->plane[plane_index].hcoeff[1]= -10; 2281 s->plane[plane_index].hcoeff[2]= 2; 2282 s->plane[plane_index].fast_mc= 1; 2283 } 2284 2285 common_init(avctx); 2286 alloc_blocks(s); 2287 2288 s->version=0; 2289 2290 s->m.avctx = avctx; 2291 s->m.flags = avctx->flags; 2292 s->m.bit_rate= avctx->bit_rate; 2293 2294 s->m.me.temp = 2295 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); 2296 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); 2297 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); 2298 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); 2299 h263_encode_init(&s->m); //mv_penalty 2300 2301 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); 2302 2303 if(avctx->flags&CODEC_FLAG_PASS1){ 2304 if(!avctx->stats_out) 2305 avctx->stats_out = av_mallocz(256); 2306 } 2307 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ 2308 if(ff_rate_control_init(&s->m) < 0) 2309 return -1; 2310 } 2311 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); 2312 2313 avctx->coded_frame= &s->current_picture; 2314 switch(avctx->pix_fmt){ 2315// case PIX_FMT_YUV444P: 2316// case PIX_FMT_YUV422P: 2317 case PIX_FMT_YUV420P: 2318 case PIX_FMT_GRAY8: 2319// case PIX_FMT_YUV411P: 2320// case PIX_FMT_YUV410P: 2321 s->colorspace_type= 0; 2322 break; 2323/* case PIX_FMT_RGB32: 2324 s->colorspace= 1; 2325 break;*/ 2326 default: 2327 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n"); 2328 return -1; 2329 } 2330// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); 2331 s->chroma_h_shift= 1; 2332 s->chroma_v_shift= 1; 2333 2334 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); 2335 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); 2336 2337 s->avctx->get_buffer(s->avctx, &s->input_picture); 2338 2339 if(s->avctx->me_method == ME_ITER){ 2340 int i; 2341 int size= s->b_width * s->b_height << 2*s->block_max_depth; 2342 for(i=0; i<s->max_ref_frames; i++){ 2343 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); 2344 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); 2345 } 2346 } 2347 2348 return 0; 2349} 2350 2351//near copy & paste from dsputil, FIXME 2352static int pix_sum(uint8_t * pix, int line_size, int w) 2353{ 2354 int s, i, j; 2355 2356 s = 0; 2357 for (i = 0; i < w; i++) { 2358 for (j = 0; j < w; j++) { 2359 s += pix[0]; 2360 pix ++; 2361 } 2362 pix += line_size - w; 2363 } 2364 return s; 2365} 2366 2367//near copy & paste from dsputil, FIXME 2368static int pix_norm1(uint8_t * pix, int line_size, int w) 2369{ 2370 int s, i, j; 2371 uint32_t *sq = ff_squareTbl + 256; 2372 2373 s = 0; 2374 for (i = 0; i < w; i++) { 2375 for (j = 0; j < w; j ++) { 2376 s += sq[pix[0]]; 2377 pix ++; 2378 } 2379 pix += line_size - w; 2380 } 2381 return s; 2382} 2383 2384//FIXME copy&paste 2385#define P_LEFT P[1] 2386#define P_TOP P[2] 2387#define P_TOPRIGHT P[3] 2388#define P_MEDIAN P[4] 2389#define P_MV1 P[9] 2390#define FLAG_QPEL 1 //must be 1 2391 2392static int encode_q_branch(SnowContext *s, int level, int x, int y){ 2393 uint8_t p_buffer[1024]; 2394 uint8_t i_buffer[1024]; 2395 uint8_t p_state[sizeof(s->block_state)]; 2396 uint8_t i_state[sizeof(s->block_state)]; 2397 RangeCoder pc, ic; 2398 uint8_t *pbbak= s->c.bytestream; 2399 uint8_t *pbbak_start= s->c.bytestream_start; 2400 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits; 2401 const int w= s->b_width << s->block_max_depth; 2402 const int h= s->b_height << s->block_max_depth; 2403 const int rem_depth= s->block_max_depth - level; 2404 const int index= (x + y*w) << rem_depth; 2405 const int block_w= 1<<(LOG2_MB_SIZE - level); 2406 int trx= (x+1)<<rem_depth; 2407 int try= (y+1)<<rem_depth; 2408 const BlockNode *left = x ? &s->block[index-1] : &null_block; 2409 const BlockNode *top = y ? &s->block[index-w] : &null_block; 2410 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block; 2411 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; 2412 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; 2413 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt 2414 int pl = left->color[0]; 2415 int pcb= left->color[1]; 2416 int pcr= left->color[2]; 2417 int pmx, pmy; 2418 int mx=0, my=0; 2419 int l,cr,cb; 2420 const int stride= s->current_picture.linesize[0]; 2421 const int uvstride= s->current_picture.linesize[1]; 2422 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w, 2423 s->input_picture.data[1] + (x + y*uvstride)*block_w/2, 2424 s->input_picture.data[2] + (x + y*uvstride)*block_w/2}; 2425 int P[10][2]; 2426 int16_t last_mv[3][2]; 2427 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused 2428 const int shift= 1+qpel; 2429 MotionEstContext *c= &s->m.me; 2430 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); 2431 int mx_context= av_log2(2*FFABS(left->mx - top->mx)); 2432 int my_context= av_log2(2*FFABS(left->my - top->my)); 2433 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; 2434 int ref, best_ref, ref_score, ref_mx, ref_my; 2435 2436 assert(sizeof(s->block_state) >= 256); 2437 if(s->keyframe){ 2438 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); 2439 return 0; 2440 } 2441 2442// clip predictors / edge ? 2443 2444 P_LEFT[0]= left->mx; 2445 P_LEFT[1]= left->my; 2446 P_TOP [0]= top->mx; 2447 P_TOP [1]= top->my; 2448 P_TOPRIGHT[0]= tr->mx; 2449 P_TOPRIGHT[1]= tr->my; 2450 2451 last_mv[0][0]= s->block[index].mx; 2452 last_mv[0][1]= s->block[index].my; 2453 last_mv[1][0]= right->mx; 2454 last_mv[1][1]= right->my; 2455 last_mv[2][0]= bottom->mx; 2456 last_mv[2][1]= bottom->my; 2457 2458 s->m.mb_stride=2; 2459 s->m.mb_x= 2460 s->m.mb_y= 0; 2461 c->skip= 0; 2462 2463 assert(c-> stride == stride); 2464 assert(c->uvstride == uvstride); 2465 2466 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); 2467 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); 2468 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); 2469 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; 2470 2471 c->xmin = - x*block_w - 16+3; 2472 c->ymin = - y*block_w - 16+3; 2473 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3; 2474 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3; 2475 2476 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); 2477 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); 2478 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); 2479 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); 2480 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); 2481 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip 2482 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); 2483 2484 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 2485 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 2486 2487 if (!y) { 2488 c->pred_x= P_LEFT[0]; 2489 c->pred_y= P_LEFT[1]; 2490 } else { 2491 c->pred_x = P_MEDIAN[0]; 2492 c->pred_y = P_MEDIAN[1]; 2493 } 2494 2495 score= INT_MAX; 2496 best_ref= 0; 2497 for(ref=0; ref<s->ref_frames; ref++){ 2498 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0); 2499 2500 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, 2501 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); 2502 2503 assert(ref_mx >= c->xmin); 2504 assert(ref_mx <= c->xmax); 2505 assert(ref_my >= c->ymin); 2506 assert(ref_my <= c->ymax); 2507 2508 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); 2509 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); 2510 ref_score+= 2*av_log2(2*ref)*c->penalty_factor; 2511 if(s->ref_mvs[ref]){ 2512 s->ref_mvs[ref][index][0]= ref_mx; 2513 s->ref_mvs[ref][index][1]= ref_my; 2514 s->ref_scores[ref][index]= ref_score; 2515 } 2516 if(score > ref_score){ 2517 score= ref_score; 2518 best_ref= ref; 2519 mx= ref_mx; 2520 my= ref_my; 2521 } 2522 } 2523 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2 2524 2525 // subpel search 2526 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start); 2527 pc= s->c; 2528 pc.bytestream_start= 2529 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo 2530 memcpy(p_state, s->block_state, sizeof(s->block_state)); 2531 2532 if(level!=s->block_max_depth) 2533 put_rac(&pc, &p_state[4 + s_context], 1); 2534 put_rac(&pc, &p_state[1 + left->type + top->type], 0); 2535 if(s->ref_frames > 1) 2536 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); 2537 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); 2538 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); 2539 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); 2540 p_len= pc.bytestream - pc.bytestream_start; 2541 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT; 2542 2543 block_s= block_w*block_w; 2544 sum = pix_sum(current_data[0], stride, block_w); 2545 l= (sum + block_s/2)/block_s; 2546 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; 2547 2548 block_s= block_w*block_w>>2; 2549 sum = pix_sum(current_data[1], uvstride, block_w>>1); 2550 cb= (sum + block_s/2)/block_s; 2551// iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; 2552 sum = pix_sum(current_data[2], uvstride, block_w>>1); 2553 cr= (sum + block_s/2)/block_s; 2554// iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; 2555 2556 ic= s->c; 2557 ic.bytestream_start= 2558 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo 2559 memcpy(i_state, s->block_state, sizeof(s->block_state)); 2560 if(level!=s->block_max_depth) 2561 put_rac(&ic, &i_state[4 + s_context], 1); 2562 put_rac(&ic, &i_state[1 + left->type + top->type], 1); 2563 put_symbol(&ic, &i_state[32], l-pl , 1); 2564 put_symbol(&ic, &i_state[64], cb-pcb, 1); 2565 put_symbol(&ic, &i_state[96], cr-pcr, 1); 2566 i_len= ic.bytestream - ic.bytestream_start; 2567 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT; 2568 2569// assert(score==256*256*256*64-1); 2570 assert(iscore < 255*255*256 + s->lambda2*10); 2571 assert(iscore >= 0); 2572 assert(l>=0 && l<=255); 2573 assert(pl>=0 && pl<=255); 2574 2575 if(level==0){ 2576 int varc= iscore >> 8; 2577 int vard= score >> 8; 2578 if (vard <= 64 || vard < varc) 2579 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); 2580 else 2581 c->scene_change_score+= s->m.qscale; 2582 } 2583 2584 if(level!=s->block_max_depth){ 2585 put_rac(&s->c, &s->block_state[4 + s_context], 0); 2586 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); 2587 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); 2588 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); 2589 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); 2590 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead 2591 2592 if(score2 < score && score2 < iscore) 2593 return score2; 2594 } 2595 2596 if(iscore < score){ 2597 pred_mv(s, &pmx, &pmy, 0, left, top, tr); 2598 memcpy(pbbak, i_buffer, i_len); 2599 s->c= ic; 2600 s->c.bytestream_start= pbbak_start; 2601 s->c.bytestream= pbbak + i_len; 2602 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); 2603 memcpy(s->block_state, i_state, sizeof(s->block_state)); 2604 return iscore; 2605 }else{ 2606 memcpy(pbbak, p_buffer, p_len); 2607 s->c= pc; 2608 s->c.bytestream_start= pbbak_start; 2609 s->c.bytestream= pbbak + p_len; 2610 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); 2611 memcpy(s->block_state, p_state, sizeof(s->block_state)); 2612 return score; 2613 } 2614} 2615 2616static void encode_q_branch2(SnowContext *s, int level, int x, int y){ 2617 const int w= s->b_width << s->block_max_depth; 2618 const int rem_depth= s->block_max_depth - level; 2619 const int index= (x + y*w) << rem_depth; 2620 int trx= (x+1)<<rem_depth; 2621 BlockNode *b= &s->block[index]; 2622 const BlockNode *left = x ? &s->block[index-1] : &null_block; 2623 const BlockNode *top = y ? &s->block[index-w] : &null_block; 2624 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; 2625 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt 2626 int pl = left->color[0]; 2627 int pcb= left->color[1]; 2628 int pcr= left->color[2]; 2629 int pmx, pmy; 2630 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); 2631 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref; 2632 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref; 2633 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; 2634 2635 if(s->keyframe){ 2636 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); 2637 return; 2638 } 2639 2640 if(level!=s->block_max_depth){ 2641 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ 2642 put_rac(&s->c, &s->block_state[4 + s_context], 1); 2643 }else{ 2644 put_rac(&s->c, &s->block_state[4 + s_context], 0); 2645 encode_q_branch2(s, level+1, 2*x+0, 2*y+0); 2646 encode_q_branch2(s, level+1, 2*x+1, 2*y+0); 2647 encode_q_branch2(s, level+1, 2*x+0, 2*y+1); 2648 encode_q_branch2(s, level+1, 2*x+1, 2*y+1); 2649 return; 2650 } 2651 } 2652 if(b->type & BLOCK_INTRA){ 2653 pred_mv(s, &pmx, &pmy, 0, left, top, tr); 2654 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); 2655 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); 2656 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); 2657 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); 2658 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA); 2659 }else{ 2660 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); 2661 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); 2662 if(s->ref_frames > 1) 2663 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); 2664 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); 2665 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); 2666 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); 2667 } 2668} 2669 2670static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ 2671 int i, x2, y2; 2672 Plane *p= &s->plane[plane_index]; 2673 const int block_size = MB_SIZE >> s->block_max_depth; 2674 const int block_w = plane_index ? block_size/2 : block_size; 2675 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; 2676 const int obmc_stride= plane_index ? block_size : 2*block_size; 2677 const int ref_stride= s->current_picture.linesize[plane_index]; 2678 uint8_t *src= s-> input_picture.data[plane_index]; 2679 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned 2680 const int b_stride = s->b_width << s->block_max_depth; 2681 const int w= p->width; 2682 const int h= p->height; 2683 int index= mb_x + mb_y*b_stride; 2684 BlockNode *b= &s->block[index]; 2685 BlockNode backup= *b; 2686 int ab=0; 2687 int aa=0; 2688 2689 b->type|= BLOCK_INTRA; 2690 b->color[plane_index]= 0; 2691 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM)); 2692 2693 for(i=0; i<4; i++){ 2694 int mb_x2= mb_x + (i &1) - 1; 2695 int mb_y2= mb_y + (i>>1) - 1; 2696 int x= block_w*mb_x2 + block_w/2; 2697 int y= block_w*mb_y2 + block_w/2; 2698 2699 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, 2700 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); 2701 2702 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ 2703 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ 2704 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; 2705 int obmc_v= obmc[index]; 2706 int d; 2707 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; 2708 if(x<0) obmc_v += obmc[index + block_w]; 2709 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; 2710 if(x+block_w>w) obmc_v += obmc[index - block_w]; 2711 //FIXME precalculate this or simplify it somehow else 2712 2713 d = -dst[index] + (1<<(FRAC_BITS-1)); 2714 dst[index] = d; 2715 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; 2716 aa += obmc_v * obmc_v; //FIXME precalculate this 2717 } 2718 } 2719 } 2720 *b= backup; 2721 2722 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping 2723} 2724 2725static inline int get_block_bits(SnowContext *s, int x, int y, int w){ 2726 const int b_stride = s->b_width << s->block_max_depth; 2727 const int b_height = s->b_height<< s->block_max_depth; 2728 int index= x + y*b_stride; 2729 const BlockNode *b = &s->block[index]; 2730 const BlockNode *left = x ? &s->block[index-1] : &null_block; 2731 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block; 2732 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; 2733 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; 2734 int dmx, dmy; 2735// int mx_context= av_log2(2*FFABS(left->mx - top->mx)); 2736// int my_context= av_log2(2*FFABS(left->my - top->my)); 2737 2738 if(x<0 || x>=b_stride || y>=b_height) 2739 return 0; 2740/* 27411 0 0 274201X 1-2 1 2743001XX 3-6 2-3 27440001XXX 7-14 4-7 274500001XXXX 15-30 8-15 2746*/ 2747//FIXME try accurate rate 2748//FIXME intra and inter predictors if surrounding blocks are not the same type 2749 if(b->type & BLOCK_INTRA){ 2750 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) 2751 + av_log2(2*FFABS(left->color[1] - b->color[1])) 2752 + av_log2(2*FFABS(left->color[2] - b->color[2]))); 2753 }else{ 2754 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); 2755 dmx-= b->mx; 2756 dmy-= b->my; 2757 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda 2758 + av_log2(2*FFABS(dmy)) 2759 + av_log2(2*b->ref)); 2760 } 2761} 2762 2763static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ 2764 Plane *p= &s->plane[plane_index]; 2765 const int block_size = MB_SIZE >> s->block_max_depth; 2766 const int block_w = plane_index ? block_size/2 : block_size; 2767 const int obmc_stride= plane_index ? block_size : 2*block_size; 2768 const int ref_stride= s->current_picture.linesize[plane_index]; 2769 uint8_t *dst= s->current_picture.data[plane_index]; 2770 uint8_t *src= s-> input_picture.data[plane_index]; 2771 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; 2772 uint8_t *cur = s->scratchbuf; 2773 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)]; 2774 const int b_stride = s->b_width << s->block_max_depth; 2775 const int b_height = s->b_height<< s->block_max_depth; 2776 const int w= p->width; 2777 const int h= p->height; 2778 int distortion; 2779 int rate= 0; 2780 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); 2781 int sx= block_w*mb_x - block_w/2; 2782 int sy= block_w*mb_y - block_w/2; 2783 int x0= FFMAX(0,-sx); 2784 int y0= FFMAX(0,-sy); 2785 int x1= FFMIN(block_w*2, w-sx); 2786 int y1= FFMIN(block_w*2, h-sy); 2787 int i,x,y; 2788 2789 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); 2790 2791 for(y=y0; y<y1; y++){ 2792 const uint8_t *obmc1= obmc_edged + y*obmc_stride; 2793 const IDWTELEM *pred1 = pred + y*obmc_stride; 2794 uint8_t *cur1 = cur + y*ref_stride; 2795 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; 2796 for(x=x0; x<x1; x++){ 2797#if FRAC_BITS >= LOG2_OBMC_MAX 2798 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); 2799#else 2800 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS); 2801#endif 2802 v = (v + pred1[x]) >> FRAC_BITS; 2803 if(v&(~255)) v= ~(v>>31); 2804 dst1[x] = v; 2805 } 2806 } 2807 2808 /* copy the regions where obmc[] = (uint8_t)256 */ 2809 if(LOG2_OBMC_MAX == 8 2810 && (mb_x == 0 || mb_x == b_stride-1) 2811 && (mb_y == 0 || mb_y == b_height-1)){ 2812 if(mb_x == 0) 2813 x1 = block_w; 2814 else 2815 x0 = block_w; 2816 if(mb_y == 0) 2817 y1 = block_w; 2818 else 2819 y0 = block_w; 2820 for(y=y0; y<y1; y++) 2821 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); 2822 } 2823 2824 if(block_w==16){ 2825 /* FIXME rearrange dsputil to fit 32x32 cmp functions */ 2826 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ 2827 /* FIXME cmps overlap but do not cover the wavelet's whole support. 2828 * So improving the score of one block is not strictly guaranteed 2829 * to improve the score of the whole frame, thus iterative motion 2830 * estimation does not always converge. */ 2831 if(s->avctx->me_cmp == FF_CMP_W97) 2832 distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); 2833 else if(s->avctx->me_cmp == FF_CMP_W53) 2834 distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); 2835 else{ 2836 distortion = 0; 2837 for(i=0; i<4; i++){ 2838 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; 2839 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); 2840 } 2841 } 2842 }else{ 2843 assert(block_w==8); 2844 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); 2845 } 2846 2847 if(plane_index==0){ 2848 for(i=0; i<4; i++){ 2849/* ..RRr 2850 * .RXx. 2851 * rxx.. 2852 */ 2853 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1); 2854 } 2855 if(mb_x == b_stride-2) 2856 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1); 2857 } 2858 return distortion + rate*penalty_factor; 2859} 2860 2861static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ 2862 int i, y2; 2863 Plane *p= &s->plane[plane_index]; 2864 const int block_size = MB_SIZE >> s->block_max_depth; 2865 const int block_w = plane_index ? block_size/2 : block_size; 2866 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; 2867 const int obmc_stride= plane_index ? block_size : 2*block_size; 2868 const int ref_stride= s->current_picture.linesize[plane_index]; 2869 uint8_t *dst= s->current_picture.data[plane_index]; 2870 uint8_t *src= s-> input_picture.data[plane_index]; 2871 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst 2872 // const has only been removed from zero_dst to suppress a warning 2873 static IDWTELEM zero_dst[4096]; //FIXME 2874 const int b_stride = s->b_width << s->block_max_depth; 2875 const int w= p->width; 2876 const int h= p->height; 2877 int distortion= 0; 2878 int rate= 0; 2879 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); 2880 2881 for(i=0; i<9; i++){ 2882 int mb_x2= mb_x + (i%3) - 1; 2883 int mb_y2= mb_y + (i/3) - 1; 2884 int x= block_w*mb_x2 + block_w/2; 2885 int y= block_w*mb_y2 + block_w/2; 2886 2887 add_yblock(s, 0, NULL, zero_dst, dst, obmc, 2888 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); 2889 2890 //FIXME find a cleaner/simpler way to skip the outside stuff 2891 for(y2= y; y2<0; y2++) 2892 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); 2893 for(y2= h; y2<y+block_w; y2++) 2894 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); 2895 if(x<0){ 2896 for(y2= y; y2<y+block_w; y2++) 2897 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); 2898 } 2899 if(x+block_w > w){ 2900 for(y2= y; y2<y+block_w; y2++) 2901 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); 2902 } 2903 2904 assert(block_w== 8 || block_w==16); 2905 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); 2906 } 2907 2908 if(plane_index==0){ 2909 BlockNode *b= &s->block[mb_x+mb_y*b_stride]; 2910 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); 2911 2912/* ..RRRr 2913 * .RXXx. 2914 * .RXXx. 2915 * rxxx. 2916 */ 2917 if(merged) 2918 rate = get_block_bits(s, mb_x, mb_y, 2); 2919 for(i=merged?4:0; i<9; i++){ 2920 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}}; 2921 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1); 2922 } 2923 } 2924 return distortion + rate*penalty_factor; 2925} 2926 2927static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ 2928 const int w= b->width; 2929 const int h= b->height; 2930 int x, y; 2931 2932 if(1){ 2933 int run=0; 2934 int runs[w*h]; 2935 int run_index=0; 2936 int max_index; 2937 2938 for(y=0; y<h; y++){ 2939 for(x=0; x<w; x++){ 2940 int v, p=0; 2941 int /*ll=0, */l=0, lt=0, t=0, rt=0; 2942 v= src[x + y*stride]; 2943 2944 if(y){ 2945 t= src[x + (y-1)*stride]; 2946 if(x){ 2947 lt= src[x - 1 + (y-1)*stride]; 2948 } 2949 if(x + 1 < w){ 2950 rt= src[x + 1 + (y-1)*stride]; 2951 } 2952 } 2953 if(x){ 2954 l= src[x - 1 + y*stride]; 2955 /*if(x > 1){ 2956 if(orientation==1) ll= src[y + (x-2)*stride]; 2957 else ll= src[x - 2 + y*stride]; 2958 }*/ 2959 } 2960 if(parent){ 2961 int px= x>>1; 2962 int py= y>>1; 2963 if(px<b->parent->width && py<b->parent->height) 2964 p= parent[px + py*2*stride]; 2965 } 2966 if(!(/*ll|*/l|lt|t|rt|p)){ 2967 if(v){ 2968 runs[run_index++]= run; 2969 run=0; 2970 }else{ 2971 run++; 2972 } 2973 } 2974 } 2975 } 2976 max_index= run_index; 2977 runs[run_index++]= run; 2978 run_index=0; 2979 run= runs[run_index++]; 2980 2981 put_symbol2(&s->c, b->state[30], max_index, 0); 2982 if(run_index <= max_index) 2983 put_symbol2(&s->c, b->state[1], run, 3); 2984 2985 for(y=0; y<h; y++){ 2986 if(s->c.bytestream_end - s->c.bytestream < w*40){ 2987 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); 2988 return -1; 2989 } 2990 for(x=0; x<w; x++){ 2991 int v, p=0; 2992 int /*ll=0, */l=0, lt=0, t=0, rt=0; 2993 v= src[x + y*stride]; 2994 2995 if(y){ 2996 t= src[x + (y-1)*stride]; 2997 if(x){ 2998 lt= src[x - 1 + (y-1)*stride]; 2999 } 3000 if(x + 1 < w){ 3001 rt= src[x + 1 + (y-1)*stride]; 3002 } 3003 } 3004 if(x){ 3005 l= src[x - 1 + y*stride]; 3006 /*if(x > 1){ 3007 if(orientation==1) ll= src[y + (x-2)*stride]; 3008 else ll= src[x - 2 + y*stride]; 3009 }*/ 3010 } 3011 if(parent){ 3012 int px= x>>1; 3013 int py= y>>1; 3014 if(px<b->parent->width && py<b->parent->height) 3015 p= parent[px + py*2*stride]; 3016 } 3017 if(/*ll|*/l|lt|t|rt|p){ 3018 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); 3019 3020 put_rac(&s->c, &b->state[0][context], !!v); 3021 }else{ 3022 if(!run){ 3023 run= runs[run_index++]; 3024 3025 if(run_index <= max_index) 3026 put_symbol2(&s->c, b->state[1], run, 3); 3027 assert(v); 3028 }else{ 3029 run--; 3030 assert(!v); 3031 } 3032 } 3033 if(v){ 3034 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); 3035 int l2= 2*FFABS(l) + (l<0); 3036 int t2= 2*FFABS(t) + (t<0); 3037 3038 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); 3039 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); 3040 } 3041 } 3042 } 3043 } 3044 return 0; 3045} 3046 3047static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ 3048// encode_subband_qtree(s, b, src, parent, stride, orientation); 3049// encode_subband_z0run(s, b, src, parent, stride, orientation); 3050 return encode_subband_c0run(s, b, src, parent, stride, orientation); 3051// encode_subband_dzr(s, b, src, parent, stride, orientation); 3052} 3053 3054static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ 3055 const int b_stride= s->b_width << s->block_max_depth; 3056 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; 3057 BlockNode backup= *block; 3058 int rd, index, value; 3059 3060 assert(mb_x>=0 && mb_y>=0); 3061 assert(mb_x<b_stride); 3062 3063 if(intra){ 3064 block->color[0] = p[0]; 3065 block->color[1] = p[1]; 3066 block->color[2] = p[2]; 3067 block->type |= BLOCK_INTRA; 3068 }else{ 3069 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); 3070 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); 3071 if(s->me_cache[index] == value) 3072 return 0; 3073 s->me_cache[index]= value; 3074 3075 block->mx= p[0]; 3076 block->my= p[1]; 3077 block->type &= ~BLOCK_INTRA; 3078 } 3079 3080 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); 3081 3082//FIXME chroma 3083 if(rd < *best_rd){ 3084 *best_rd= rd; 3085 return 1; 3086 }else{ 3087 *block= backup; 3088 return 0; 3089 } 3090} 3091 3092/* special case for int[2] args we discard afterwards, 3093 * fixes compilation problem with gcc 2.95 */ 3094static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ 3095 int p[2] = {p0, p1}; 3096 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); 3097} 3098 3099static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ 3100 const int b_stride= s->b_width << s->block_max_depth; 3101 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; 3102 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; 3103 int rd, index, value; 3104 3105 assert(mb_x>=0 && mb_y>=0); 3106 assert(mb_x<b_stride); 3107 assert(((mb_x|mb_y)&1) == 0); 3108 3109 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); 3110 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); 3111 if(s->me_cache[index] == value) 3112 return 0; 3113 s->me_cache[index]= value; 3114 3115 block->mx= p0; 3116 block->my= p1; 3117 block->ref= ref; 3118 block->type &= ~BLOCK_INTRA; 3119 block[1]= block[b_stride]= block[b_stride+1]= *block; 3120 3121 rd= get_4block_rd(s, mb_x, mb_y, 0); 3122 3123//FIXME chroma 3124 if(rd < *best_rd){ 3125 *best_rd= rd; 3126 return 1; 3127 }else{ 3128 block[0]= backup[0]; 3129 block[1]= backup[1]; 3130 block[b_stride]= backup[2]; 3131 block[b_stride+1]= backup[3]; 3132 return 0; 3133 } 3134} 3135 3136static void iterative_me(SnowContext *s){ 3137 int pass, mb_x, mb_y; 3138 const int b_width = s->b_width << s->block_max_depth; 3139 const int b_height= s->b_height << s->block_max_depth; 3140 const int b_stride= b_width; 3141 int color[3]; 3142 3143 { 3144 RangeCoder r = s->c; 3145 uint8_t state[sizeof(s->block_state)]; 3146 memcpy(state, s->block_state, sizeof(s->block_state)); 3147 for(mb_y= 0; mb_y<s->b_height; mb_y++) 3148 for(mb_x= 0; mb_x<s->b_width; mb_x++) 3149 encode_q_branch(s, 0, mb_x, mb_y); 3150 s->c = r; 3151 memcpy(s->block_state, state, sizeof(s->block_state)); 3152 } 3153 3154 for(pass=0; pass<25; pass++){ 3155 int change= 0; 3156 3157 for(mb_y= 0; mb_y<b_height; mb_y++){ 3158 for(mb_x= 0; mb_x<b_width; mb_x++){ 3159 int dia_change, i, j, ref; 3160 int best_rd= INT_MAX, ref_rd; 3161 BlockNode backup, ref_b; 3162 const int index= mb_x + mb_y * b_stride; 3163 BlockNode *block= &s->block[index]; 3164 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL; 3165 BlockNode *lb = mb_x ? &s->block[index -1] : NULL; 3166 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL; 3167 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL; 3168 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL; 3169 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL; 3170 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL; 3171 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL; 3172 const int b_w= (MB_SIZE >> s->block_max_depth); 3173 uint8_t obmc_edged[b_w*2][b_w*2]; 3174 3175 if(pass && (block->type & BLOCK_OPT)) 3176 continue; 3177 block->type |= BLOCK_OPT; 3178 3179 backup= *block; 3180 3181 if(!s->me_cache_generation) 3182 memset(s->me_cache, 0, sizeof(s->me_cache)); 3183 s->me_cache_generation += 1<<22; 3184 3185 //FIXME precalculate 3186 { 3187 int x, y; 3188 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); 3189 if(mb_x==0) 3190 for(y=0; y<b_w*2; y++) 3191 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); 3192 if(mb_x==b_stride-1) 3193 for(y=0; y<b_w*2; y++) 3194 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); 3195 if(mb_y==0){ 3196 for(x=0; x<b_w*2; x++) 3197 obmc_edged[0][x] += obmc_edged[b_w-1][x]; 3198 for(y=1; y<b_w; y++) 3199 memcpy(obmc_edged[y], obmc_edged[0], b_w*2); 3200 } 3201 if(mb_y==b_height-1){ 3202 for(x=0; x<b_w*2; x++) 3203 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; 3204 for(y=b_w; y<b_w*2-1; y++) 3205 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); 3206 } 3207 } 3208 3209 //skip stuff outside the picture 3210 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){ 3211 uint8_t *src= s-> input_picture.data[0]; 3212 uint8_t *dst= s->current_picture.data[0]; 3213 const int stride= s->current_picture.linesize[0]; 3214 const int block_w= MB_SIZE >> s->block_max_depth; 3215 const int sx= block_w*mb_x - block_w/2; 3216 const int sy= block_w*mb_y - block_w/2; 3217 const int w= s->plane[0].width; 3218 const int h= s->plane[0].height; 3219 int y; 3220 3221 for(y=sy; y<0; y++) 3222 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); 3223 for(y=h; y<sy+block_w*2; y++) 3224 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); 3225 if(sx<0){ 3226 for(y=sy; y<sy+block_w*2; y++) 3227 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); 3228 } 3229 if(sx+block_w*2 > w){ 3230 for(y=sy; y<sy+block_w*2; y++) 3231 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); 3232 } 3233 } 3234 3235 // intra(black) = neighbors' contribution to the current block 3236 for(i=0; i<3; i++) 3237 color[i]= get_dc(s, mb_x, mb_y, i); 3238 3239 // get previous score (cannot be cached due to OBMC) 3240 if(pass > 0 && (block->type&BLOCK_INTRA)){ 3241 int color0[3]= {block->color[0], block->color[1], block->color[2]}; 3242 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd); 3243 }else 3244 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd); 3245 3246 ref_b= *block; 3247 ref_rd= best_rd; 3248 for(ref=0; ref < s->ref_frames; ref++){ 3249 int16_t (*mvr)[2]= &s->ref_mvs[ref][index]; 3250 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold 3251 continue; 3252 block->ref= ref; 3253 best_rd= INT_MAX; 3254 3255 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd); 3256 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd); 3257 if(tb) 3258 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd); 3259 if(lb) 3260 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd); 3261 if(rb) 3262 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd); 3263 if(bb) 3264 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd); 3265 3266 /* fullpel ME */ 3267 //FIXME avoid subpel interpolation / round to nearest integer 3268 do{ 3269 dia_change=0; 3270 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ 3271 for(j=0; j<i; j++){ 3272 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); 3273 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); 3274 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); 3275 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); 3276 } 3277 } 3278 }while(dia_change); 3279 /* subpel ME */ 3280 do{ 3281 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; 3282 dia_change=0; 3283 for(i=0; i<8; i++) 3284 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd); 3285 }while(dia_change); 3286 //FIXME or try the standard 2 pass qpel or similar 3287 3288 mvr[0][0]= block->mx; 3289 mvr[0][1]= block->my; 3290 if(ref_rd > best_rd){ 3291 ref_rd= best_rd; 3292 ref_b= *block; 3293 } 3294 } 3295 best_rd= ref_rd; 3296 *block= ref_b; 3297#if 1 3298 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); 3299 //FIXME RD style color selection 3300#endif 3301 if(!same_block(block, &backup)){ 3302 if(tb ) tb ->type &= ~BLOCK_OPT; 3303 if(lb ) lb ->type &= ~BLOCK_OPT; 3304 if(rb ) rb ->type &= ~BLOCK_OPT; 3305 if(bb ) bb ->type &= ~BLOCK_OPT; 3306 if(tlb) tlb->type &= ~BLOCK_OPT; 3307 if(trb) trb->type &= ~BLOCK_OPT; 3308 if(blb) blb->type &= ~BLOCK_OPT; 3309 if(brb) brb->type &= ~BLOCK_OPT; 3310 change ++; 3311 } 3312 } 3313 } 3314 av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); 3315 if(!change) 3316 break; 3317 } 3318 3319 if(s->block_max_depth == 1){ 3320 int change= 0; 3321 for(mb_y= 0; mb_y<b_height; mb_y+=2){ 3322 for(mb_x= 0; mb_x<b_width; mb_x+=2){ 3323 int i; 3324 int best_rd, init_rd; 3325 const int index= mb_x + mb_y * b_stride; 3326 BlockNode *b[4]; 3327 3328 b[0]= &s->block[index]; 3329 b[1]= b[0]+1; 3330 b[2]= b[0]+b_stride; 3331 b[3]= b[2]+1; 3332 if(same_block(b[0], b[1]) && 3333 same_block(b[0], b[2]) && 3334 same_block(b[0], b[3])) 3335 continue; 3336 3337 if(!s->me_cache_generation) 3338 memset(s->me_cache, 0, sizeof(s->me_cache)); 3339 s->me_cache_generation += 1<<22; 3340 3341 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); 3342 3343 //FIXME more multiref search? 3344 check_4block_inter(s, mb_x, mb_y, 3345 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, 3346 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); 3347 3348 for(i=0; i<4; i++) 3349 if(!(b[i]->type&BLOCK_INTRA)) 3350 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd); 3351 3352 if(init_rd != best_rd) 3353 change++; 3354 } 3355 } 3356 av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); 3357 } 3358} 3359 3360static void encode_blocks(SnowContext *s, int search){ 3361 int x, y; 3362 int w= s->b_width; 3363 int h= s->b_height; 3364 3365 if(s->avctx->me_method == ME_ITER && !s->keyframe && search) 3366 iterative_me(s); 3367 3368 for(y=0; y<h; y++){ 3369 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit 3370 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); 3371 return; 3372 } 3373 for(x=0; x<w; x++){ 3374 if(s->avctx->me_method == ME_ITER || !search) 3375 encode_q_branch2(s, 0, x, y); 3376 else 3377 encode_q_branch (s, 0, x, y); 3378 } 3379 } 3380} 3381 3382static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ 3383 const int w= b->width; 3384 const int h= b->height; 3385 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 3386 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); 3387 int x,y, thres1, thres2; 3388 3389 if(s->qlog == LOSSLESS_QLOG){ 3390 for(y=0; y<h; y++) 3391 for(x=0; x<w; x++) 3392 dst[x + y*stride]= src[x + y*stride]; 3393 return; 3394 } 3395 3396 bias= bias ? 0 : (3*qmul)>>3; 3397 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; 3398 thres2= 2*thres1; 3399 3400 if(!bias){ 3401 for(y=0; y<h; y++){ 3402 for(x=0; x<w; x++){ 3403 int i= src[x + y*stride]; 3404 3405 if((unsigned)(i+thres1) > thres2){ 3406 if(i>=0){ 3407 i<<= QEXPSHIFT; 3408 i/= qmul; //FIXME optimize 3409 dst[x + y*stride]= i; 3410 }else{ 3411 i= -i; 3412 i<<= QEXPSHIFT; 3413 i/= qmul; //FIXME optimize 3414 dst[x + y*stride]= -i; 3415 } 3416 }else 3417 dst[x + y*stride]= 0; 3418 } 3419 } 3420 }else{ 3421 for(y=0; y<h; y++){ 3422 for(x=0; x<w; x++){ 3423 int i= src[x + y*stride]; 3424 3425 if((unsigned)(i+thres1) > thres2){ 3426 if(i>=0){ 3427 i<<= QEXPSHIFT; 3428 i= (i + bias) / qmul; //FIXME optimize 3429 dst[x + y*stride]= i; 3430 }else{ 3431 i= -i; 3432 i<<= QEXPSHIFT; 3433 i= (i + bias) / qmul; //FIXME optimize 3434 dst[x + y*stride]= -i; 3435 } 3436 }else 3437 dst[x + y*stride]= 0; 3438 } 3439 } 3440 } 3441} 3442 3443static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ 3444 const int w= b->width; 3445 const int h= b->height; 3446 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 3447 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 3448 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; 3449 int x,y; 3450 3451 if(s->qlog == LOSSLESS_QLOG) return; 3452 3453 for(y=0; y<h; y++){ 3454 for(x=0; x<w; x++){ 3455 int i= src[x + y*stride]; 3456 if(i<0){ 3457 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias 3458 }else if(i>0){ 3459 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); 3460 } 3461 } 3462 } 3463} 3464 3465static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ 3466 const int w= b->width; 3467 const int h= b->height; 3468 int x,y; 3469 3470 for(y=h-1; y>=0; y--){ 3471 for(x=w-1; x>=0; x--){ 3472 int i= x + y*stride; 3473 3474 if(x){ 3475 if(use_median){ 3476 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); 3477 else src[i] -= src[i - 1]; 3478 }else{ 3479 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); 3480 else src[i] -= src[i - 1]; 3481 } 3482 }else{ 3483 if(y) src[i] -= src[i - stride]; 3484 } 3485 } 3486 } 3487} 3488 3489static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ 3490 const int w= b->width; 3491 const int h= b->height; 3492 int x,y; 3493 3494 for(y=0; y<h; y++){ 3495 for(x=0; x<w; x++){ 3496 int i= x + y*stride; 3497 3498 if(x){ 3499 if(use_median){ 3500 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); 3501 else src[i] += src[i - 1]; 3502 }else{ 3503 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); 3504 else src[i] += src[i - 1]; 3505 } 3506 }else{ 3507 if(y) src[i] += src[i - stride]; 3508 } 3509 } 3510 } 3511} 3512 3513static void encode_qlogs(SnowContext *s){ 3514 int plane_index, level, orientation; 3515 3516 for(plane_index=0; plane_index<2; plane_index++){ 3517 for(level=0; level<s->spatial_decomposition_count; level++){ 3518 for(orientation=level ? 1:0; orientation<4; orientation++){ 3519 if(orientation==2) continue; 3520 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); 3521 } 3522 } 3523 } 3524} 3525 3526static void encode_header(SnowContext *s){ 3527 int plane_index, i; 3528 uint8_t kstate[32]; 3529 3530 memset(kstate, MID_STATE, sizeof(kstate)); 3531 3532 put_rac(&s->c, kstate, s->keyframe); 3533 if(s->keyframe || s->always_reset){ 3534 reset_contexts(s); 3535 s->last_spatial_decomposition_type= 3536 s->last_qlog= 3537 s->last_qbias= 3538 s->last_mv_scale= 3539 s->last_block_max_depth= 0; 3540 for(plane_index=0; plane_index<2; plane_index++){ 3541 Plane *p= &s->plane[plane_index]; 3542 p->last_htaps=0; 3543 p->last_diag_mc=0; 3544 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff)); 3545 } 3546 } 3547 if(s->keyframe){ 3548 put_symbol(&s->c, s->header_state, s->version, 0); 3549 put_rac(&s->c, s->header_state, s->always_reset); 3550 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0); 3551 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0); 3552 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); 3553 put_symbol(&s->c, s->header_state, s->colorspace_type, 0); 3554 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0); 3555 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); 3556 put_rac(&s->c, s->header_state, s->spatial_scalability); 3557// put_rac(&s->c, s->header_state, s->rate_scalability); 3558 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0); 3559 3560 encode_qlogs(s); 3561 } 3562 3563 if(!s->keyframe){ 3564 int update_mc=0; 3565 for(plane_index=0; plane_index<2; plane_index++){ 3566 Plane *p= &s->plane[plane_index]; 3567 update_mc |= p->last_htaps != p->htaps; 3568 update_mc |= p->last_diag_mc != p->diag_mc; 3569 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); 3570 } 3571 put_rac(&s->c, s->header_state, update_mc); 3572 if(update_mc){ 3573 for(plane_index=0; plane_index<2; plane_index++){ 3574 Plane *p= &s->plane[plane_index]; 3575 put_rac(&s->c, s->header_state, p->diag_mc); 3576 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); 3577 for(i= p->htaps/2; i; i--) 3578 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); 3579 } 3580 } 3581 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ 3582 put_rac(&s->c, s->header_state, 1); 3583 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); 3584 encode_qlogs(s); 3585 }else 3586 put_rac(&s->c, s->header_state, 0); 3587 } 3588 3589 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); 3590 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); 3591 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); 3592 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); 3593 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); 3594 3595} 3596 3597static void update_last_header_values(SnowContext *s){ 3598 int plane_index; 3599 3600 if(!s->keyframe){ 3601 for(plane_index=0; plane_index<2; plane_index++){ 3602 Plane *p= &s->plane[plane_index]; 3603 p->last_diag_mc= p->diag_mc; 3604 p->last_htaps = p->htaps; 3605 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); 3606 } 3607 } 3608 3609 s->last_spatial_decomposition_type = s->spatial_decomposition_type; 3610 s->last_qlog = s->qlog; 3611 s->last_qbias = s->qbias; 3612 s->last_mv_scale = s->mv_scale; 3613 s->last_block_max_depth = s->block_max_depth; 3614 s->last_spatial_decomposition_count = s->spatial_decomposition_count; 3615} 3616 3617static int qscale2qlog(int qscale){ 3618 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) 3619 + 61*QROOT/8; //<64 >60 3620} 3621 3622static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) 3623{ 3624 /* Estimate the frame's complexity as a sum of weighted dwt coefficients. 3625 * FIXME we know exact mv bits at this point, 3626 * but ratecontrol isn't set up to include them. */ 3627 uint32_t coef_sum= 0; 3628 int level, orientation, delta_qlog; 3629 3630 for(level=0; level<s->spatial_decomposition_count; level++){ 3631 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 3632 SubBand *b= &s->plane[0].band[level][orientation]; 3633 IDWTELEM *buf= b->ibuf; 3634 const int w= b->width; 3635 const int h= b->height; 3636 const int stride= b->stride; 3637 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); 3638 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 3639 const int qdiv= (1<<16)/qmul; 3640 int x, y; 3641 //FIXME this is ugly 3642 for(y=0; y<h; y++) 3643 for(x=0; x<w; x++) 3644 buf[x+y*stride]= b->buf[x+y*stride]; 3645 if(orientation==0) 3646 decorrelate(s, b, buf, stride, 1, 0); 3647 for(y=0; y<h; y++) 3648 for(x=0; x<w; x++) 3649 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; 3650 } 3651 } 3652 3653 /* ugly, ratecontrol just takes a sqrt again */ 3654 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; 3655 assert(coef_sum < INT_MAX); 3656 3657 if(pict->pict_type == FF_I_TYPE){ 3658 s->m.current_picture.mb_var_sum= coef_sum; 3659 s->m.current_picture.mc_mb_var_sum= 0; 3660 }else{ 3661 s->m.current_picture.mc_mb_var_sum= coef_sum; 3662 s->m.current_picture.mb_var_sum= 0; 3663 } 3664 3665 pict->quality= ff_rate_estimate_qscale(&s->m, 1); 3666 if (pict->quality < 0) 3667 return INT_MIN; 3668 s->lambda= pict->quality * 3/2; 3669 delta_qlog= qscale2qlog(pict->quality) - s->qlog; 3670 s->qlog+= delta_qlog; 3671 return delta_qlog; 3672} 3673 3674static void calculate_visual_weight(SnowContext *s, Plane *p){ 3675 int width = p->width; 3676 int height= p->height; 3677 int level, orientation, x, y; 3678 3679 for(level=0; level<s->spatial_decomposition_count; level++){ 3680 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 3681 SubBand *b= &p->band[level][orientation]; 3682 IDWTELEM *ibuf= b->ibuf; 3683 int64_t error=0; 3684 3685 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height); 3686 ibuf[b->width/2 + b->height/2*b->stride]= 256*16; 3687 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); 3688 for(y=0; y<height; y++){ 3689 for(x=0; x<width; x++){ 3690 int64_t d= s->spatial_idwt_buffer[x + y*width]*16; 3691 error += d*d; 3692 } 3693 } 3694 3695 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); 3696 } 3697 } 3698} 3699 3700static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ 3701 SnowContext *s = avctx->priv_data; 3702 RangeCoder * const c= &s->c; 3703 AVFrame *pict = data; 3704 const int width= s->avctx->width; 3705 const int height= s->avctx->height; 3706 int level, orientation, plane_index, i, y; 3707 uint8_t rc_header_bak[sizeof(s->header_state)]; 3708 uint8_t rc_block_bak[sizeof(s->block_state)]; 3709 3710 ff_init_range_encoder(c, buf, buf_size); 3711 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); 3712 3713 for(i=0; i<3; i++){ 3714 int shift= !!i; 3715 for(y=0; y<(height>>shift); y++) 3716 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]], 3717 &pict->data[i][y * pict->linesize[i]], 3718 width>>shift); 3719 } 3720 s->new_picture = *pict; 3721 3722 s->m.picture_number= avctx->frame_number; 3723 if(avctx->flags&CODEC_FLAG_PASS2){ 3724 s->m.pict_type = 3725 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; 3726 s->keyframe= pict->pict_type==FF_I_TYPE; 3727 if(!(avctx->flags&CODEC_FLAG_QSCALE)) { 3728 pict->quality= ff_rate_estimate_qscale(&s->m, 0); 3729 if (pict->quality < 0) 3730 return -1; 3731 } 3732 }else{ 3733 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; 3734 s->m.pict_type= 3735 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; 3736 } 3737 3738 if(s->pass1_rc && avctx->frame_number == 0) 3739 pict->quality= 2*FF_QP2LAMBDA; 3740 if(pict->quality){ 3741 s->qlog= qscale2qlog(pict->quality); 3742 s->lambda = pict->quality * 3/2; 3743 } 3744 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){ 3745 s->qlog= LOSSLESS_QLOG; 3746 s->lambda = 0; 3747 }//else keep previous frame's qlog until after motion estimation 3748 3749 frame_start(s); 3750 3751 s->m.current_picture_ptr= &s->m.current_picture; 3752 s->m.last_picture.pts= s->m.current_picture.pts; 3753 s->m.current_picture.pts= pict->pts; 3754 if(pict->pict_type == FF_P_TYPE){ 3755 int block_width = (width +15)>>4; 3756 int block_height= (height+15)>>4; 3757 int stride= s->current_picture.linesize[0]; 3758 3759 assert(s->current_picture.data[0]); 3760 assert(s->last_picture[0].data[0]); 3761 3762 s->m.avctx= s->avctx; 3763 s->m.current_picture.data[0]= s->current_picture.data[0]; 3764 s->m. last_picture.data[0]= s->last_picture[0].data[0]; 3765 s->m. new_picture.data[0]= s-> input_picture.data[0]; 3766 s->m. last_picture_ptr= &s->m. last_picture; 3767 s->m.linesize= 3768 s->m. last_picture.linesize[0]= 3769 s->m. new_picture.linesize[0]= 3770 s->m.current_picture.linesize[0]= stride; 3771 s->m.uvlinesize= s->current_picture.linesize[1]; 3772 s->m.width = width; 3773 s->m.height= height; 3774 s->m.mb_width = block_width; 3775 s->m.mb_height= block_height; 3776 s->m.mb_stride= s->m.mb_width+1; 3777 s->m.b8_stride= 2*s->m.mb_width+1; 3778 s->m.f_code=1; 3779 s->m.pict_type= pict->pict_type; 3780 s->m.me_method= s->avctx->me_method; 3781 s->m.me.scene_change_score=0; 3782 s->m.flags= s->avctx->flags; 3783 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0; 3784 s->m.out_format= FMT_H263; 3785 s->m.unrestricted_mv= 1; 3786 3787 s->m.lambda = s->lambda; 3788 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); 3789 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; 3790 3791 s->m.dsp= s->dsp; //move 3792 ff_init_me(&s->m); 3793 s->dsp= s->m.dsp; 3794 } 3795 3796 if(s->pass1_rc){ 3797 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state)); 3798 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state)); 3799 } 3800 3801redo_frame: 3802 3803 if(pict->pict_type == FF_I_TYPE) 3804 s->spatial_decomposition_count= 5; 3805 else 3806 s->spatial_decomposition_count= 5; 3807 3808 s->m.pict_type = pict->pict_type; 3809 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0; 3810 3811 common_init_after_header(avctx); 3812 3813 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ 3814 for(plane_index=0; plane_index<3; plane_index++){ 3815 calculate_visual_weight(s, &s->plane[plane_index]); 3816 } 3817 } 3818 3819 encode_header(s); 3820 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); 3821 encode_blocks(s, 1); 3822 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; 3823 3824 for(plane_index=0; plane_index<3; plane_index++){ 3825 Plane *p= &s->plane[plane_index]; 3826 int w= p->width; 3827 int h= p->height; 3828 int x, y; 3829// int bits= put_bits_count(&s->c.pb); 3830 3831 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ 3832 //FIXME optimize 3833 if(pict->data[plane_index]) //FIXME gray hack 3834 for(y=0; y<h; y++){ 3835 for(x=0; x<w; x++){ 3836 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; 3837 } 3838 } 3839 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); 3840 3841 if( plane_index==0 3842 && pict->pict_type == FF_P_TYPE 3843 && !(avctx->flags&CODEC_FLAG_PASS2) 3844 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ 3845 ff_init_range_encoder(c, buf, buf_size); 3846 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); 3847 pict->pict_type= FF_I_TYPE; 3848 s->keyframe=1; 3849 s->current_picture.key_frame=1; 3850 goto redo_frame; 3851 } 3852 3853 if(s->qlog == LOSSLESS_QLOG){ 3854 for(y=0; y<h; y++){ 3855 for(x=0; x<w; x++){ 3856 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; 3857 } 3858 } 3859 }else{ 3860 for(y=0; y<h; y++){ 3861 for(x=0; x<w; x++){ 3862 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS; 3863 } 3864 } 3865 } 3866 3867 /* if(QUANTIZE2) 3868 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type); 3869 else*/ 3870 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); 3871 3872 if(s->pass1_rc && plane_index==0){ 3873 int delta_qlog = ratecontrol_1pass(s, pict); 3874 if (delta_qlog <= INT_MIN) 3875 return -1; 3876 if(delta_qlog){ 3877 //reordering qlog in the bitstream would eliminate this reset 3878 ff_init_range_encoder(c, buf, buf_size); 3879 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); 3880 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); 3881 encode_header(s); 3882 encode_blocks(s, 0); 3883 } 3884 } 3885 3886 for(level=0; level<s->spatial_decomposition_count; level++){ 3887 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 3888 SubBand *b= &p->band[level][orientation]; 3889 3890 if(!QUANTIZE2) 3891 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); 3892 if(orientation==0) 3893 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0); 3894 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); 3895 assert(b->parent==NULL || b->parent->stride == b->stride*2); 3896 if(orientation==0) 3897 correlate(s, b, b->ibuf, b->stride, 1, 0); 3898 } 3899 } 3900 3901 for(level=0; level<s->spatial_decomposition_count; level++){ 3902 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 3903 SubBand *b= &p->band[level][orientation]; 3904 3905 dequantize(s, b, b->ibuf, b->stride); 3906 } 3907 } 3908 3909 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); 3910 if(s->qlog == LOSSLESS_QLOG){ 3911 for(y=0; y<h; y++){ 3912 for(x=0; x<w; x++){ 3913 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; 3914 } 3915 } 3916 } 3917 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); 3918 }else{ 3919 //ME/MC only 3920 if(pict->pict_type == FF_I_TYPE){ 3921 for(y=0; y<h; y++){ 3922 for(x=0; x<w; x++){ 3923 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= 3924 pict->data[plane_index][y*pict->linesize[plane_index] + x]; 3925 } 3926 } 3927 }else{ 3928 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); 3929 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); 3930 } 3931 } 3932 if(s->avctx->flags&CODEC_FLAG_PSNR){ 3933 int64_t error= 0; 3934 3935 if(pict->data[plane_index]) //FIXME gray hack 3936 for(y=0; y<h; y++){ 3937 for(x=0; x<w; x++){ 3938 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; 3939 error += d*d; 3940 } 3941 } 3942 s->avctx->error[plane_index] += error; 3943 s->current_picture.error[plane_index] = error; 3944 } 3945 3946 } 3947 3948 update_last_header_values(s); 3949 3950 release_buffer(avctx); 3951 3952 s->current_picture.coded_picture_number = avctx->frame_number; 3953 s->current_picture.pict_type = pict->pict_type; 3954 s->current_picture.quality = pict->quality; 3955 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start); 3956 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits; 3957 s->m.current_picture.display_picture_number = 3958 s->m.current_picture.coded_picture_number = avctx->frame_number; 3959 s->m.current_picture.quality = pict->quality; 3960 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); 3961 if(s->pass1_rc) 3962 if (ff_rate_estimate_qscale(&s->m, 0) < 0) 3963 return -1; 3964 if(avctx->flags&CODEC_FLAG_PASS1) 3965 ff_write_pass1_stats(&s->m); 3966 s->m.last_pict_type = s->m.pict_type; 3967 avctx->frame_bits = s->m.frame_bits; 3968 avctx->mv_bits = s->m.mv_bits; 3969 avctx->misc_bits = s->m.misc_bits; 3970 avctx->p_tex_bits = s->m.p_tex_bits; 3971 3972 emms_c(); 3973 3974 return ff_rac_terminate(c); 3975} 3976 3977static av_cold int encode_end(AVCodecContext *avctx) 3978{ 3979 SnowContext *s = avctx->priv_data; 3980 3981 common_end(s); 3982 if (s->input_picture.data[0]) 3983 avctx->release_buffer(avctx, &s->input_picture); 3984 av_free(avctx->stats_out); 3985 3986 return 0; 3987} 3988 3989AVCodec snow_encoder = { 3990 "snow", 3991 AVMEDIA_TYPE_VIDEO, 3992 CODEC_ID_SNOW, 3993 sizeof(SnowContext), 3994 encode_init, 3995 encode_frame, 3996 encode_end, 3997 .long_name = NULL_IF_CONFIG_SMALL("Snow"), 3998}; 3999#endif 4000 4001 4002#ifdef TEST 4003#undef malloc 4004#undef free 4005#undef printf 4006 4007#include "libavutil/lfg.h" 4008 4009int main(void){ 4010 int width=256; 4011 int height=256; 4012 int buffer[2][width*height]; 4013 SnowContext s; 4014 int i; 4015 AVLFG prng; 4016 s.spatial_decomposition_count=6; 4017 s.spatial_decomposition_type=1; 4018 4019 av_lfg_init(&prng, 1); 4020 4021 printf("testing 5/3 DWT\n"); 4022 for(i=0; i<width*height; i++) 4023 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345; 4024 4025 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4026 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4027 4028 for(i=0; i<width*height; i++) 4029 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]); 4030 4031 printf("testing 9/7 DWT\n"); 4032 s.spatial_decomposition_type=0; 4033 for(i=0; i<width*height; i++) 4034 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345; 4035 4036 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4037 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4038 4039 for(i=0; i<width*height; i++) 4040 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]); 4041 4042#if 0 4043 printf("testing AC coder\n"); 4044 memset(s.header_state, 0, sizeof(s.header_state)); 4045 ff_init_range_encoder(&s.c, buffer[0], 256*256); 4046 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); 4047 4048 for(i=-256; i<256; i++){ 4049 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); 4050 } 4051 ff_rac_terminate(&s.c); 4052 4053 memset(s.header_state, 0, sizeof(s.header_state)); 4054 ff_init_range_decoder(&s.c, buffer[0], 256*256); 4055 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); 4056 4057 for(i=-256; i<256; i++){ 4058 int j; 4059 j= get_symbol(&s.c, s.header_state, 1); 4060 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); 4061 } 4062#endif 4063 { 4064 int level, orientation, x, y; 4065 int64_t errors[8][4]; 4066 int64_t g=0; 4067 4068 memset(errors, 0, sizeof(errors)); 4069 s.spatial_decomposition_count=3; 4070 s.spatial_decomposition_type=0; 4071 for(level=0; level<s.spatial_decomposition_count; level++){ 4072 for(orientation=level ? 1 : 0; orientation<4; orientation++){ 4073 int w= width >> (s.spatial_decomposition_count-level); 4074 int h= height >> (s.spatial_decomposition_count-level); 4075 int stride= width << (s.spatial_decomposition_count-level); 4076 DWTELEM *buf= buffer[0]; 4077 int64_t error=0; 4078 4079 if(orientation&1) buf+=w; 4080 if(orientation>1) buf+=stride>>1; 4081 4082 memset(buffer[0], 0, sizeof(int)*width*height); 4083 buf[w/2 + h/2*stride]= 256*256; 4084 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4085 for(y=0; y<height; y++){ 4086 for(x=0; x<width; x++){ 4087 int64_t d= buffer[0][x + y*width]; 4088 error += d*d; 4089 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d); 4090 } 4091 if(FFABS(height/2-y)<9 && level==2) printf("\n"); 4092 } 4093 error= (int)(sqrt(error)+0.5); 4094 errors[level][orientation]= error; 4095 if(g) g=av_gcd(g, error); 4096 else g= error; 4097 } 4098 } 4099 printf("static int const visual_weight[][4]={\n"); 4100 for(level=0; level<s.spatial_decomposition_count; level++){ 4101 printf(" {"); 4102 for(orientation=0; orientation<4; orientation++){ 4103 printf("%8"PRId64",", errors[level][orientation]/g); 4104 } 4105 printf("},\n"); 4106 } 4107 printf("};\n"); 4108 { 4109 int level=2; 4110 int w= width >> (s.spatial_decomposition_count-level); 4111 //int h= height >> (s.spatial_decomposition_count-level); 4112 int stride= width << (s.spatial_decomposition_count-level); 4113 DWTELEM *buf= buffer[0]; 4114 int64_t error=0; 4115 4116 buf+=w; 4117 buf+=stride>>1; 4118 4119 memset(buffer[0], 0, sizeof(int)*width*height); 4120#if 1 4121 for(y=0; y<height; y++){ 4122 for(x=0; x<width; x++){ 4123 int tab[4]={0,2,3,1}; 4124 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)]; 4125 } 4126 } 4127 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4128#else 4129 for(y=0; y<h; y++){ 4130 for(x=0; x<w; x++){ 4131 buf[x + y*stride ]=169; 4132 buf[x + y*stride-w]=64; 4133 } 4134 } 4135 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); 4136#endif 4137 for(y=0; y<height; y++){ 4138 for(x=0; x<width; x++){ 4139 int64_t d= buffer[0][x + y*width]; 4140 error += d*d; 4141 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d); 4142 } 4143 if(FFABS(height/2-y)<9) printf("\n"); 4144 } 4145 } 4146 4147 } 4148 return 0; 4149} 4150#endif /* TEST */ 4151