1/*
2 * Motion estimation
3 * Copyright (c) 2002-2004 Michael Niedermayer
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * Motion estimation template.
25 */
26
27#include "mpegvideo.h"
28
/*
 * LOAD_COMMON: declare local shortcuts for fields of the MotionEstContext
 * pointed to by `c`, which must already be in scope where the macro is
 * expanded. It provides the score map, the xmin/ymin/xmax/ymax limits, the
 * current MV penalty table and the predicted vector (pred_x, pred_y).
 * The other macros in this file refer to these locals by name.
 * Several of them are tagged av_unused because not every user of the macro
 * references all of them.
 */
//Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
#define LOAD_COMMON\
    uint32_t av_unused * const score_map= c->score_map;\
    const int av_unused xmin= c->xmin;\
    const int av_unused ymin= c->ymin;\
    const int av_unused xmax= c->xmax;\
    const int av_unused ymax= c->ymax;\
    uint8_t *mv_penalty= c->current_mv_penalty;\
    const int pred_x= c->pred_x;\
    const int pred_y= c->pred_y;\
39
/*
 * CHECK_HALF_MV(dx, dy, x, y): score the candidate at full-pel position
 * (x, y) plus the sub-pel offset (dx, dy).
 * The candidate is expressed in half-pel units as (hx, hy) = (2*x+dx, 2*y+dy).
 * Its score is the cmp_hpel() result plus the MV penalty of (hx, hy)
 * relative to (pred_x, pred_y), scaled by penalty_factor.
 * The result is handed to COPY3_IF_LT (defined elsewhere; presumably it
 * stores d/hx/hy into dmin/bx/by when d is lower than dmin).
 * Relies on these names being in scope at the expansion site: s, d, dmin,
 * bx, by, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags,
 * mv_penalty, pred_x, pred_y and penalty_factor.
 * Expands to a braced block, so call sites do not add a semicolon.
 */
#define CHECK_HALF_MV(dx, dy, x, y)\
{\
    const int hx= 2*(x)+(dx);\
    const int hy= 2*(y)+(dy);\
    d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}
48
49static int hpel_motion_search(MpegEncContext * s,
50                                  int *mx_ptr, int *my_ptr, int dmin,
51                                  int src_index, int ref_index,
52                                  int size, int h)
53{
54    MotionEstContext * const c= &s->me;
55    const int mx = *mx_ptr;
56    const int my = *my_ptr;
57    const int penalty_factor= c->sub_penalty_factor;
58    me_cmp_func cmp_sub, chroma_cmp_sub;
59    int bx=2*mx, by=2*my;
60
61    LOAD_COMMON
62    int flags= c->sub_flags;
63
64 //FIXME factorize
65
66    cmp_sub= s->dsp.me_sub_cmp[size];
67    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
68
69    if(c->skip){ //FIXME move out of hpel?
70        *mx_ptr = 0;
71        *my_ptr = 0;
72        return dmin;
73    }
74
75    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
76        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
77        if(mx || my || size>0)
78            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
79    }
80
81    if (mx > xmin && mx < xmax &&
82        my > ymin && my < ymax) {
83        int d= dmin;
84        const int index= (my<<ME_MAP_SHIFT) + mx;
85        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
86                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
87        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
88                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
89        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
90                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
91        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
92                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
93
94#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
95        unsigned key;
96        unsigned map_generation= c->map_generation;
97        key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
98        av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
99        key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
100        av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
101        key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
102        av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key);
103        key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
104        av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key);
105#endif
106        if(t<=b){
107            CHECK_HALF_MV(0, 1, mx  ,my-1)
108            if(l<=r){
109                CHECK_HALF_MV(1, 1, mx-1, my-1)
110                if(t+r<=b+l){
111                    CHECK_HALF_MV(1, 1, mx  , my-1)
112                }else{
113                    CHECK_HALF_MV(1, 1, mx-1, my  )
114                }
115                CHECK_HALF_MV(1, 0, mx-1, my  )
116            }else{
117                CHECK_HALF_MV(1, 1, mx  , my-1)
118                if(t+l<=b+r){
119                    CHECK_HALF_MV(1, 1, mx-1, my-1)
120                }else{
121                    CHECK_HALF_MV(1, 1, mx  , my  )
122                }
123                CHECK_HALF_MV(1, 0, mx  , my  )
124            }
125        }else{
126            if(l<=r){
127                if(t+l<=b+r){
128                    CHECK_HALF_MV(1, 1, mx-1, my-1)
129                }else{
130                    CHECK_HALF_MV(1, 1, mx  , my  )
131                }
132                CHECK_HALF_MV(1, 0, mx-1, my)
133                CHECK_HALF_MV(1, 1, mx-1, my)
134            }else{
135                if(t+r<=b+l){
136                    CHECK_HALF_MV(1, 1, mx  , my-1)
137                }else{
138                    CHECK_HALF_MV(1, 1, mx-1, my)
139                }
140                CHECK_HALF_MV(1, 0, mx  , my)
141                CHECK_HALF_MV(1, 1, mx  , my)
142            }
143            CHECK_HALF_MV(0, 1, mx  , my)
144        }
145        av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
146    }
147
148    *mx_ptr = bx;
149    *my_ptr = by;
150
151    return dmin;
152}
153
154static int no_sub_motion_search(MpegEncContext * s,
155          int *mx_ptr, int *my_ptr, int dmin,
156                                  int src_index, int ref_index,
157                                  int size, int h)
158{
159    (*mx_ptr)<<=1;
160    (*my_ptr)<<=1;
161    return dmin;
162}
163
164static inline int get_mb_score(MpegEncContext *s, int mx, int my,
165                               int src_index, int ref_index, int size,
166                               int h, int add_rate)
167{
168//    const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
169    MotionEstContext * const c= &s->me;
170    const int penalty_factor= c->mb_penalty_factor;
171    const int flags= c->mb_flags;
172    const int qpel= flags & FLAG_QPEL;
173    const int mask= 1+2*qpel;
174    me_cmp_func cmp_sub, chroma_cmp_sub;
175    int d;
176
177    LOAD_COMMON
178
179 //FIXME factorize
180
181    cmp_sub= s->dsp.mb_cmp[size];
182    chroma_cmp_sub= s->dsp.mb_cmp[size+1];
183
184    d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
185    //FIXME check cbp before adding penalty for (0,0) vector
186    if(add_rate && (mx || my || size>0))
187        d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
188
189    return d;
190}
191
192int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
193                    int ref_index, int size, int h, int add_rate)
194{
195    return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
196}
197
/*
 * CHECK_QUARTER_MV(dx, dy, x, y): quarter-pel counterpart of CHECK_HALF_MV.
 * The candidate is expressed in quarter-pel units as
 * (hx, hy) = (4*x+dx, 4*y+dy).
 * Its score is the cmp_qpel() result plus the MV penalty of (hx, hy)
 * relative to (pred_x, pred_y), scaled by penalty_factor.
 * The result is handed to COPY3_IF_LT (defined elsewhere; presumably it
 * updates dmin/bx/by when d is lower than dmin).
 * Relies on these names being in scope at the expansion site: s, d, dmin,
 * bx, by, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags,
 * mv_penalty, pred_x, pred_y and penalty_factor.
 * Expands to a braced block, so call sites do not add a semicolon.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)\
{\
    const int hx= 4*(x)+(dx);\
    const int hy= 4*(y)+(dy);\
    d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}
206
207static int qpel_motion_search(MpegEncContext * s,
208                                  int *mx_ptr, int *my_ptr, int dmin,
209                                  int src_index, int ref_index,
210                                  int size, int h)
211{
212    MotionEstContext * const c= &s->me;
213    const int mx = *mx_ptr;
214    const int my = *my_ptr;
215    const int penalty_factor= c->sub_penalty_factor;
216    const unsigned map_generation = c->map_generation;
217    const int subpel_quality= c->avctx->me_subpel_quality;
218    uint32_t *map= c->map;
219    me_cmp_func cmpf, chroma_cmpf;
220    me_cmp_func cmp_sub, chroma_cmp_sub;
221
222    LOAD_COMMON
223    int flags= c->sub_flags;
224
225    cmpf= s->dsp.me_cmp[size];
226    chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
227 //FIXME factorize
228
229    cmp_sub= s->dsp.me_sub_cmp[size];
230    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
231
232    if(c->skip){ //FIXME somehow move up (benchmark)
233        *mx_ptr = 0;
234        *my_ptr = 0;
235        return dmin;
236    }
237
238    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
239        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
240        if(mx || my || size>0)
241            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
242    }
243
244    if (mx > xmin && mx < xmax &&
245        my > ymin && my < ymax) {
246        int bx=4*mx, by=4*my;
247        int d= dmin;
248        int i, nx, ny;
249        const int index= (my<<ME_MAP_SHIFT) + mx;
250        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
251        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
252        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
253        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
254        const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
255        int best[8];
256        int best_pos[8][2];
257
258        memset(best, 64, sizeof(int)*8);
259        if(s->me.dia_size>=2){
260            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
261            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
262            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
263            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
264
265            for(ny= -3; ny <= 3; ny++){
266                for(nx= -3; nx <= 3; nx++){
267                    //FIXME this could overflow (unlikely though)
268                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
269                    const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
270                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
271                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
272                    int i;
273
274                    if((nx&3)==0 && (ny&3)==0) continue;
275
276                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
277
278//                    if(nx&1) score-=1024*c->penalty_factor;
279//                    if(ny&1) score-=1024*c->penalty_factor;
280
281                    for(i=0; i<8; i++){
282                        if(score < best[i]){
283                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
284                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
285                            best[i]= score;
286                            best_pos[i][0]= nx + 4*mx;
287                            best_pos[i][1]= ny + 4*my;
288                            break;
289                        }
290                    }
291                }
292            }
293        }else{
294            int tl;
295            //FIXME this could overflow (unlikely though)
296            const int cx = 4*(r - l);
297            const int cx2= r + l - 2*c;
298            const int cy = 4*(b - t);
299            const int cy2= b + t - 2*c;
300            int cxy;
301
302            if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
303                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
304            }else{
305                tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
306            }
307
308            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
309
310            av_assert2(16*cx2 + 4*cx + 32*c == 32*r);
311            av_assert2(16*cx2 - 4*cx + 32*c == 32*l);
312            av_assert2(16*cy2 + 4*cy + 32*c == 32*b);
313            av_assert2(16*cy2 - 4*cy + 32*c == 32*t);
314            av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
315
316            for(ny= -3; ny <= 3; ny++){
317                for(nx= -3; nx <= 3; nx++){
318                    //FIXME this could overflow (unlikely though)
319                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
320                    int i;
321
322                    if((nx&3)==0 && (ny&3)==0) continue;
323
324                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
325//                    if(nx&1) score-=32*c->penalty_factor;
326  //                  if(ny&1) score-=32*c->penalty_factor;
327
328                    for(i=0; i<8; i++){
329                        if(score < best[i]){
330                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
331                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
332                            best[i]= score;
333                            best_pos[i][0]= nx + 4*mx;
334                            best_pos[i][1]= ny + 4*my;
335                            break;
336                        }
337                    }
338                }
339            }
340        }
341        for(i=0; i<subpel_quality; i++){
342            nx= best_pos[i][0];
343            ny= best_pos[i][1];
344            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
345        }
346
347        av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
348
349        *mx_ptr = bx;
350        *my_ptr = by;
351    }else{
352        *mx_ptr =4*mx;
353        *my_ptr =4*my;
354    }
355
356    return dmin;
357}
358
359
/*
 * CHECK_MV(x, y): evaluate the candidate (x, y) unless the map already
 * holds it for the current map_generation.
 * A new candidate is measured with cmp() and recorded in map[] and
 * score_map[]. Its MV penalty (scaled by penalty_factor) is then added and
 * the result is handed to COPY3_IF_LT to update dmin / best[0] / best[1].
 * The candidate must lie inside [xmin, xmax] x [ymin, ymax]; this is only
 * checked by av_assert2.
 * x and y may be negative, so the key and index are formed with unsigned
 * shifts and the penalty index with a multiplication. Left-shifting a
 * negative signed value is undefined behaviour. The resulting values are
 * the same modulo 2^32.
 * Relies on these names being in scope at the expansion site: s, d, dmin,
 * best, map, score_map, map_generation, shift, mv_penalty, pred_x, pred_y,
 * penalty_factor, size, h, ref_index, src_index, cmpf, chroma_cmpf and
 * flags.
 */
#define CHECK_MV(x,y)\
{\
    const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    av_assert2((x) >= xmin);\
    av_assert2((x) <= xmax);\
    av_assert2((y) >= ymin);\
    av_assert2((y) <= ymax);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
    }\
}
376
/*
 * CHECK_CLIPPED_MV(ax, ay): clamp the candidate (ax, ay) into
 * [xmin, xmax] x [ymin, ymax] and hand the clamped position to CHECK_MV.
 * Both arguments are copied into locals (Lx, Ly) first, so each is
 * evaluated exactly once.
 */
#define CHECK_CLIPPED_MV(ax,ay)\
{\
    const int Lx= ax;\
    const int Ly= ay;\
    const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
    const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
    CHECK_MV(Lx2, Ly2)\
}
385
/*
 * CHECK_MV_DIR(x, y, new_dir): like CHECK_MV but without range assertions.
 * When the candidate becomes the new best, new_dir is also stored in
 * next_dir so the caller knows which direction the step was taken in.
 * x and y may be negative, so the key and index are formed with unsigned
 * shifts and the penalty index with a multiplication. Left-shifting a
 * negative signed value is undefined behaviour. The resulting values are
 * the same modulo 2^32.
 * Relies on the same caller-scope names as CHECK_MV, plus next_dir.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        if(d<dmin){\
            best[0]=x;\
            best[1]=y;\
            dmin=d;\
            next_dir= new_dir;\
        }\
    }\
}
403
/*
 * check(x, y, S, v): debugging aid. Logs at AV_LOG_ERROR whenever x or y
 * lies outside the xmin/xmax/ymin/ymax limits shifted left by S.
 * v is stringified and appended to the message to identify the call site.
 * The macro expands to four bare `if` statements, so it must not be used
 * as the body of an unbraced if/else.
 * Its only uses visible in this part of the file are commented out.
 */
#define check(x,y,S,v)\
if( (x)<(xmin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
if( (x)>(xmax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
if( (y)<(ymin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
if( (y)>(ymax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
409
/*
 * LOAD_COMMON2: further locals needed by the CHECK_* macros.
 * map is the map array taken from c->map, qpel is the FLAG_QPEL bit of
 * flags, and shift is 1+qpel.
 * Requires `c` and `flags` to be in scope at the expansion site.
 */
#define LOAD_COMMON2\
    uint32_t *map= c->map;\
    const int qpel= flags&FLAG_QPEL;\
    const int shift= 1+qpel;\
414
415static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
416                                       int src_index, int ref_index, int const penalty_factor,
417                                       int size, int h, int flags)
418{
419    MotionEstContext * const c= &s->me;
420    me_cmp_func cmpf, chroma_cmpf;
421    int next_dir=-1;
422    LOAD_COMMON
423    LOAD_COMMON2
424    unsigned map_generation = c->map_generation;
425
426    cmpf= s->dsp.me_cmp[size];
427    chroma_cmpf= s->dsp.me_cmp[size+1];
428
429    { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
430        const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
431        const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
432        if(map[index]!=key){ //this will be executed only very rarey
433            score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
434            map[index]= key;
435        }
436    }
437
438    for(;;){
439        int d;
440        const int dir= next_dir;
441        const int x= best[0];
442        const int y= best[1];
443        next_dir=-1;
444
445        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
446        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
447        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
448        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
449
450        if(next_dir==-1){
451            return dmin;
452        }
453    }
454}
455
456static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
457                                       int src_index, int ref_index, int const penalty_factor,
458                                       int size, int h, int flags)
459{
460    MotionEstContext * const c= &s->me;
461    me_cmp_func cmpf, chroma_cmpf;
462    int dia_size;
463    LOAD_COMMON
464    LOAD_COMMON2
465    unsigned map_generation = c->map_generation;
466
467    cmpf= s->dsp.me_cmp[size];
468    chroma_cmpf= s->dsp.me_cmp[size+1];
469
470    for(dia_size=1; dia_size<=4; dia_size++){
471        int dir;
472        const int x= best[0];
473        const int y= best[1];
474
475        if(dia_size&(dia_size-1)) continue;
476
477        if(   x + dia_size > xmax
478           || x - dia_size < xmin
479           || y + dia_size > ymax
480           || y - dia_size < ymin)
481           continue;
482
483        for(dir= 0; dir<dia_size; dir+=2){
484            int d;
485
486            CHECK_MV(x + dir           , y + dia_size - dir);
487            CHECK_MV(x + dia_size - dir, y - dir           );
488            CHECK_MV(x - dir           , y - dia_size + dir);
489            CHECK_MV(x - dia_size + dir, y + dir           );
490        }
491
492        if(x!=best[0] || y!=best[1])
493            dia_size=0;
494    }
495    return dmin;
496}
497
498static int hex_search(MpegEncContext * s, int *best, int dmin,
499                                       int src_index, int ref_index, int const penalty_factor,
500                                       int size, int h, int flags, int dia_size)
501{
502    MotionEstContext * const c= &s->me;
503    me_cmp_func cmpf, chroma_cmpf;
504    LOAD_COMMON
505    LOAD_COMMON2
506    unsigned map_generation = c->map_generation;
507    int x,y,d;
508    const int dec= dia_size & (dia_size-1);
509
510    cmpf= s->dsp.me_cmp[size];
511    chroma_cmpf= s->dsp.me_cmp[size+1];
512
513    for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
514        do{
515            x= best[0];
516            y= best[1];
517
518            CHECK_CLIPPED_MV(x  -dia_size    , y);
519            CHECK_CLIPPED_MV(x+  dia_size    , y);
520            CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
521            CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
522            if(dia_size>1){
523                CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
524                CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
525            }
526        }while(best[0] != x || best[1] != y);
527    }
528
529    return dmin;
530}
531
532static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
533                                       int src_index, int ref_index, int const penalty_factor,
534                                       int size, int h, int flags)
535{
536    MotionEstContext * const c= &s->me;
537    me_cmp_func cmpf, chroma_cmpf;
538    LOAD_COMMON
539    LOAD_COMMON2
540    unsigned map_generation = c->map_generation;
541    int x,y,i,d;
542    int dia_size= c->dia_size&0xFF;
543    const int dec= dia_size & (dia_size-1);
544    static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
545                                { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
546
547    cmpf= s->dsp.me_cmp[size];
548    chroma_cmpf= s->dsp.me_cmp[size+1];
549
550    for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
551        do{
552            x= best[0];
553            y= best[1];
554            for(i=0; i<8; i++){
555                CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
556            }
557        }while(best[0] != x || best[1] != y);
558    }
559
560    x= best[0];
561    y= best[1];
562    CHECK_CLIPPED_MV(x+1, y);
563    CHECK_CLIPPED_MV(x, y+1);
564    CHECK_CLIPPED_MV(x-1, y);
565    CHECK_CLIPPED_MV(x, y-1);
566
567    return dmin;
568}
569
570static int umh_search(MpegEncContext * s, int *best, int dmin,
571                                       int src_index, int ref_index, int const penalty_factor,
572                                       int size, int h, int flags)
573{
574    MotionEstContext * const c= &s->me;
575    me_cmp_func cmpf, chroma_cmpf;
576    LOAD_COMMON
577    LOAD_COMMON2
578    unsigned map_generation = c->map_generation;
579    int x,y,x2,y2, i, j, d;
580    const int dia_size= c->dia_size&0xFE;
581    static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
582                                 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
583                                 {-2, 3}, { 0, 4}, { 2, 3},
584                                 {-2,-3}, { 0,-4}, { 2,-3},};
585
586    cmpf= s->dsp.me_cmp[size];
587    chroma_cmpf= s->dsp.me_cmp[size+1];
588
589    x= best[0];
590    y= best[1];
591    for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
592        CHECK_MV(x2, y);
593    }
594    for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
595        CHECK_MV(x, y2);
596    }
597
598    x= best[0];
599    y= best[1];
600    for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
601        for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
602            CHECK_MV(x2, y2);
603        }
604    }
605
606//FIXME prevent the CLIP stuff
607
608    for(j=1; j<=dia_size/4; j++){
609        for(i=0; i<16; i++){
610            CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
611        }
612    }
613
614    return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
615}
616
617static int full_search(MpegEncContext * s, int *best, int dmin,
618                                       int src_index, int ref_index, int const penalty_factor,
619                                       int size, int h, int flags)
620{
621    MotionEstContext * const c= &s->me;
622    me_cmp_func cmpf, chroma_cmpf;
623    LOAD_COMMON
624    LOAD_COMMON2
625    unsigned map_generation = c->map_generation;
626    int x,y, d;
627    const int dia_size= c->dia_size&0xFF;
628
629    cmpf= s->dsp.me_cmp[size];
630    chroma_cmpf= s->dsp.me_cmp[size+1];
631
632    for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
633        for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
634            CHECK_MV(x, y);
635        }
636    }
637
638    x= best[0];
639    y= best[1];
640    d= dmin;
641    CHECK_CLIPPED_MV(x  , y);
642    CHECK_CLIPPED_MV(x+1, y);
643    CHECK_CLIPPED_MV(x, y+1);
644    CHECK_CLIPPED_MV(x-1, y);
645    CHECK_CLIPPED_MV(x, y-1);
646    best[0]= x;
647    best[1]= y;
648
649    return d;
650}
651
/*
 * SAB_CHECK_MV(ax, ay): evaluate the candidate (ax, ay) unless the map
 * already holds it for the current map_generation.
 * A candidate scoring below the worst entry of the sorted minima[] list is
 * inserted at its sorted position; the last entry drops off. The macro
 * then sets i to -1 and executes `continue`, so it must be expanded
 * directly inside the loop over i, which thereby restarts from entry 0.
 * ax and ay may be negative, so the key and index are formed with unsigned
 * shifts and the penalty index with a multiplication. Left-shifting a
 * negative signed value is undefined behaviour. The resulting values are
 * the same modulo 2^32.
 * Relies on the same caller-scope names as CHECK_MV, plus minima,
 * minima_count and i.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const unsigned key = ((unsigned)(ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
    const int index= (((unsigned)(ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((ax)*(1<<shift))-pred_x] + mv_penalty[((ay)*(1<<shift))-pred_y])*penalty_factor;\
        if(d < minima[minima_count-1].height){\
            int j=0;\
            \
            while(d >= minima[j].height) j++;\
\
            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
\
            minima[j].checked= 0;\
            minima[j].height= d;\
            minima[j].x= ax;\
            minima[j].y= ay;\
            \
            i=-1;\
            continue;\
        }\
    }\
}
678
679#define MAX_SAB_SIZE ME_MAP_SIZE
680static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
681                                       int src_index, int ref_index, int const penalty_factor,
682                                       int size, int h, int flags)
683{
684    MotionEstContext * const c= &s->me;
685    me_cmp_func cmpf, chroma_cmpf;
686    Minima minima[MAX_SAB_SIZE];
687    const int minima_count= FFABS(c->dia_size);
688    int i, j;
689    LOAD_COMMON
690    LOAD_COMMON2
691    unsigned map_generation = c->map_generation;
692
693    av_assert1(minima_count <= MAX_SAB_SIZE);
694
695    cmpf= s->dsp.me_cmp[size];
696    chroma_cmpf= s->dsp.me_cmp[size+1];
697
698    /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
699      become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
700     */
701    for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
702        uint32_t key= map[i];
703
704        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
705
706        if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
707
708        minima[j].height= score_map[i];
709        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
710        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
711        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
712        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
713
714        // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
715        if(   minima[j].x > xmax || minima[j].x < xmin
716           || minima[j].y > ymax || minima[j].y < ymin)
717            continue;
718
719        minima[j].checked=0;
720        if(minima[j].x || minima[j].y)
721            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
722
723        j++;
724    }
725
726    qsort(minima, j, sizeof(Minima), minima_cmp);
727
728    for(; j<minima_count; j++){
729        minima[j].height=256*256*256*64;
730        minima[j].checked=0;
731        minima[j].x= minima[j].y=0;
732    }
733
734    for(i=0; i<minima_count; i++){
735        const int x= minima[i].x;
736        const int y= minima[i].y;
737        int d;
738
739        if(minima[i].checked) continue;
740
741        if(   x >= xmax || x <= xmin
742           || y >= ymax || y <= ymin)
743           continue;
744
745        SAB_CHECK_MV(x-1, y)
746        SAB_CHECK_MV(x+1, y)
747        SAB_CHECK_MV(x  , y-1)
748        SAB_CHECK_MV(x  , y+1)
749
750        minima[i].checked= 1;
751    }
752
753    best[0]= minima[0].x;
754    best[1]= minima[0].y;
755    dmin= minima[0].height;
756
757    if(   best[0] < xmax && best[0] > xmin
758       && best[1] < ymax && best[1] > ymin){
759        int d;
760        //ensure that the refernece samples for hpel refinement are in the map
761        CHECK_MV(best[0]-1, best[1])
762        CHECK_MV(best[0]+1, best[1])
763        CHECK_MV(best[0], best[1]-1)
764        CHECK_MV(best[0], best[1]+1)
765    }
766    return dmin;
767}
768
769static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
770                                       int src_index, int ref_index, int const penalty_factor,
771                                       int size, int h, int flags)
772{
773    MotionEstContext * const c= &s->me;
774    me_cmp_func cmpf, chroma_cmpf;
775    int dia_size;
776    LOAD_COMMON
777    LOAD_COMMON2
778    unsigned map_generation = c->map_generation;
779
780    cmpf= s->dsp.me_cmp[size];
781    chroma_cmpf= s->dsp.me_cmp[size+1];
782
783    for(dia_size=1; dia_size<=c->dia_size; dia_size++){
784        int dir, start, end;
785        const int x= best[0];
786        const int y= best[1];
787
788        start= FFMAX(0, y + dia_size - ymax);
789        end  = FFMIN(dia_size, xmax - x + 1);
790        for(dir= start; dir<end; dir++){
791            int d;
792
793//check(x + dir,y + dia_size - dir,0, a0)
794            CHECK_MV(x + dir           , y + dia_size - dir);
795        }
796
797        start= FFMAX(0, x + dia_size - xmax);
798        end  = FFMIN(dia_size, y - ymin + 1);
799        for(dir= start; dir<end; dir++){
800            int d;
801
802//check(x + dia_size - dir, y - dir,0, a1)
803            CHECK_MV(x + dia_size - dir, y - dir           );
804        }
805
806        start= FFMAX(0, -y + dia_size + ymin );
807        end  = FFMIN(dia_size, x - xmin + 1);
808        for(dir= start; dir<end; dir++){
809            int d;
810
811//check(x - dir,y - dia_size + dir,0, a2)
812            CHECK_MV(x - dir           , y - dia_size + dir);
813        }
814
815        start= FFMAX(0, -x + dia_size + xmin );
816        end  = FFMIN(dia_size, ymax - y + 1);
817        for(dir= start; dir<end; dir++){
818            int d;
819
820//check(x - dia_size + dir, y + dir,0, a3)
821            CHECK_MV(x - dia_size + dir, y + dir           );
822        }
823
824        if(x!=best[0] || y!=best[1])
825            dia_size=0;
826    }
827    return dmin;
828}
829
830static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
831                                       int src_index, int ref_index, int const penalty_factor,
832                                       int size, int h, int flags){
833    MotionEstContext * const c= &s->me;
834    if(c->dia_size==-1)
835        return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
836    else if(c->dia_size<-1)
837        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
838    else if(c->dia_size<2)
839        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
840    else if(c->dia_size>1024)
841        return          full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
842    else if(c->dia_size>768)
843        return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
844    else if(c->dia_size>512)
845        return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
846    else if(c->dia_size>256)
847        return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
848    else
849        return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
850}
851
/**
   Integer-pel EPZS search: score a list of predicted motion vectors, keep the
   best one, then refine it with diamond_search().

   @param s encoder context; the motion estimation state used here is s->me
   @param mx_ptr receives the x component of the selected motion vector
   @param my_ptr receives the y component of the selected motion vector
   @param P a list of candidate mvs to check before starting the
   iterative search. If one of the candidates is close to the optimal mv, then
   it takes fewer iterations. And it increases the chance that we find the
   optimal mv.
   @param src_index forwarded to the comparison and search routines
   @param ref_index forwarded to the comparison and search routines
   @param last_mv table of motion vectors indexed with s->mb_stride as the
   row stride (presumably the vectors of a previously coded picture - confirm
   against the callers)
   @param ref_mv_scale factor applied to last_mv entries; the product is
   rounded and shifted right by 16, i.e. it is used as a fixed-point value
   with 16 fractional bits
   @param flags forwarded to the comparison and search routines
   @param size index into the comparison function tables (size for cmpf,
   size+1 for chroma_cmpf)
   @param h forwarded to the comparison routines and used to scale the
   thresholds below (h*h); presumably the block height in lines
   @return the score of the motion vector written to mx_ptr / my_ptr
 */
static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
                             int ref_mv_scale, int flags, int size, int h)
{
    MotionEstContext * const c= &s->me;
    int best[2]={0, 0};      /**< x and y coordinates of the best motion vector.
                               i.e. the difference between the position of the
                               block currently being encoded and the position of
                               the block chosen to predict it from. */
    int d;                   ///< the score (cmp + penalty) of any given mv
    int dmin;                /**< the best value of d, i.e. the score
                               corresponding to the mv stored in best[]. */
    unsigned map_generation;
    int penalty_factor;
    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
    me_cmp_func cmpf, chroma_cmpf;

    // NOTE(review): these macros declare further locals (score_map, xmin, ...,
    // and presumably map and shift, which are used below); the CHECK_* macros
    // rely on the local names declared in this function, so do not rename them.
    LOAD_COMMON
    LOAD_COMMON2

    // The pre-pass uses its own penalty factor and comparison functions.
    if(c->pre_pass){
        penalty_factor= c->pre_penalty_factor;
        cmpf= s->dsp.me_pre_cmp[size];
        chroma_cmpf= s->dsp.me_pre_cmp[size+1];
    }else{
        penalty_factor= c->penalty_factor;
        cmpf= s->dsp.me_cmp[size];
        chroma_cmpf= s->dsp.me_cmp[size+1];
    }

    map_generation= update_map_generation(c);

    // Start from the zero vector: its raw comparison score is recorded in
    // slot 0 of map / score_map before any penalty is added.
    av_assert2(cmpf);
    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
    map[0]= map_generation;
    score_map[0]= dmin;

    //FIXME precalc first term below?
    // The zero vector only gets an mv penalty for B pictures without
    // FLAG_DIRECT, or when FF_MPV_FLAG_MV0 is set.
    if ((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) ||
        s->mpv_flags & FF_MPV_FLAG_MV0)
        dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;

    /* first line */
    // On the first slice line only the left predictor and the scaled last_mv
    // entry of the current macroblock are tried.
    if (s->first_slice_line) {
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
    }else{
        // Early exit: the zero vector is already below the mv0_threshold
        // based limit and the left, top and top-right predictors are all
        // zero. Report (0,0), set c->skip and return without searching.
        if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
                    && ( P_LEFT[0]    |P_LEFT[1]
                        |P_TOP[0]     |P_TOP[1]
                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
            *mx_ptr= 0;
            *my_ptr= 0;
            c->skip=1;
            return dmin;
        }
        // Median predictor and its four direct neighbours, then the scaled
        // last_mv entry and the left / top / top-right predictors.
        CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
        CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
        CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
    }
    // Still a poor match: also try the last_mv entries of neighbouring
    // macroblocks. The pre-pass reads the previous entry and the one a row
    // above, the normal pass reads the next entry and the one a row below.
    if(dmin>h*h*4){
        if(c->pre_pass){
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
            if(!s->first_slice_line)
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
        }else{
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
        }
    }

    // Optional extra candidates: the last_mv entries in a window of
    // +-last_predictor_count macroblocks around the current one, clamped to
    // the picture size in macroblocks.
    if(c->avctx->last_predictor_count){
        const int count= c->avctx->last_predictor_count;
        const int xstart= FFMAX(0, s->mb_x - count);
        const int ystart= FFMAX(0, s->mb_y - count);
        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
        int mb_y;

        for(mb_y=ystart; mb_y<yend; mb_y++){
            int mb_x;
            for(mb_x=xstart; mb_x<xend; mb_x++){
                // NOTE(review): this index carries a +1 column / +1 row
                // offset that ref_mv_xy above does not have - confirm that
                // this matches the layout of last_mv.
                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;

                // Candidates outside [xmin,xmax] x [ymin,ymax] are dropped
                // rather than clipped.
                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
                CHECK_MV(mx,my)
            }
        }
    }

//check(best[0],best[1],0, b0)
    // Refine the best candidate found so far.
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

//check(best[0],best[1],0, b1)
    *mx_ptr= best[0];
    *my_ptr= best[1];

    return dmin;
}
973
974//this function is dedicated to the braindamaged gcc
975int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
976                          int P[10][2], int src_index, int ref_index,
977                          int16_t (*last_mv)[2], int ref_mv_scale,
978                          int size, int h)
979{
980    MotionEstContext * const c= &s->me;
981//FIXME convert other functions in the same way if faster
982    if(c->flags==0 && h==16 && size==0){
983        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
984//    case FLAG_QPEL:
985//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
986    }else{
987        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
988    }
989}
990
991static int epzs_motion_search4(MpegEncContext * s,
992                             int *mx_ptr, int *my_ptr, int P[10][2],
993                             int src_index, int ref_index, int16_t (*last_mv)[2],
994                             int ref_mv_scale)
995{
996    MotionEstContext * const c= &s->me;
997    int best[2]={0, 0};
998    int d, dmin;
999    unsigned map_generation;
1000    const int penalty_factor= c->penalty_factor;
1001    const int size=1;
1002    const int h=8;
1003    const int ref_mv_stride= s->mb_stride;
1004    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1005    me_cmp_func cmpf, chroma_cmpf;
1006    LOAD_COMMON
1007    int flags= c->flags;
1008    LOAD_COMMON2
1009
1010    cmpf= s->dsp.me_cmp[size];
1011    chroma_cmpf= s->dsp.me_cmp[size+1];
1012
1013    map_generation= update_map_generation(c);
1014
1015    dmin = 1000000;
1016
1017    /* first line */
1018    if (s->first_slice_line) {
1019        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1020        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1021                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1022        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1023    }else{
1024        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1025        //FIXME try some early stop
1026        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1027        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1028        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1029        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1030        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1031                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1032    }
1033    if(dmin>64*4){
1034        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1035                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1036        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1037            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1038                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1039    }
1040
1041    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1042
1043    *mx_ptr= best[0];
1044    *my_ptr= best[1];
1045
1046    return dmin;
1047}
1048
1049//try to merge with above FIXME (needs PSNR test)
1050static int epzs_motion_search2(MpegEncContext * s,
1051                             int *mx_ptr, int *my_ptr, int P[10][2],
1052                             int src_index, int ref_index, int16_t (*last_mv)[2],
1053                             int ref_mv_scale)
1054{
1055    MotionEstContext * const c= &s->me;
1056    int best[2]={0, 0};
1057    int d, dmin;
1058    unsigned map_generation;
1059    const int penalty_factor= c->penalty_factor;
1060    const int size=0; //FIXME pass as arg
1061    const int h=8;
1062    const int ref_mv_stride= s->mb_stride;
1063    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1064    me_cmp_func cmpf, chroma_cmpf;
1065    LOAD_COMMON
1066    int flags= c->flags;
1067    LOAD_COMMON2
1068
1069    cmpf= s->dsp.me_cmp[size];
1070    chroma_cmpf= s->dsp.me_cmp[size+1];
1071
1072    map_generation= update_map_generation(c);
1073
1074    dmin = 1000000;
1075
1076    /* first line */
1077    if (s->first_slice_line) {
1078        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1079        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1080                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1081        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1082    }else{
1083        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1084        //FIXME try some early stop
1085        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1086        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1087        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1088        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1089        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1090                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1091    }
1092    if(dmin>64*4){
1093        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1094                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1095        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1096            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1097                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1098    }
1099
1100    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1101
1102    *mx_ptr= best[0];
1103    *my_ptr= best[1];
1104
1105    return dmin;
1106}
1107