1/*
2 * Motion estimation
3 * Copyright (c) 2002-2004 Michael Niedermayer
4 *
5 * This file is part of Libav.
6 *
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * Motion estimation template.
25 */
26
/**
 * Declare local copies of the motion-estimation state that the search
 * macros in this file refer to by name.
 *
 * Must be expanded where declarations are allowed and where a variable
 * named `c` (dereferenced as c->field) is in scope.  It introduces the
 * locals score_map, xmin, ymin, xmax, ymax, mv_penalty, pred_x and pred_y.
 * The first five are tagged av_unused because not every function that
 * expands this macro reads all of them.
 */
//Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
#define LOAD_COMMON\
    uint32_t av_unused * const score_map= c->score_map;\
    const int av_unused xmin= c->xmin;\
    const int av_unused ymin= c->ymin;\
    const int av_unused xmax= c->xmax;\
    const int av_unused ymax= c->ymax;\
    uint8_t *mv_penalty= c->current_mv_penalty;\
    const int pred_x= c->pred_x;\
    const int pred_y= c->pred_y;\
37
/**
 * Score one half-pel candidate and record it if it beats the current best.
 *
 * (x, y) and (dx, dy) are forwarded unchanged to cmp_hpel(); the candidate
 * vector that gets recorded is (2*x + dx, 2*y + dy).  The motion vector
 * penalty relative to (pred_x, pred_y), scaled by penalty_factor, is added
 * to the comparison score before the candidate is offered to COPY3_IF_LT
 * (defined elsewhere; presumably "if d < dmin then dmin=d, bx=hx, by=hy").
 *
 * Uses the caller's locals d, dmin, bx, by, s, size, h, ref_index,
 * src_index, cmp_sub, chroma_cmp_sub, flags, mv_penalty, pred_x, pred_y
 * and penalty_factor.
 */
#define CHECK_HALF_MV(dx, dy, x, y)\
{\
    const int hx= 2*(x)+(dx);\
    const int hy= 2*(y)+(dy);\
    d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}
46
47static int hpel_motion_search(MpegEncContext * s,
48                                  int *mx_ptr, int *my_ptr, int dmin,
49                                  int src_index, int ref_index,
50                                  int size, int h)
51{
52    MotionEstContext * const c= &s->me;
53    const int mx = *mx_ptr;
54    const int my = *my_ptr;
55    const int penalty_factor= c->sub_penalty_factor;
56    me_cmp_func cmp_sub, chroma_cmp_sub;
57    int bx=2*mx, by=2*my;
58
59    LOAD_COMMON
60    int flags= c->sub_flags;
61
62 //FIXME factorize
63
64    cmp_sub= s->dsp.me_sub_cmp[size];
65    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
66
67    if(c->skip){ //FIXME move out of hpel?
68        *mx_ptr = 0;
69        *my_ptr = 0;
70        return dmin;
71    }
72
73    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
74        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
75        if(mx || my || size>0)
76            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
77    }
78
79    if (mx > xmin && mx < xmax &&
80        my > ymin && my < ymax) {
81        int d= dmin;
82        const int index= (my<<ME_MAP_SHIFT) + mx;
83        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
84                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
85        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
86                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
87        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
88                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
89        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
90                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
91
92        unsigned key;
93        unsigned map_generation= c->map_generation;
94#ifndef NDEBUG
95        uint32_t *map= c->map;
96#endif
97        key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
98        assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
99        key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
100        assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
101        key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
102        assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
103        key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
104        assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
105        if(t<=b){
106            CHECK_HALF_MV(0, 1, mx  ,my-1)
107            if(l<=r){
108                CHECK_HALF_MV(1, 1, mx-1, my-1)
109                if(t+r<=b+l){
110                    CHECK_HALF_MV(1, 1, mx  , my-1)
111                }else{
112                    CHECK_HALF_MV(1, 1, mx-1, my  )
113                }
114                CHECK_HALF_MV(1, 0, mx-1, my  )
115            }else{
116                CHECK_HALF_MV(1, 1, mx  , my-1)
117                if(t+l<=b+r){
118                    CHECK_HALF_MV(1, 1, mx-1, my-1)
119                }else{
120                    CHECK_HALF_MV(1, 1, mx  , my  )
121                }
122                CHECK_HALF_MV(1, 0, mx  , my  )
123            }
124        }else{
125            if(l<=r){
126                if(t+l<=b+r){
127                    CHECK_HALF_MV(1, 1, mx-1, my-1)
128                }else{
129                    CHECK_HALF_MV(1, 1, mx  , my  )
130                }
131                CHECK_HALF_MV(1, 0, mx-1, my)
132                CHECK_HALF_MV(1, 1, mx-1, my)
133            }else{
134                if(t+r<=b+l){
135                    CHECK_HALF_MV(1, 1, mx  , my-1)
136                }else{
137                    CHECK_HALF_MV(1, 1, mx-1, my)
138                }
139                CHECK_HALF_MV(1, 0, mx  , my)
140                CHECK_HALF_MV(1, 1, mx  , my)
141            }
142            CHECK_HALF_MV(0, 1, mx  , my)
143        }
144        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
145    }
146
147    *mx_ptr = bx;
148    *my_ptr = by;
149
150    return dmin;
151}
152
153static int no_sub_motion_search(MpegEncContext * s,
154          int *mx_ptr, int *my_ptr, int dmin,
155                                  int src_index, int ref_index,
156                                  int size, int h)
157{
158    (*mx_ptr)<<=1;
159    (*my_ptr)<<=1;
160    return dmin;
161}
162
163inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
164                               int ref_index, int size, int h, int add_rate)
165{
166//    const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
167    MotionEstContext * const c= &s->me;
168    const int penalty_factor= c->mb_penalty_factor;
169    const int flags= c->mb_flags;
170    const int qpel= flags & FLAG_QPEL;
171    const int mask= 1+2*qpel;
172    me_cmp_func cmp_sub, chroma_cmp_sub;
173    int d;
174
175    LOAD_COMMON
176
177 //FIXME factorize
178
179    cmp_sub= s->dsp.mb_cmp[size];
180    chroma_cmp_sub= s->dsp.mb_cmp[size+1];
181
182//    assert(!c->skip);
183//    assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
184
185    d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
186    //FIXME check cbp before adding penalty for (0,0) vector
187    if(add_rate && (mx || my || size>0))
188        d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
189
190    return d;
191}
192
/**
 * Score one quarter-pel candidate and record it if it beats the current best.
 *
 * (x, y) and (dx, dy) are forwarded unchanged to cmp_qpel(); the candidate
 * vector that gets recorded is (4*x + dx, 4*y + dy).  Note that the
 * comparison uses cmpf / chroma_cmpf, not cmp_sub / chroma_cmp_sub.  The
 * motion vector penalty relative to (pred_x, pred_y), scaled by
 * penalty_factor, is added before the candidate is offered to COPY3_IF_LT
 * (defined elsewhere; presumably "if d < dmin then dmin=d, bx=hx, by=hy").
 *
 * Uses the caller's locals d, dmin, bx, by, s, size, h, ref_index,
 * src_index, cmpf, chroma_cmpf, flags, mv_penalty, pred_x, pred_y and
 * penalty_factor.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)\
{\
    const int hx= 4*(x)+(dx);\
    const int hy= 4*(y)+(dy);\
    d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}
201
202static int qpel_motion_search(MpegEncContext * s,
203                                  int *mx_ptr, int *my_ptr, int dmin,
204                                  int src_index, int ref_index,
205                                  int size, int h)
206{
207    MotionEstContext * const c= &s->me;
208    const int mx = *mx_ptr;
209    const int my = *my_ptr;
210    const int penalty_factor= c->sub_penalty_factor;
211    const unsigned map_generation = c->map_generation;
212    const int subpel_quality= c->avctx->me_subpel_quality;
213    uint32_t *map= c->map;
214    me_cmp_func cmpf, chroma_cmpf;
215    me_cmp_func cmp_sub, chroma_cmp_sub;
216
217    LOAD_COMMON
218    int flags= c->sub_flags;
219
220    cmpf= s->dsp.me_cmp[size];
221    chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
222 //FIXME factorize
223
224    cmp_sub= s->dsp.me_sub_cmp[size];
225    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
226
227    if(c->skip){ //FIXME somehow move up (benchmark)
228        *mx_ptr = 0;
229        *my_ptr = 0;
230        return dmin;
231    }
232
233    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
234        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
235        if(mx || my || size>0)
236            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
237    }
238
239    if (mx > xmin && mx < xmax &&
240        my > ymin && my < ymax) {
241        int bx=4*mx, by=4*my;
242        int d= dmin;
243        int i, nx, ny;
244        const int index= (my<<ME_MAP_SHIFT) + mx;
245        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
246        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
247        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
248        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
249        const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
250        int best[8];
251        int best_pos[8][2];
252
253        memset(best, 64, sizeof(int)*8);
254        if(s->me.dia_size>=2){
255            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
256            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
257            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
258            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
259
260            for(ny= -3; ny <= 3; ny++){
261                for(nx= -3; nx <= 3; nx++){
262                    //FIXME this could overflow (unlikely though)
263                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
264                    const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
265                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
266                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
267                    int i;
268
269                    if((nx&3)==0 && (ny&3)==0) continue;
270
271                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
272
273//                    if(nx&1) score-=1024*c->penalty_factor;
274//                    if(ny&1) score-=1024*c->penalty_factor;
275
276                    for(i=0; i<8; i++){
277                        if(score < best[i]){
278                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
279                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
280                            best[i]= score;
281                            best_pos[i][0]= nx + 4*mx;
282                            best_pos[i][1]= ny + 4*my;
283                            break;
284                        }
285                    }
286                }
287            }
288        }else{
289            int tl;
290            //FIXME this could overflow (unlikely though)
291            const int cx = 4*(r - l);
292            const int cx2= r + l - 2*c;
293            const int cy = 4*(b - t);
294            const int cy2= b + t - 2*c;
295            int cxy;
296
297            if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
298                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
299            }else{
300                tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
301            }
302
303            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
304
305            assert(16*cx2 + 4*cx + 32*c == 32*r);
306            assert(16*cx2 - 4*cx + 32*c == 32*l);
307            assert(16*cy2 + 4*cy + 32*c == 32*b);
308            assert(16*cy2 - 4*cy + 32*c == 32*t);
309            assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
310
311            for(ny= -3; ny <= 3; ny++){
312                for(nx= -3; nx <= 3; nx++){
313                    //FIXME this could overflow (unlikely though)
314                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
315                    int i;
316
317                    if((nx&3)==0 && (ny&3)==0) continue;
318
319                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
320//                    if(nx&1) score-=32*c->penalty_factor;
321  //                  if(ny&1) score-=32*c->penalty_factor;
322
323                    for(i=0; i<8; i++){
324                        if(score < best[i]){
325                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
326                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
327                            best[i]= score;
328                            best_pos[i][0]= nx + 4*mx;
329                            best_pos[i][1]= ny + 4*my;
330                            break;
331                        }
332                    }
333                }
334            }
335        }
336        for(i=0; i<subpel_quality; i++){
337            nx= best_pos[i][0];
338            ny= best_pos[i][1];
339            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
340        }
341
342        assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
343
344        *mx_ptr = bx;
345        *my_ptr = by;
346    }else{
347        *mx_ptr =4*mx;
348        *my_ptr =4*my;
349    }
350
351    return dmin;
352}
353
354
/**
 * Evaluate the full-pel position (x, y) unless it is already in the map.
 *
 * map[] caches which positions have been scored in the current generation
 * (key = y, x and map_generation packed together) and score_map[] caches
 * the raw comparison score at the same index.  For a new position the raw
 * score is stored, the motion vector penalty is added to d, and the result
 * is offered to COPY3_IF_LT (defined elsewhere; presumably "if d < dmin
 * then dmin=d, best[0]=x, best[1]=y").  For an already cached position
 * nothing is touched, d included.
 *
 * x and y may be negative, so the key/index shifts are done on unsigned
 * values and the penalty index uses a multiplication: left-shifting a
 * negative int is undefined behaviour.
 *
 * Uses the caller's locals map, score_map, map_generation, d, dmin, best,
 * xmin, xmax, ymin, ymax, shift, mv_penalty, pred_x, pred_y,
 * penalty_factor, s, size, h, ref_index, src_index, cmpf, chroma_cmpf and
 * flags.  The arguments are evaluated more than once.
 */
#define CHECK_MV(x,y)\
{\
    const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    assert((x) >= xmin);\
    assert((x) <= xmax);\
    assert((y) >= ymin);\
    assert((y) <= ymax);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
    }\
}
373
/**
 * Clamp (ax, ay) into [xmin, xmax] x [ymin, ymax] and pass the clamped
 * position to CHECK_MV.
 *
 * Each argument is evaluated exactly once (copied into Lx / Ly first).
 * Requires xmin, xmax, ymin, ymax plus everything CHECK_MV needs to be in
 * scope at the point of expansion.
 */
#define CHECK_CLIPPED_MV(ax,ay)\
{\
    const int Lx= ax;\
    const int Ly= ay;\
    const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
    const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
    CHECK_MV(Lx2, Ly2)\
}
382
/**
 * Like CHECK_MV, but additionally records the direction of an improvement.
 *
 * If (x, y) has not been scored in the current map generation it is scored,
 * cached in map[] / score_map[], and when its penalised score d is lower
 * than dmin the caller's best[], dmin and next_dir are updated (next_dir is
 * set to new_dir).  No range checks are performed here.
 *
 * x and y may be negative, so the key/index shifts are done on unsigned
 * values and the penalty index uses a multiplication: left-shifting a
 * negative int is undefined behaviour.
 *
 * Uses the caller's locals map, score_map, map_generation, d, dmin, best,
 * next_dir, shift, mv_penalty, pred_x, pred_y, penalty_factor, s, size, h,
 * ref_index, src_index, cmpf, chroma_cmpf and flags.  The arguments are
 * evaluated more than once.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        if(d<dmin){\
            best[0]=x;\
            best[1]=y;\
            dmin=d;\
            next_dir= new_dir;\
        }\
    }\
}
402
/**
 * Debug helper: print a diagnostic when (x, y) lies outside the search
 * range scaled by 2^S.  v is a tag that is stringified and appended to the
 * message.  Only referenced from commented-out lines in the part of the
 * file visible here.
 *
 * Expands to four unbraced if statements, so it must not be used as the
 * body of an un-braced if/else.  The format strings contain no newline.
 */
#define check(x,y,S,v)\
if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
408
/**
 * Declare the additional locals needed by the CHECK_MV family of macros:
 * map (the position cache of the context `c`), qpel (flags & FLAG_QPEL)
 * and shift (1 + qpel, the scale applied to full-pel coordinates before
 * they index mv_penalty).
 *
 * Requires `c` and `flags` to be in scope at the point of expansion.
 */
#define LOAD_COMMON2\
    uint32_t *map= c->map;\
    const int qpel= flags&FLAG_QPEL;\
    const int shift= 1+qpel;\
413
414static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
415                                       int src_index, int ref_index, int const penalty_factor,
416                                       int size, int h, int flags)
417{
418    MotionEstContext * const c= &s->me;
419    me_cmp_func cmpf, chroma_cmpf;
420    int next_dir=-1;
421    LOAD_COMMON
422    LOAD_COMMON2
423    unsigned map_generation = c->map_generation;
424
425    cmpf= s->dsp.me_cmp[size];
426    chroma_cmpf= s->dsp.me_cmp[size+1];
427
428    { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
429        const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
430        const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
431        if(map[index]!=key){ //this will be executed only very rarey
432            score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
433            map[index]= key;
434        }
435    }
436
437    for(;;){
438        int d;
439        const int dir= next_dir;
440        const int x= best[0];
441        const int y= best[1];
442        next_dir=-1;
443
444//printf("%d", dir);
445        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
446        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
447        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
448        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
449
450        if(next_dir==-1){
451            return dmin;
452        }
453    }
454}
455
/**
 * Diamond search that tries the sizes 1, 2 and 4 in turn.
 *
 * For each size, four points are scored per value of dir, with dir
 * stepping by 2 from 0 while it is below dia_size.  Whenever a size
 * improves best[], the search restarts from size 1 at the new position.
 * A size is skipped when the diamond would reach outside the search range.
 *
 * @param best  in: start position, out: best position found
 * @param dmin  score of the start position
 * @return the score of the final best[] position
 */
static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    unsigned map_generation = c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(dia_size=1; dia_size<=4; dia_size++){
        int dir;
        const int x= best[0];
        const int y= best[1];

        /* only powers of two (1, 2, 4) are used; 3 is skipped */
        if(dia_size&(dia_size-1)) continue;

        /* skip this size if any tip of the diamond is out of range */
        if(   x + dia_size > xmax
           || x - dia_size < xmin
           || y + dia_size > ymax
           || y - dia_size < ymin)
           continue;

        for(dir= 0; dir<dia_size; dir+=2){
            int d;

            CHECK_MV(x + dir           , y + dia_size - dir);
            CHECK_MV(x + dia_size - dir, y - dir           );
            CHECK_MV(x - dir           , y - dia_size + dir);
            CHECK_MV(x - dia_size + dir, y + dir           );
        }

        /* best moved: the loop increment turns 0 into 1, restarting the
         * size sequence at the new position */
        if(x!=best[0] || y!=best[1])
            dia_size=0;
    }
    return dmin;
}
497
498static int hex_search(MpegEncContext * s, int *best, int dmin,
499                                       int src_index, int ref_index, int const penalty_factor,
500                                       int size, int h, int flags, int dia_size)
501{
502    MotionEstContext * const c= &s->me;
503    me_cmp_func cmpf, chroma_cmpf;
504    LOAD_COMMON
505    LOAD_COMMON2
506    unsigned map_generation = c->map_generation;
507    int x,y,d;
508    const int dec= dia_size & (dia_size-1);
509
510    cmpf= s->dsp.me_cmp[size];
511    chroma_cmpf= s->dsp.me_cmp[size+1];
512
513    for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
514        do{
515            x= best[0];
516            y= best[1];
517
518            CHECK_CLIPPED_MV(x  -dia_size    , y);
519            CHECK_CLIPPED_MV(x+  dia_size    , y);
520            CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
521            CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
522            if(dia_size>1){
523                CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
524                CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
525            }
526        }while(best[0] != x || best[1] != y);
527    }
528
529    return dmin;
530}
531
532static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
533                                       int src_index, int ref_index, int const penalty_factor,
534                                       int size, int h, int flags)
535{
536    MotionEstContext * const c= &s->me;
537    me_cmp_func cmpf, chroma_cmpf;
538    LOAD_COMMON
539    LOAD_COMMON2
540    unsigned map_generation = c->map_generation;
541    int x,y,i,d;
542    int dia_size= c->dia_size&0xFF;
543    const int dec= dia_size & (dia_size-1);
544    static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
545                                { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
546
547    cmpf= s->dsp.me_cmp[size];
548    chroma_cmpf= s->dsp.me_cmp[size+1];
549
550    for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
551        do{
552            x= best[0];
553            y= best[1];
554            for(i=0; i<8; i++){
555                CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
556            }
557        }while(best[0] != x || best[1] != y);
558    }
559
560    x= best[0];
561    y= best[1];
562    CHECK_CLIPPED_MV(x+1, y);
563    CHECK_CLIPPED_MV(x, y+1);
564    CHECK_CLIPPED_MV(x-1, y);
565    CHECK_CLIPPED_MV(x, y-1);
566
567    return dmin;
568}
569
/**
 * Multi-stage search: a sparse cross, a 5x5 window, a scaled 16-point
 * pattern and finally hex_search() with radius 2.
 *
 * dia_size is c->dia_size & 0xFE.
 *
 * @param best  in: start position, out: best position found
 * @param dmin  score of the start position
 * @return the value returned by the concluding hex_search()
 */
static int umh_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    unsigned map_generation = c->map_generation;
    int x,y,x2,y2, i, j, d;
    const int dia_size= c->dia_size&0xFE;
    /* offsets of the 16-point pattern; scaled by j in the loop further down */
    static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
                                 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
                                 {-2, 3}, { 0, 4}, { 2, 3},
                                 {-2,-3}, { 0,-4}, { 2,-3},};

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    /* stage 1: every second position on the row and the column through the
     * start point, clipped to the search range; the column extent is based
     * on dia_size/2 */
    x= best[0];
    y= best[1];
    for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
        CHECK_MV(x2, y);
    }
    for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
        CHECK_MV(x, y2);
    }

    /* stage 2: full 5x5 window around the best position found so far */
    x= best[0];
    y= best[1];
    for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
        for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
            CHECK_MV(x2, y2);
        }
    }

//FIXME prevent the CLIP stuff

    /* stage 3: the 16-point pattern at scales 1..dia_size/4; x and y are
     * not refreshed here, so it stays centred on the stage 2 centre */
    for(j=1; j<=dia_size/4; j++){
        for(i=0; i<16; i++){
            CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
        }
    }

    /* stage 4: refine with a radius-2 hexagon search */
    return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
}
616
617static int full_search(MpegEncContext * s, int *best, int dmin,
618                                       int src_index, int ref_index, int const penalty_factor,
619                                       int size, int h, int flags)
620{
621    MotionEstContext * const c= &s->me;
622    me_cmp_func cmpf, chroma_cmpf;
623    LOAD_COMMON
624    LOAD_COMMON2
625    unsigned map_generation = c->map_generation;
626    int x,y, d;
627    const int dia_size= c->dia_size&0xFF;
628
629    cmpf= s->dsp.me_cmp[size];
630    chroma_cmpf= s->dsp.me_cmp[size+1];
631
632    for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
633        for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
634            CHECK_MV(x, y);
635        }
636    }
637
638    x= best[0];
639    y= best[1];
640    d= dmin;
641    CHECK_CLIPPED_MV(x  , y);
642    CHECK_CLIPPED_MV(x+1, y);
643    CHECK_CLIPPED_MV(x, y+1);
644    CHECK_CLIPPED_MV(x-1, y);
645    CHECK_CLIPPED_MV(x, y-1);
646    best[0]= x;
647    best[1]= y;
648
649    return d;
650}
651
/**
 * Score (ax, ay) unless it is already cached and, if it is better than the
 * worst entry of the caller's sorted minima[] list, insert it there.
 *
 * On insertion the entries from the insertion point onwards are moved down
 * by one (the last one is dropped), the new entry is marked unchecked, and
 * the enclosing loop is restarted via "i=-1; continue;".  The macro must
 * therefore be expanded directly inside a loop whose counter is named i.
 *
 * ax and ay may be negative, so the key/index shifts are done on unsigned
 * values and the penalty index uses a multiplication: left-shifting a
 * negative int is undefined behaviour.
 *
 * Uses the caller's locals map, score_map, map_generation, d, i, minima,
 * minima_count, shift, mv_penalty, pred_x, pred_y, penalty_factor, s, size,
 * h, ref_index, src_index, cmpf, chroma_cmpf and flags.  The arguments are
 * evaluated more than once.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const unsigned key = ((unsigned)(ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
    const int index= (((unsigned)(ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((ax)*(1<<shift))-pred_x] + mv_penalty[((ay)*(1<<shift))-pred_y])*penalty_factor;\
        if(d < minima[minima_count-1].height){\
            int j=0;\
            \
            while(d >= minima[j].height) j++;\
\
            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
\
            minima[j].checked= 0;\
            minima[j].height= d;\
            minima[j].x= ax;\
            minima[j].y= ay;\
            \
            i=-1;\
            continue;\
        }\
    }\
}
680
681#define MAX_SAB_SIZE ME_MAP_SIZE
682static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
683                                       int src_index, int ref_index, int const penalty_factor,
684                                       int size, int h, int flags)
685{
686    MotionEstContext * const c= &s->me;
687    me_cmp_func cmpf, chroma_cmpf;
688    Minima minima[MAX_SAB_SIZE];
689    const int minima_count= FFABS(c->dia_size);
690    int i, j;
691    LOAD_COMMON
692    LOAD_COMMON2
693    unsigned map_generation = c->map_generation;
694
695    cmpf= s->dsp.me_cmp[size];
696    chroma_cmpf= s->dsp.me_cmp[size+1];
697
698    /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
699      become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
700     */
701    for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
702        uint32_t key= map[i];
703
704        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
705
706        if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
707
708        minima[j].height= score_map[i];
709        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
710        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
711        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
712        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
713
714        // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
715        if(   minima[j].x > xmax || minima[j].x < xmin
716           || minima[j].y > ymax || minima[j].y < ymin)
717            continue;
718
719        minima[j].checked=0;
720        if(minima[j].x || minima[j].y)
721            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
722
723        j++;
724    }
725
726    qsort(minima, j, sizeof(Minima), minima_cmp);
727
728    for(; j<minima_count; j++){
729        minima[j].height=256*256*256*64;
730        minima[j].checked=0;
731        minima[j].x= minima[j].y=0;
732    }
733
734    for(i=0; i<minima_count; i++){
735        const int x= minima[i].x;
736        const int y= minima[i].y;
737        int d;
738
739        if(minima[i].checked) continue;
740
741        if(   x >= xmax || x <= xmin
742           || y >= ymax || y <= ymin)
743           continue;
744
745        SAB_CHECK_MV(x-1, y)
746        SAB_CHECK_MV(x+1, y)
747        SAB_CHECK_MV(x  , y-1)
748        SAB_CHECK_MV(x  , y+1)
749
750        minima[i].checked= 1;
751    }
752
753    best[0]= minima[0].x;
754    best[1]= minima[0].y;
755    dmin= minima[0].height;
756
757    if(   best[0] < xmax && best[0] > xmin
758       && best[1] < ymax && best[1] > ymin){
759        int d;
760        //ensure that the refernece samples for hpel refinement are in the map
761        CHECK_MV(best[0]-1, best[1])
762        CHECK_MV(best[0]+1, best[1])
763        CHECK_MV(best[0], best[1]-1)
764        CHECK_MV(best[0], best[1]+1)
765    }
766    return dmin;
767}
768
/**
 * Diamond search with a growing radius.
 *
 * For dia_size = 1 .. c->dia_size the four edges of the diamond of that
 * radius around best[] are scored.  The start/end values of each edge loop
 * are derived from the distance of the centre to the range limits so that
 * only part of an edge is visited near the border.  Whenever a radius
 * improves best[], the search restarts from radius 1 at the new position.
 *
 * @param best  in: start position, out: best position found
 * @param dmin  score of the start position
 * @return the score of the final best[] position
 */
static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    unsigned map_generation = c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(dia_size=1; dia_size<=c->dia_size; dia_size++){
        int dir, start, end;
        const int x= best[0];
        const int y= best[1];

        /* edge 1: from (x, y+dia_size) towards (x+dia_size, y) */
        start= FFMAX(0, y + dia_size - ymax);
        end  = FFMIN(dia_size, xmax - x + 1);
        for(dir= start; dir<end; dir++){
            int d;

//check(x + dir,y + dia_size - dir,0, a0)
            CHECK_MV(x + dir           , y + dia_size - dir);
        }

        /* edge 2: from (x+dia_size, y) towards (x, y-dia_size) */
        start= FFMAX(0, x + dia_size - xmax);
        end  = FFMIN(dia_size, y - ymin + 1);
        for(dir= start; dir<end; dir++){
            int d;

//check(x + dia_size - dir, y - dir,0, a1)
            CHECK_MV(x + dia_size - dir, y - dir           );
        }

        /* edge 3: from (x, y-dia_size) towards (x-dia_size, y) */
        start= FFMAX(0, -y + dia_size + ymin );
        end  = FFMIN(dia_size, x - xmin + 1);
        for(dir= start; dir<end; dir++){
            int d;

//check(x - dir,y - dia_size + dir,0, a2)
            CHECK_MV(x - dir           , y - dia_size + dir);
        }

        /* edge 4: from (x-dia_size, y) towards (x, y+dia_size) */
        start= FFMAX(0, -x + dia_size + xmin );
        end  = FFMIN(dia_size, ymax - y + 1);
        for(dir= start; dir<end; dir++){
            int d;

//check(x - dia_size + dir, y + dir,0, a3)
            CHECK_MV(x - dia_size + dir, y + dir           );
        }

        /* best moved: the loop increment turns 0 into 1, restarting at
         * radius 1 around the new position */
        if(x!=best[0] || y!=best[1])
            dia_size=0;
    }
    return dmin;
}
829
830static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
831                                       int src_index, int ref_index, int const penalty_factor,
832                                       int size, int h, int flags){
833    MotionEstContext * const c= &s->me;
834    if(c->dia_size==-1)
835        return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
836    else if(c->dia_size<-1)
837        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
838    else if(c->dia_size<2)
839        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
840    else if(c->dia_size>1024)
841        return          full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
842    else if(c->dia_size>768)
843        return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
844    else if(c->dia_size>512)
845        return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
846    else if(c->dia_size>256)
847        return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
848    else
849        return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
850}
851
852/**
853   @param P a list of candidate mvs to check before starting the
854   iterative search. If one of the candidates is close to the optimal mv, then
855   it takes fewer iterations. And it increases the chance that we find the
856   optimal mv.
857 */
858static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
859                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
860                             int ref_mv_scale, int flags, int size, int h)
861{
862    MotionEstContext * const c= &s->me;
863    int best[2]={0, 0};      /**< x and y coordinates of the best motion vector.
864                               i.e. the difference between the position of the
865                               block currently being encoded and the position of
866                               the block chosen to predict it from. */
867    int d;                   ///< the score (cmp + penalty) of any given mv
868    int dmin;                /**< the best value of d, i.e. the score
869                               corresponding to the mv stored in best[]. */
870    unsigned map_generation;
871    int penalty_factor;
872    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
873    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
874    me_cmp_func cmpf, chroma_cmpf;
875
876    LOAD_COMMON
877    LOAD_COMMON2
878
879    if(c->pre_pass){
880        penalty_factor= c->pre_penalty_factor;
881        cmpf= s->dsp.me_pre_cmp[size];
882        chroma_cmpf= s->dsp.me_pre_cmp[size+1];
883    }else{
884        penalty_factor= c->penalty_factor;
885        cmpf= s->dsp.me_cmp[size];
886        chroma_cmpf= s->dsp.me_cmp[size+1];
887    }
888
889    map_generation= update_map_generation(c);
890
891    assert(cmpf);
892    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
893    map[0]= map_generation;
894    score_map[0]= dmin;
895
896    //FIXME precalc first term below?
897    if((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
898        dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
899
900    /* first line */
901    if (s->first_slice_line) {
902        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
903        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
904                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
905    }else{
906        if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
907                    && ( P_LEFT[0]    |P_LEFT[1]
908                        |P_TOP[0]     |P_TOP[1]
909                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
910            *mx_ptr= 0;
911            *my_ptr= 0;
912            c->skip=1;
913            return dmin;
914        }
915        CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
916        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
917        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
918        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
919        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
920        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
921                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
922        CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
923        CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
924        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
925    }
926    if(dmin>h*h*4){
927        if(c->pre_pass){
928            CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
929                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
930            if(!s->first_slice_line)
931                CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
932                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
933        }else{
934            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
935                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
936            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
937                CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
938                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
939        }
940    }
941
942    if(c->avctx->last_predictor_count){
943        const int count= c->avctx->last_predictor_count;
944        const int xstart= FFMAX(0, s->mb_x - count);
945        const int ystart= FFMAX(0, s->mb_y - count);
946        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
947        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
948        int mb_y;
949
950        for(mb_y=ystart; mb_y<yend; mb_y++){
951            int mb_x;
952            for(mb_x=xstart; mb_x<xend; mb_x++){
953                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
954                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
955                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
956
957                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
958                CHECK_MV(mx,my)
959            }
960        }
961    }
962
963//check(best[0],best[1],0, b0)
964    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
965
966//check(best[0],best[1],0, b1)
967    *mx_ptr= best[0];
968    *my_ptr= best[1];
969
970//    printf("%d %d %d \n", best[0], best[1], dmin);
971    return dmin;
972}
973
974//this function is dedicated to the braindamaged gcc
975inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
976                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
977                             int ref_mv_scale, int size, int h)
978{
979    MotionEstContext * const c= &s->me;
980//FIXME convert other functions in the same way if faster
981    if(c->flags==0 && h==16 && size==0){
982        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
983//    case FLAG_QPEL:
984//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
985    }else{
986        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
987    }
988}
989
/**
 * Predictor-based motion search with a fixed compare-function index of 1
 * and h of 8 (presumably an 8x8 block of a 4MV macroblock -- confirm
 * against the callers).
 *
 * The zero vector is not scored up front: dmin starts at a large sentinel,
 * so the first candidate that is actually evaluated becomes the initial
 * best.
 *
 * @param s            encoder context; s->me holds the search state
 * @param mx_ptr       receives the x component of the best vector found
 * @param my_ptr       receives the y component of the best vector found
 * @param P            candidate vectors, accessed through the P_* macros
 * @param src_index    forwarded to diamond_search()
 * @param ref_index    forwarded to diamond_search()
 * @param last_mv      table of motion vectors indexed by
 *                     mb_x + mb_y*s->mb_stride, used as extra candidates
 * @param ref_mv_scale factor applied to last_mv entries; the product is
 *                     rounded and shifted right by 16
 * @return the score of the vector written to *mx_ptr / *my_ptr
 */
static int epzs_motion_search4(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int P[10][2],
                             int src_index, int ref_index, int16_t (*last_mv)[2],
                             int ref_mv_scale)
{
    MotionEstContext * const c= &s->me;
    int best[2]={0, 0};
    int d, dmin;
    unsigned map_generation;
    const int penalty_factor= c->penalty_factor;
    const int size=1;
    const int h=8;
    const int ref_mv_stride= s->mb_stride;
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
    me_cmp_func cmpf, chroma_cmpf;
    /* NOTE(review): LOAD_COMMON2 is expanded after flags is declared;
     * presumably it reads flags -- keep this order. */
    LOAD_COMMON
    int flags= c->flags;
    LOAD_COMMON2

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    map_generation= update_map_generation(c);

    /* sentinel: any evaluated candidate is expected to score lower */
    dmin = 1000000;
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
    /* first line */
    if (s->first_slice_line) {
        /* left predictor, co-located last_mv entry, then P_MV1 */
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    }else{
        /* P_MV1 first, then median/left/top/top-right and the co-located
         * last_mv entry */
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
        //FIXME try some early stop
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
    }
    /* Still a poor match (threshold 64*4): also try the last_mv entries at
     * index +1 and, unless this is the last row of the slice, one stride
     * below. */
    if(dmin>64*4){
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
    }

    /* Iterative refinement starting from the best candidate found so far. */
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    *mx_ptr= best[0];
    *my_ptr= best[1];

//    printf("%d %d %d \n", best[0], best[1], dmin);
    return dmin;
}
1048
//FIXME try to merge with epzs_motion_search4() (needs a PSNR test)
/**
 * Predictor-based motion search with a fixed compare-function index of 0
 * and h of 8 (presumably a 16x8 block, e.g. one field of an interlaced
 * macroblock -- confirm against the callers).
 *
 * Apart from the value of size, the body is the same as
 * epzs_motion_search4(). The zero vector is not scored up front: dmin
 * starts at a large sentinel, so the first candidate that is actually
 * evaluated becomes the initial best.
 *
 * @param s            encoder context; s->me holds the search state
 * @param mx_ptr       receives the x component of the best vector found
 * @param my_ptr       receives the y component of the best vector found
 * @param P            candidate vectors, accessed through the P_* macros
 * @param src_index    forwarded to diamond_search()
 * @param ref_index    forwarded to diamond_search()
 * @param last_mv      table of motion vectors indexed by
 *                     mb_x + mb_y*s->mb_stride, used as extra candidates
 * @param ref_mv_scale factor applied to last_mv entries; the product is
 *                     rounded and shifted right by 16
 * @return the score of the vector written to *mx_ptr / *my_ptr
 */
static int epzs_motion_search2(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int P[10][2],
                             int src_index, int ref_index, int16_t (*last_mv)[2],
                             int ref_mv_scale)
{
    MotionEstContext * const c= &s->me;
    int best[2]={0, 0};
    int d, dmin;
    unsigned map_generation;
    const int penalty_factor= c->penalty_factor;
    const int size=0; //FIXME pass as arg
    const int h=8;
    const int ref_mv_stride= s->mb_stride;
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
    me_cmp_func cmpf, chroma_cmpf;
    /* NOTE(review): LOAD_COMMON2 is expanded after flags is declared;
     * presumably it reads flags -- keep this order. */
    LOAD_COMMON
    int flags= c->flags;
    LOAD_COMMON2

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    map_generation= update_map_generation(c);

    /* sentinel: any evaluated candidate is expected to score lower */
    dmin = 1000000;
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
    /* first line */
    if (s->first_slice_line) {
        /* left predictor, co-located last_mv entry, then P_MV1 */
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    }else{
        /* P_MV1 first, then median/left/top/top-right and the co-located
         * last_mv entry */
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
        //FIXME try some early stop
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
    }
    /* Still a poor match (threshold 64*4): also try the last_mv entries at
     * index +1 and, unless this is the last row of the slice, one stride
     * below. */
    if(dmin>64*4){
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
    }

    /* Iterative refinement starting from the best candidate found so far. */
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    *mx_ptr= best[0];
    *my_ptr= best[1];

//    printf("%d %d %d \n", best[0], best[1], dmin);
    return dmin;
}
1108