1/*
2 * Motion estimation
3 * Copyright (c) 2002-2004 Michael Niedermayer
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file libavcodec/motion_est_template.c
24 * Motion estimation template.
25 */
26
27//Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
/**
 * Declare the locals shared by the search routines, all read from the
 * MotionEstContext pointer that must be visible as c at the expansion site:
 * the score map, the search window limits, the MV penalty table and the
 * MV predictor. Locals that not every routine uses are tagged av_unused.
 */
#define LOAD_COMMON                                         \
    uint32_t av_unused * const score_map = c->score_map;    \
    const int av_unused xmin = c->xmin;                     \
    const int av_unused ymin = c->ymin;                     \
    const int av_unused xmax = c->xmax;                     \
    const int av_unused ymax = c->ymax;                     \
    uint8_t *mv_penalty = c->current_mv_penalty;            \
    const int pred_x = c->pred_x;                           \
    const int pred_y = c->pred_y;
37
/**
 * Score the half-pel candidate (2*x+dx, 2*y+dy).
 * The score from cmp() (using cmp_sub / chroma_cmp_sub) plus the motion
 * vector penalty is left in d; COPY3_IF_LT (defined elsewhere) is then
 * handed dmin/d and bx/by so that a better candidate can replace the
 * current best. All other names are taken from the expanding function.
 */
#define CHECK_HALF_MV(dx, dy, x, y)                                             \
{                                                                               \
    const int hx = 2 * (x) + (dx);                                              \
    const int hy = 2 * (y) + (dy);                                              \
    d  = cmp(s, x, y, dx, dy, size, h, ref_index, src_index,                    \
             cmp_sub, chroma_cmp_sub, flags);                                   \
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y]) * penalty_factor;  \
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)                                        \
}
46
47#if 0
48static int hpel_motion_search)(MpegEncContext * s,
49                                  int *mx_ptr, int *my_ptr, int dmin,
50                                  uint8_t *ref_data[3],
51                                  int size)
52{
53    const int xx = 16 * s->mb_x + 8*(n&1);
54    const int yy = 16 * s->mb_y + 8*(n>>1);
55    const int mx = *mx_ptr;
56    const int my = *my_ptr;
57    const int penalty_factor= c->sub_penalty_factor;
58
59    LOAD_COMMON
60
61 //   INIT;
62 //FIXME factorize
63    me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
64
65    if(s->no_rounding /*FIXME b_type*/){
66        hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
67        chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
68    }else{
69        hpel_put=& s->dsp.put_pixels_tab[size];
70        chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
71    }
72    cmpf= s->dsp.me_cmp[size];
73    chroma_cmpf= s->dsp.me_cmp[size+1];
74    cmp_sub= s->dsp.me_sub_cmp[size];
75    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
76
77    if(c->skip){ //FIXME somehow move up (benchmark)
78        *mx_ptr = 0;
79        *my_ptr = 0;
80        return dmin;
81    }
82
83    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
84        CMP_HPEL(dmin, 0, 0, mx, my, size);
85        if(mx || my)
86            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
87    }
88
89    if (mx > xmin && mx < xmax &&
90        my > ymin && my < ymax) {
91        int bx=2*mx, by=2*my;
92        int d= dmin;
93
94        CHECK_HALF_MV(1, 1, mx-1, my-1)
95        CHECK_HALF_MV(0, 1, mx  , my-1)
96        CHECK_HALF_MV(1, 1, mx  , my-1)
97        CHECK_HALF_MV(1, 0, mx-1, my  )
98        CHECK_HALF_MV(1, 0, mx  , my  )
99        CHECK_HALF_MV(1, 1, mx-1, my  )
100        CHECK_HALF_MV(0, 1, mx  , my  )
101        CHECK_HALF_MV(1, 1, mx  , my  )
102
103        assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2);
104
105        *mx_ptr = bx;
106        *my_ptr = by;
107    }else{
108        *mx_ptr =2*mx;
109        *my_ptr =2*my;
110    }
111
112    return dmin;
113}
114
115#else
116static int hpel_motion_search(MpegEncContext * s,
117                                  int *mx_ptr, int *my_ptr, int dmin,
118                                  int src_index, int ref_index,
119                                  int size, int h)
120{
121    MotionEstContext * const c= &s->me;
122    const int mx = *mx_ptr;
123    const int my = *my_ptr;
124    const int penalty_factor= c->sub_penalty_factor;
125    me_cmp_func cmp_sub, chroma_cmp_sub;
126    int bx=2*mx, by=2*my;
127
128    LOAD_COMMON
129    int flags= c->sub_flags;
130
131 //FIXME factorize
132
133    cmp_sub= s->dsp.me_sub_cmp[size];
134    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
135
136    if(c->skip){ //FIXME move out of hpel?
137        *mx_ptr = 0;
138        *my_ptr = 0;
139        return dmin;
140    }
141
142    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
143        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
144        if(mx || my || size>0)
145            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
146    }
147
148    if (mx > xmin && mx < xmax &&
149        my > ymin && my < ymax) {
150        int d= dmin;
151        const int index= (my<<ME_MAP_SHIFT) + mx;
152        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
153                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
154        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
155                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
156        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
157                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
158        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
159                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
160
161#if 1
162        int key;
163        int map_generation= c->map_generation;
164#ifndef NDEBUG
165        uint32_t *map= c->map;
166#endif
167        key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
168        assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
169        key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
170        assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
171        key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
172        assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
173        key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
174        assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
175#endif
176        if(t<=b){
177            CHECK_HALF_MV(0, 1, mx  ,my-1)
178            if(l<=r){
179                CHECK_HALF_MV(1, 1, mx-1, my-1)
180                if(t+r<=b+l){
181                    CHECK_HALF_MV(1, 1, mx  , my-1)
182                }else{
183                    CHECK_HALF_MV(1, 1, mx-1, my  )
184                }
185                CHECK_HALF_MV(1, 0, mx-1, my  )
186            }else{
187                CHECK_HALF_MV(1, 1, mx  , my-1)
188                if(t+l<=b+r){
189                    CHECK_HALF_MV(1, 1, mx-1, my-1)
190                }else{
191                    CHECK_HALF_MV(1, 1, mx  , my  )
192                }
193                CHECK_HALF_MV(1, 0, mx  , my  )
194            }
195        }else{
196            if(l<=r){
197                if(t+l<=b+r){
198                    CHECK_HALF_MV(1, 1, mx-1, my-1)
199                }else{
200                    CHECK_HALF_MV(1, 1, mx  , my  )
201                }
202                CHECK_HALF_MV(1, 0, mx-1, my)
203                CHECK_HALF_MV(1, 1, mx-1, my)
204            }else{
205                if(t+r<=b+l){
206                    CHECK_HALF_MV(1, 1, mx  , my-1)
207                }else{
208                    CHECK_HALF_MV(1, 1, mx-1, my)
209                }
210                CHECK_HALF_MV(1, 0, mx  , my)
211                CHECK_HALF_MV(1, 1, mx  , my)
212            }
213            CHECK_HALF_MV(0, 1, mx  , my)
214        }
215        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
216    }
217
218    *mx_ptr = bx;
219    *my_ptr = by;
220
221    return dmin;
222}
223#endif
224
225static int no_sub_motion_search(MpegEncContext * s,
226          int *mx_ptr, int *my_ptr, int dmin,
227                                  int src_index, int ref_index,
228                                  int size, int h)
229{
230    (*mx_ptr)<<=1;
231    (*my_ptr)<<=1;
232    return dmin;
233}
234
235inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
236                               int ref_index, int size, int h, int add_rate)
237{
238//    const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
239    MotionEstContext * const c= &s->me;
240    const int penalty_factor= c->mb_penalty_factor;
241    const int flags= c->mb_flags;
242    const int qpel= flags & FLAG_QPEL;
243    const int mask= 1+2*qpel;
244    me_cmp_func cmp_sub, chroma_cmp_sub;
245    int d;
246
247    LOAD_COMMON
248
249 //FIXME factorize
250
251    cmp_sub= s->dsp.mb_cmp[size];
252    chroma_cmp_sub= s->dsp.mb_cmp[size+1];
253
254//    assert(!c->skip);
255//    assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
256
257    d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
258    //FIXME check cbp before adding penalty for (0,0) vector
259    if(add_rate && (mx || my || size>0))
260        d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
261
262    return d;
263}
264
/**
 * Score the quarter-pel candidate (4*x+dx, 4*y+dy).
 * The score from cmp() (using cmpf / chroma_cmpf) plus the motion vector
 * penalty is left in d; COPY3_IF_LT (defined elsewhere) is then handed
 * dmin/d and bx/by so that a better candidate can replace the current best.
 * All other names are taken from the expanding function.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)                                          \
{                                                                               \
    const int hx = 4 * (x) + (dx);                                              \
    const int hy = 4 * (y) + (dy);                                              \
    d  = cmp(s, x, y, dx, dy, size, h, ref_index, src_index,                    \
             cmpf, chroma_cmpf, flags);                                         \
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y]) * penalty_factor;  \
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)                                        \
}
273
274static int qpel_motion_search(MpegEncContext * s,
275                                  int *mx_ptr, int *my_ptr, int dmin,
276                                  int src_index, int ref_index,
277                                  int size, int h)
278{
279    MotionEstContext * const c= &s->me;
280    const int mx = *mx_ptr;
281    const int my = *my_ptr;
282    const int penalty_factor= c->sub_penalty_factor;
283    const int map_generation= c->map_generation;
284    const int subpel_quality= c->avctx->me_subpel_quality;
285    uint32_t *map= c->map;
286    me_cmp_func cmpf, chroma_cmpf;
287    me_cmp_func cmp_sub, chroma_cmp_sub;
288
289    LOAD_COMMON
290    int flags= c->sub_flags;
291
292    cmpf= s->dsp.me_cmp[size];
293    chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
294 //FIXME factorize
295
296    cmp_sub= s->dsp.me_sub_cmp[size];
297    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
298
299    if(c->skip){ //FIXME somehow move up (benchmark)
300        *mx_ptr = 0;
301        *my_ptr = 0;
302        return dmin;
303    }
304
305    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
306        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
307        if(mx || my || size>0)
308            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
309    }
310
311    if (mx > xmin && mx < xmax &&
312        my > ymin && my < ymax) {
313        int bx=4*mx, by=4*my;
314        int d= dmin;
315        int i, nx, ny;
316        const int index= (my<<ME_MAP_SHIFT) + mx;
317        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
318        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
319        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
320        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
321        const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
322        int best[8];
323        int best_pos[8][2];
324
325        memset(best, 64, sizeof(int)*8);
326#if 1
327        if(s->me.dia_size>=2){
328            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
329            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
330            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
331            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
332
333            for(ny= -3; ny <= 3; ny++){
334                for(nx= -3; nx <= 3; nx++){
335                    //FIXME this could overflow (unlikely though)
336                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
337                    const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
338                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
339                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
340                    int i;
341
342                    if((nx&3)==0 && (ny&3)==0) continue;
343
344                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
345
346//                    if(nx&1) score-=1024*c->penalty_factor;
347//                    if(ny&1) score-=1024*c->penalty_factor;
348
349                    for(i=0; i<8; i++){
350                        if(score < best[i]){
351                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
352                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
353                            best[i]= score;
354                            best_pos[i][0]= nx + 4*mx;
355                            best_pos[i][1]= ny + 4*my;
356                            break;
357                        }
358                    }
359                }
360            }
361        }else{
362            int tl;
363            //FIXME this could overflow (unlikely though)
364            const int cx = 4*(r - l);
365            const int cx2= r + l - 2*c;
366            const int cy = 4*(b - t);
367            const int cy2= b + t - 2*c;
368            int cxy;
369
370            if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
371                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
372            }else{
373                tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
374            }
375
376            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
377
378            assert(16*cx2 + 4*cx + 32*c == 32*r);
379            assert(16*cx2 - 4*cx + 32*c == 32*l);
380            assert(16*cy2 + 4*cy + 32*c == 32*b);
381            assert(16*cy2 - 4*cy + 32*c == 32*t);
382            assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
383
384            for(ny= -3; ny <= 3; ny++){
385                for(nx= -3; nx <= 3; nx++){
386                    //FIXME this could overflow (unlikely though)
387                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
388                    int i;
389
390                    if((nx&3)==0 && (ny&3)==0) continue;
391
392                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
393//                    if(nx&1) score-=32*c->penalty_factor;
394  //                  if(ny&1) score-=32*c->penalty_factor;
395
396                    for(i=0; i<8; i++){
397                        if(score < best[i]){
398                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
399                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
400                            best[i]= score;
401                            best_pos[i][0]= nx + 4*mx;
402                            best_pos[i][1]= ny + 4*my;
403                            break;
404                        }
405                    }
406                }
407            }
408        }
409        for(i=0; i<subpel_quality; i++){
410            nx= best_pos[i][0];
411            ny= best_pos[i][1];
412            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
413        }
414
415#if 0
416            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
417            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
418            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
419            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
420//            if(l < r && l < t && l < b && l < tl && l < bl && l < tr && l < br && bl < tl){
421            if(tl<br){
422
423//            nx= FFMAX(4*mx - bx, bx - 4*mx);
424//            ny= FFMAX(4*my - by, by - 4*my);
425
426            static int stats[7][7], count;
427            count++;
428            stats[4*mx - bx + 3][4*my - by + 3]++;
429            if(256*256*256*64 % count ==0){
430                for(i=0; i<49; i++){
431                    if((i%7)==0) printf("\n");
432                    printf("%6d ", stats[0][i]);
433                }
434                printf("\n");
435            }
436            }
437#endif
438#else
439
440        CHECK_QUARTER_MV(2, 2, mx-1, my-1)
441        CHECK_QUARTER_MV(0, 2, mx  , my-1)
442        CHECK_QUARTER_MV(2, 2, mx  , my-1)
443        CHECK_QUARTER_MV(2, 0, mx  , my  )
444        CHECK_QUARTER_MV(2, 2, mx  , my  )
445        CHECK_QUARTER_MV(0, 2, mx  , my  )
446        CHECK_QUARTER_MV(2, 2, mx-1, my  )
447        CHECK_QUARTER_MV(2, 0, mx-1, my  )
448
449        nx= bx;
450        ny= by;
451
452        for(i=0; i<8; i++){
453            int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
454            int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
455            CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
456        }
457#endif
458#if 0
459        //outer ring
460        CHECK_QUARTER_MV(1, 3, mx-1, my-1)
461        CHECK_QUARTER_MV(1, 2, mx-1, my-1)
462        CHECK_QUARTER_MV(1, 1, mx-1, my-1)
463        CHECK_QUARTER_MV(2, 1, mx-1, my-1)
464        CHECK_QUARTER_MV(3, 1, mx-1, my-1)
465        CHECK_QUARTER_MV(0, 1, mx  , my-1)
466        CHECK_QUARTER_MV(1, 1, mx  , my-1)
467        CHECK_QUARTER_MV(2, 1, mx  , my-1)
468        CHECK_QUARTER_MV(3, 1, mx  , my-1)
469        CHECK_QUARTER_MV(3, 2, mx  , my-1)
470        CHECK_QUARTER_MV(3, 3, mx  , my-1)
471        CHECK_QUARTER_MV(3, 0, mx  , my  )
472        CHECK_QUARTER_MV(3, 1, mx  , my  )
473        CHECK_QUARTER_MV(3, 2, mx  , my  )
474        CHECK_QUARTER_MV(3, 3, mx  , my  )
475        CHECK_QUARTER_MV(2, 3, mx  , my  )
476        CHECK_QUARTER_MV(1, 3, mx  , my  )
477        CHECK_QUARTER_MV(0, 3, mx  , my  )
478        CHECK_QUARTER_MV(3, 3, mx-1, my  )
479        CHECK_QUARTER_MV(2, 3, mx-1, my  )
480        CHECK_QUARTER_MV(1, 3, mx-1, my  )
481        CHECK_QUARTER_MV(1, 2, mx-1, my  )
482        CHECK_QUARTER_MV(1, 1, mx-1, my  )
483        CHECK_QUARTER_MV(1, 0, mx-1, my  )
484#endif
485        assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
486
487        *mx_ptr = bx;
488        *my_ptr = by;
489    }else{
490        *mx_ptr =4*mx;
491        *my_ptr =4*my;
492    }
493
494    return dmin;
495}
496
497
/**
 * Evaluate the full-pel candidate (x,y) unless the map already holds it
 * for the current map_generation.
 * On a miss the cmp() score is cached in score_map, the map entry is
 * tagged with the key, the MV penalty is added to d and COPY3_IF_LT
 * (defined elsewhere) is handed dmin/d and best[0]/best[1].
 * (x,y) must lie inside the search window (asserted).
 * key and index are computed in unsigned arithmetic and the penalty index
 * uses a multiplication, because x and y may be negative and
 * left-shifting a negative value is undefined behaviour.
 */
#define CHECK_MV(x,y)\
{\
    const unsigned key  = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const unsigned index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    assert((x) >= xmin);\
    assert((x) <= xmax);\
    assert((y) >= ymin);\
    assert((y) <= ymax);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[(x)*(1<<shift)-pred_x] + mv_penalty[(y)*(1<<shift)-pred_y])*penalty_factor;\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
    }\
}
516
/**
 * Clamp the candidate (ax,ay) to the search window
 * [xmin,xmax] x [ymin,ymax] and pass the clamped position to CHECK_MV.
 * Each argument is evaluated once.
 */
#define CHECK_CLIPPED_MV(ax, ay)                            \
{                                                           \
    const int Lx  = ax;                                     \
    const int Ly  = ay;                                     \
    const int Lx2 = FFMAX(xmin, FFMIN(Lx, xmax));           \
    const int Ly2 = FFMAX(ymin, FFMIN(Ly, ymax));           \
    CHECK_MV(Lx2, Ly2)                                      \
}
525
/**
 * Like CHECK_MV but without the window asserts, and additionally
 * recording new_dir in next_dir when (x,y) becomes the new best position.
 * key and index are computed in unsigned arithmetic and the penalty index
 * uses a multiplication, because x and y may be negative and
 * left-shifting a negative value is undefined behaviour.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const unsigned key  = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const unsigned index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[(x)*(1<<shift)-pred_x] + mv_penalty[(y)*(1<<shift)-pred_y])*penalty_factor;\
        if(d<dmin){\
            best[0]=x;\
            best[1]=y;\
            dmin=d;\
            next_dir= new_dir;\
        }\
    }\
}
545
/**
 * Debug helper: for every limit of the search window (scaled by 1<<S)
 * that the vector (x,y) violates, print the limit, the vector and the
 * macroblock position. v is stringified and appended to the message.
 * Expands to four consecutive if statements.
 */
#define check(x, y, S, v)                                                       \
    if ((x) < (xmin << (S)))                                                    \
        printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);     \
    if ((x) > (xmax << (S)))                                                    \
        printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);     \
    if ((y) < (ymin << (S)))                                                    \
        printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);     \
    if ((y) > (ymax << (S)))                                                    \
        printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);
/**
 * Declare the additional locals needed by the full-pel search routines:
 * the map from context c, the qpel bit of flags, and the shift that
 * CHECK_MV and its relatives apply to a candidate before indexing
 * mv_penalty (1, or 2 with qpel).
 */
#define LOAD_COMMON2                        \
    uint32_t *map   = c->map;               \
    const int qpel  = flags & FLAG_QPEL;    \
    const int shift = 1 + qpel;
556
557static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
558                                       int src_index, int ref_index, int const penalty_factor,
559                                       int size, int h, int flags)
560{
561    MotionEstContext * const c= &s->me;
562    me_cmp_func cmpf, chroma_cmpf;
563    int next_dir=-1;
564    LOAD_COMMON
565    LOAD_COMMON2
566    int map_generation= c->map_generation;
567
568    cmpf= s->dsp.me_cmp[size];
569    chroma_cmpf= s->dsp.me_cmp[size+1];
570
571    { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
572        const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
573        const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
574        if(map[index]!=key){ //this will be executed only very rarey
575            score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
576            map[index]= key;
577        }
578    }
579
580    for(;;){
581        int d;
582        const int dir= next_dir;
583        const int x= best[0];
584        const int y= best[1];
585        next_dir=-1;
586
587//printf("%d", dir);
588        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
589        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
590        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
591        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
592
593        if(next_dir==-1){
594            return dmin;
595        }
596    }
597}
598
599static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
600                                       int src_index, int ref_index, int const penalty_factor,
601                                       int size, int h, int flags)
602{
603    MotionEstContext * const c= &s->me;
604    me_cmp_func cmpf, chroma_cmpf;
605    int dia_size;
606    LOAD_COMMON
607    LOAD_COMMON2
608    int map_generation= c->map_generation;
609
610    cmpf= s->dsp.me_cmp[size];
611    chroma_cmpf= s->dsp.me_cmp[size+1];
612
613    for(dia_size=1; dia_size<=4; dia_size++){
614        int dir;
615        const int x= best[0];
616        const int y= best[1];
617
618        if(dia_size&(dia_size-1)) continue;
619
620        if(   x + dia_size > xmax
621           || x - dia_size < xmin
622           || y + dia_size > ymax
623           || y - dia_size < ymin)
624           continue;
625
626        for(dir= 0; dir<dia_size; dir+=2){
627            int d;
628
629            CHECK_MV(x + dir           , y + dia_size - dir);
630            CHECK_MV(x + dia_size - dir, y - dir           );
631            CHECK_MV(x - dir           , y - dia_size + dir);
632            CHECK_MV(x - dia_size + dir, y + dir           );
633        }
634
635        if(x!=best[0] || y!=best[1])
636            dia_size=0;
637#if 0
638{
639int dx, dy, i;
640static int stats[8*8];
641dx= FFABS(x-best[0]);
642dy= FFABS(y-best[1]);
643if(dy>dx){
644    dx^=dy; dy^=dx; dx^=dy;
645}
646stats[dy*8 + dx] ++;
647if(256*256*256*64 % (stats[0]+1)==0){
648    for(i=0; i<64; i++){
649        if((i&7)==0) printf("\n");
650        printf("%8d ", stats[i]);
651    }
652    printf("\n");
653}
654}
655#endif
656    }
657    return dmin;
658}
659
660static int hex_search(MpegEncContext * s, int *best, int dmin,
661                                       int src_index, int ref_index, int const penalty_factor,
662                                       int size, int h, int flags, int dia_size)
663{
664    MotionEstContext * const c= &s->me;
665    me_cmp_func cmpf, chroma_cmpf;
666    LOAD_COMMON
667    LOAD_COMMON2
668    int map_generation= c->map_generation;
669    int x,y,d;
670    const int dec= dia_size & (dia_size-1);
671
672    cmpf= s->dsp.me_cmp[size];
673    chroma_cmpf= s->dsp.me_cmp[size+1];
674
675    for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
676        do{
677            x= best[0];
678            y= best[1];
679
680            CHECK_CLIPPED_MV(x  -dia_size    , y);
681            CHECK_CLIPPED_MV(x+  dia_size    , y);
682            CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
683            CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
684            if(dia_size>1){
685                CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
686                CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
687            }
688        }while(best[0] != x || best[1] != y);
689    }
690
691    return dmin;
692}
693
694static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
695                                       int src_index, int ref_index, int const penalty_factor,
696                                       int size, int h, int flags)
697{
698    MotionEstContext * const c= &s->me;
699    me_cmp_func cmpf, chroma_cmpf;
700    LOAD_COMMON
701    LOAD_COMMON2
702    int map_generation= c->map_generation;
703    int x,y,i,d;
704    int dia_size= c->dia_size&0xFF;
705    const int dec= dia_size & (dia_size-1);
706    static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
707                                { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
708
709    cmpf= s->dsp.me_cmp[size];
710    chroma_cmpf= s->dsp.me_cmp[size+1];
711
712    for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
713        do{
714            x= best[0];
715            y= best[1];
716            for(i=0; i<8; i++){
717                CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
718            }
719        }while(best[0] != x || best[1] != y);
720    }
721
722    x= best[0];
723    y= best[1];
724    CHECK_CLIPPED_MV(x+1, y);
725    CHECK_CLIPPED_MV(x, y+1);
726    CHECK_CLIPPED_MV(x-1, y);
727    CHECK_CLIPPED_MV(x, y-1);
728
729    return dmin;
730}
731
732static int umh_search(MpegEncContext * s, int *best, int dmin,
733                                       int src_index, int ref_index, int const penalty_factor,
734                                       int size, int h, int flags)
735{
736    MotionEstContext * const c= &s->me;
737    me_cmp_func cmpf, chroma_cmpf;
738    LOAD_COMMON
739    LOAD_COMMON2
740    int map_generation= c->map_generation;
741    int x,y,x2,y2, i, j, d;
742    const int dia_size= c->dia_size&0xFE;
743    static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
744                                 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
745                                 {-2, 3}, { 0, 4}, { 2, 3},
746                                 {-2,-3}, { 0,-4}, { 2,-3},};
747
748    cmpf= s->dsp.me_cmp[size];
749    chroma_cmpf= s->dsp.me_cmp[size+1];
750
751    x= best[0];
752    y= best[1];
753    for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
754        CHECK_MV(x2, y);
755    }
756    for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
757        CHECK_MV(x, y2);
758    }
759
760    x= best[0];
761    y= best[1];
762    for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
763        for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
764            CHECK_MV(x2, y2);
765        }
766    }
767
768//FIXME prevent the CLIP stuff
769
770    for(j=1; j<=dia_size/4; j++){
771        for(i=0; i<16; i++){
772            CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
773        }
774    }
775
776    return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
777}
778
779static int full_search(MpegEncContext * s, int *best, int dmin,
780                                       int src_index, int ref_index, int const penalty_factor,
781                                       int size, int h, int flags)
782{
783    MotionEstContext * const c= &s->me;
784    me_cmp_func cmpf, chroma_cmpf;
785    LOAD_COMMON
786    LOAD_COMMON2
787    int map_generation= c->map_generation;
788    int x,y, d;
789    const int dia_size= c->dia_size&0xFF;
790
791    cmpf= s->dsp.me_cmp[size];
792    chroma_cmpf= s->dsp.me_cmp[size+1];
793
794    for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
795        for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
796            CHECK_MV(x, y);
797        }
798    }
799
800    x= best[0];
801    y= best[1];
802    d= dmin;
803    CHECK_CLIPPED_MV(x  , y);
804    CHECK_CLIPPED_MV(x+1, y);
805    CHECK_CLIPPED_MV(x, y+1);
806    CHECK_CLIPPED_MV(x-1, y);
807    CHECK_CLIPPED_MV(x, y-1);
808    best[0]= x;
809    best[1]= y;
810
811    return d;
812}
813
/**
 * Evaluate the candidate (ax,ay) for sab_diamond_search unless the map
 * already holds it for the current map_generation.
 * On a miss the cmp() score is cached and, if the score plus MV penalty
 * is lower than the worst entry of minima[], the candidate is inserted
 * into the ascending minima[] list (the last entry is dropped), i is set
 * to -1 and a continue is issued, so the enclosing loop over minima[]
 * starts over.
 * key and index are computed in unsigned arithmetic and the penalty index
 * uses a multiplication, because ax and ay may be negative and
 * left-shifting a negative value is undefined behaviour.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const unsigned key  = ((unsigned)(ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
    const unsigned index= (((unsigned)(ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[(ax)*(1<<shift)-pred_x] + mv_penalty[(ay)*(1<<shift)-pred_y])*penalty_factor;\
        if(d < minima[minima_count-1].height){\
            int j=0;\
            \
            while(d >= minima[j].height) j++;\
\
            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
\
            minima[j].checked= 0;\
            minima[j].height= d;\
            minima[j].x= ax;\
            minima[j].y= ay;\
            \
            i=-1;\
            continue;\
        }\
    }\
}
842
843#define MAX_SAB_SIZE ME_MAP_SIZE
844static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
845                                       int src_index, int ref_index, int const penalty_factor,
846                                       int size, int h, int flags)
847{
848    MotionEstContext * const c= &s->me;
849    me_cmp_func cmpf, chroma_cmpf;
850    Minima minima[MAX_SAB_SIZE];
851    const int minima_count= FFABS(c->dia_size);
852    int i, j;
853    LOAD_COMMON
854    LOAD_COMMON2
855    int map_generation= c->map_generation;
856
857    cmpf= s->dsp.me_cmp[size];
858    chroma_cmpf= s->dsp.me_cmp[size+1];
859
860    /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
861      become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
862     */
863    for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
864        uint32_t key= map[i];
865
866        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
867
868        if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
869
870        minima[j].height= score_map[i];
871        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
872        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
873        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
874        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
875
876        // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
877        if(   minima[j].x > xmax || minima[j].x < xmin
878           || minima[j].y > ymax || minima[j].y < ymin)
879            continue;
880
881        minima[j].checked=0;
882        if(minima[j].x || minima[j].y)
883            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
884
885        j++;
886    }
887
888    qsort(minima, j, sizeof(Minima), minima_cmp);
889
890    for(; j<minima_count; j++){
891        minima[j].height=256*256*256*64;
892        minima[j].checked=0;
893        minima[j].x= minima[j].y=0;
894    }
895
896    for(i=0; i<minima_count; i++){
897        const int x= minima[i].x;
898        const int y= minima[i].y;
899        int d;
900
901        if(minima[i].checked) continue;
902
903        if(   x >= xmax || x <= xmin
904           || y >= ymax || y <= ymin)
905           continue;
906
907        SAB_CHECK_MV(x-1, y)
908        SAB_CHECK_MV(x+1, y)
909        SAB_CHECK_MV(x  , y-1)
910        SAB_CHECK_MV(x  , y+1)
911
912        minima[i].checked= 1;
913    }
914
915    best[0]= minima[0].x;
916    best[1]= minima[0].y;
917    dmin= minima[0].height;
918
919    if(   best[0] < xmax && best[0] > xmin
920       && best[1] < ymax && best[1] > ymin){
921        int d;
922        //ensure that the refernece samples for hpel refinement are in the map
923        CHECK_MV(best[0]-1, best[1])
924        CHECK_MV(best[0]+1, best[1])
925        CHECK_MV(best[0], best[1]-1)
926        CHECK_MV(best[0], best[1]+1)
927    }
928    return dmin;
929}
930
931static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
932                                       int src_index, int ref_index, int const penalty_factor,
933                                       int size, int h, int flags)
934{
935    MotionEstContext * const c= &s->me;
936    me_cmp_func cmpf, chroma_cmpf;
937    int dia_size;
938    LOAD_COMMON
939    LOAD_COMMON2
940    int map_generation= c->map_generation;
941
942    cmpf= s->dsp.me_cmp[size];
943    chroma_cmpf= s->dsp.me_cmp[size+1];
944
945    for(dia_size=1; dia_size<=c->dia_size; dia_size++){
946        int dir, start, end;
947        const int x= best[0];
948        const int y= best[1];
949
950        start= FFMAX(0, y + dia_size - ymax);
951        end  = FFMIN(dia_size, xmax - x + 1);
952        for(dir= start; dir<end; dir++){
953            int d;
954
955//check(x + dir,y + dia_size - dir,0, a0)
956            CHECK_MV(x + dir           , y + dia_size - dir);
957        }
958
959        start= FFMAX(0, x + dia_size - xmax);
960        end  = FFMIN(dia_size, y - ymin + 1);
961        for(dir= start; dir<end; dir++){
962            int d;
963
964//check(x + dia_size - dir, y - dir,0, a1)
965            CHECK_MV(x + dia_size - dir, y - dir           );
966        }
967
968        start= FFMAX(0, -y + dia_size + ymin );
969        end  = FFMIN(dia_size, x - xmin + 1);
970        for(dir= start; dir<end; dir++){
971            int d;
972
973//check(x - dir,y - dia_size + dir,0, a2)
974            CHECK_MV(x - dir           , y - dia_size + dir);
975        }
976
977        start= FFMAX(0, -x + dia_size + xmin );
978        end  = FFMIN(dia_size, ymax - y + 1);
979        for(dir= start; dir<end; dir++){
980            int d;
981
982//check(x - dia_size + dir, y + dir,0, a3)
983            CHECK_MV(x - dia_size + dir, y + dir           );
984        }
985
986        if(x!=best[0] || y!=best[1])
987            dia_size=0;
988#if 0
989{
990int dx, dy, i;
991static int stats[8*8];
992dx= FFABS(x-best[0]);
993dy= FFABS(y-best[1]);
994stats[dy*8 + dx] ++;
995if(256*256*256*64 % (stats[0]+1)==0){
996    for(i=0; i<64; i++){
997        if((i&7)==0) printf("\n");
998        printf("%6d ", stats[i]);
999    }
1000    printf("\n");
1001}
1002}
1003#endif
1004    }
1005    return dmin;
1006}
1007
1008static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
1009                                       int src_index, int ref_index, int const penalty_factor,
1010                                       int size, int h, int flags){
1011    MotionEstContext * const c= &s->me;
1012    if(c->dia_size==-1)
1013        return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1014    else if(c->dia_size<-1)
1015        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1016    else if(c->dia_size<2)
1017        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1018    else if(c->dia_size>1024)
1019        return          full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1020    else if(c->dia_size>768)
1021        return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1022    else if(c->dia_size>512)
1023        return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
1024    else if(c->dia_size>256)
1025        return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1026    else
1027        return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1028}
1029
1030/*!
1031   \param P[10][2] a list of candidate mvs to check before starting the
1032   iterative search. If one of the candidates is close to the optimal mv, then
1033   it takes fewer iterations. And it increases the chance that we find the
1034   optimal mv.
1035 */
1036static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1037                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1038                             int ref_mv_scale, int flags, int size, int h)
1039{
1040    MotionEstContext * const c= &s->me;
1041    int best[2]={0, 0};      /*!< x and y coordinates of the best motion vector.
1042                               i.e. the difference between the position of the
1043                               block currently being encoded and the position of
1044                               the block chosen to predict it from. */
1045    int d;                   ///< the score (cmp + penalty) of any given mv
1046    int dmin;                /*!< the best value of d, i.e. the score
1047                               corresponding to the mv stored in best[]. */
1048    int map_generation;
1049    int penalty_factor;
1050    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
1051    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
1052    me_cmp_func cmpf, chroma_cmpf;
1053
1054    LOAD_COMMON
1055    LOAD_COMMON2
1056
1057    if(c->pre_pass){
1058        penalty_factor= c->pre_penalty_factor;
1059        cmpf= s->dsp.me_pre_cmp[size];
1060        chroma_cmpf= s->dsp.me_pre_cmp[size+1];
1061    }else{
1062        penalty_factor= c->penalty_factor;
1063        cmpf= s->dsp.me_cmp[size];
1064        chroma_cmpf= s->dsp.me_cmp[size+1];
1065    }
1066
1067    map_generation= update_map_generation(c);
1068
1069    assert(cmpf);
1070    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
1071    map[0]= map_generation;
1072    score_map[0]= dmin;
1073
1074    //FIXME precalc first term below?
1075    if((s->pict_type == FF_B_TYPE && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
1076        dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
1077
1078    /* first line */
1079    if (s->first_slice_line) {
1080        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1081        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1082                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1083    }else{
1084        if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
1085                    && ( P_LEFT[0]    |P_LEFT[1]
1086                        |P_TOP[0]     |P_TOP[1]
1087                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
1088            *mx_ptr= 0;
1089            *my_ptr= 0;
1090            c->skip=1;
1091            return dmin;
1092        }
1093        CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
1094        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
1095        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
1096        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
1097        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
1098        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1099                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1100        CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
1101        CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
1102        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1103    }
1104    if(dmin>h*h*4){
1105        if(c->pre_pass){
1106            CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
1107                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
1108            if(!s->first_slice_line)
1109                CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1110                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1111        }else{
1112            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1113                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1114            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1115                CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1116                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1117        }
1118    }
1119
1120    if(c->avctx->last_predictor_count){
1121        const int count= c->avctx->last_predictor_count;
1122        const int xstart= FFMAX(0, s->mb_x - count);
1123        const int ystart= FFMAX(0, s->mb_y - count);
1124        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
1125        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
1126        int mb_y;
1127
1128        for(mb_y=ystart; mb_y<yend; mb_y++){
1129            int mb_x;
1130            for(mb_x=xstart; mb_x<xend; mb_x++){
1131                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
1132                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
1133                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
1134
1135                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
1136                CHECK_MV(mx,my)
1137            }
1138        }
1139    }
1140
1141//check(best[0],best[1],0, b0)
1142    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1143
1144//check(best[0],best[1],0, b1)
1145    *mx_ptr= best[0];
1146    *my_ptr= best[1];
1147
1148//    printf("%d %d %d \n", best[0], best[1], dmin);
1149    return dmin;
1150}
1151
1152//this function is dedicated to the braindamaged gcc
1153inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1154                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1155                             int ref_mv_scale, int size, int h)
1156{
1157    MotionEstContext * const c= &s->me;
1158//FIXME convert other functions in the same way if faster
1159    if(c->flags==0 && h==16 && size==0){
1160        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
1161//    case FLAG_QPEL:
1162//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
1163    }else{
1164        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
1165    }
1166}
1167
1168static int epzs_motion_search4(MpegEncContext * s,
1169                             int *mx_ptr, int *my_ptr, int P[10][2],
1170                             int src_index, int ref_index, int16_t (*last_mv)[2],
1171                             int ref_mv_scale)
1172{
1173    MotionEstContext * const c= &s->me;
1174    int best[2]={0, 0};
1175    int d, dmin;
1176    int map_generation;
1177    const int penalty_factor= c->penalty_factor;
1178    const int size=1;
1179    const int h=8;
1180    const int ref_mv_stride= s->mb_stride;
1181    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1182    me_cmp_func cmpf, chroma_cmpf;
1183    LOAD_COMMON
1184    int flags= c->flags;
1185    LOAD_COMMON2
1186
1187    cmpf= s->dsp.me_cmp[size];
1188    chroma_cmpf= s->dsp.me_cmp[size+1];
1189
1190    map_generation= update_map_generation(c);
1191
1192    dmin = 1000000;
1193//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1194    /* first line */
1195    if (s->first_slice_line) {
1196        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1197        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1198                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1199        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1200    }else{
1201        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1202        //FIXME try some early stop
1203        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1204        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1205        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1206        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1207        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1208                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1209    }
1210    if(dmin>64*4){
1211        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1212                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1213        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1214            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1215                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1216    }
1217
1218    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1219
1220    *mx_ptr= best[0];
1221    *my_ptr= best[1];
1222
1223//    printf("%d %d %d \n", best[0], best[1], dmin);
1224    return dmin;
1225}
1226
1227//try to merge with above FIXME (needs PSNR test)
1228static int epzs_motion_search2(MpegEncContext * s,
1229                             int *mx_ptr, int *my_ptr, int P[10][2],
1230                             int src_index, int ref_index, int16_t (*last_mv)[2],
1231                             int ref_mv_scale)
1232{
1233    MotionEstContext * const c= &s->me;
1234    int best[2]={0, 0};
1235    int d, dmin;
1236    int map_generation;
1237    const int penalty_factor= c->penalty_factor;
1238    const int size=0; //FIXME pass as arg
1239    const int h=8;
1240    const int ref_mv_stride= s->mb_stride;
1241    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1242    me_cmp_func cmpf, chroma_cmpf;
1243    LOAD_COMMON
1244    int flags= c->flags;
1245    LOAD_COMMON2
1246
1247    cmpf= s->dsp.me_cmp[size];
1248    chroma_cmpf= s->dsp.me_cmp[size+1];
1249
1250    map_generation= update_map_generation(c);
1251
1252    dmin = 1000000;
1253//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1254    /* first line */
1255    if (s->first_slice_line) {
1256        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1257        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1258                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1259        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1260    }else{
1261        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1262        //FIXME try some early stop
1263        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1264        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1265        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1266        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1267        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1268                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1269    }
1270    if(dmin>64*4){
1271        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1272                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1273        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1274            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1275                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1276    }
1277
1278    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1279
1280    *mx_ptr= best[0];
1281    *my_ptr= best[1];
1282
1283//    printf("%d %d %d \n", best[0], best[1], dmin);
1284    return dmin;
1285}
1286