1/*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27/*
28 * FUNCTION
29 *      mlib_ImageConvMxN_Fp - image convolution with edge condition
30 *
31 * SYNOPSIS
32 *      mlib_status mlib_ImageConvMxN_Fp(mlib_image       *dst,
33 *                                       const mlib_image *src,
34 *                                       const mlib_d64   *kernel,
35 *                                       mlib_s32         m,
36 *                                       mlib_s32         n,
37 *                                       mlib_s32         dm,
38 *                                       mlib_s32         dn,
39 *                                       mlib_s32         cmask,
40 *                                       mlib_edge        edge)
41 *
42 * ARGUMENTS
43 *      dst       Pointer to destination image.
44 *      src       Pointer to source image.
45 *      m         Kernel width (m must be not less than 1).
46 *      n         Kernel height (n must be not less than 1).
47 *      dm, dn    Position of key element in convolution kernel.
48 *      kernel    Pointer to convolution kernel.
49 *      cmask     Channel mask to indicate the channels to be convolved.
 *                Each bit represents a channel in the image; the
 *                channels corresponding to 1 bits are the ones processed.
52 *      edge      Type of edge condition.
53 *
54 * DESCRIPTION
55 *      2-D convolution, MxN kernel.
56 *
57 *      The center of the source image is mapped to the center of the
58 *      destination image.
59 *      The unselected channels are not overwritten. If both src and dst have
60 *      just one channel, cmask is ignored.
61 *
62 *      The edge condition can be one of the following:
63 *              MLIB_EDGE_DST_NO_WRITE  (default)
64 *              MLIB_EDGE_DST_FILL_ZERO
65 *              MLIB_EDGE_DST_COPY_SRC
66 *              MLIB_EDGE_SRC_EXTEND
67 *
68 * RESTRICTION
69 *      The src and the dst must be the same type and have same number
70 *      of channels (1, 2, 3, or 4).
71 *      m >= 1, n >= 1,
72 *      0 <= dm < m, 0 <= dn < n.
73 */
74
75#include "mlib_image.h"
76#include "mlib_ImageCheck.h"
77#include "mlib_SysMath.h"
78#include "mlib_ImageConv.h"
79
80/***************************************************************/
81static void mlib_ImageConvMxNMulAdd_F32(mlib_f32       *dst,
82                                        const mlib_f32 *src,
83                                        const mlib_d64 *kernel,
84                                        mlib_s32       n,
85                                        mlib_s32       m,
86                                        mlib_s32       nch,
87                                        mlib_s32       dnch);
88
89static void mlib_ImageConvMxNF322F32_ext(mlib_f32       *dst,
90                                         const mlib_f32 *src,
91                                         mlib_s32       n,
92                                         mlib_s32       nch,
93                                         mlib_s32       dx_l,
94                                         mlib_s32       dx_r);
95
96static void mlib_ImageConvMxNMulAdd_D64(mlib_d64       *dst,
97                                        const mlib_d64 *src,
98                                        const mlib_d64 *kernel,
99                                        mlib_s32       n,
100                                        mlib_s32       m,
101                                        mlib_s32       nch,
102                                        mlib_s32       dnch);
103
104static void mlib_ImageConvMxND642D64_ext(mlib_d64       *dst,
105                                         const mlib_d64 *src,
106                                         mlib_s32       n,
107                                         mlib_s32       nch,
108                                         mlib_s32       dx_l,
109                                         mlib_s32       dx_r);
110
111/***************************************************************/
112#if 0
113static void mlib_ImageConvMxNMulAdd2_F32(mlib_f32       *hdst,
114                                         mlib_f32       *vdst,
115                                         const mlib_f32 *src,
116                                         const mlib_d64 *hfilter,
117                                         const mlib_d64 *vfilter,
118                                         mlib_s32       n,
119                                         mlib_s32       m,
120                                         mlib_s32       nch,
121                                         mlib_s32       dnch);
122
123static void mlib_ImageConvMxNMulAdd2_D64(mlib_d64       *hdst,
124                                         mlib_d64       *vdst,
125                                         const mlib_d64 *src,
126                                         const mlib_d64 *hfilter,
127                                         const mlib_d64 *vfilter,
128                                         mlib_s32       n,
129                                         mlib_s32       m,
130                                         mlib_s32       nch,
131                                         mlib_s32       dnch);
132#endif /* 0 */
133
134/***************************************************************/
135mlib_status mlib_ImageConvMxN_Fp(mlib_image       *dst,
136                                 const mlib_image *src,
137                                 const mlib_d64   *kernel,
138                                 mlib_s32         m,
139                                 mlib_s32         n,
140                                 mlib_s32         dm,
141                                 mlib_s32         dn,
142                                 mlib_s32         cmask,
143                                 mlib_edge        edge)
144{
145  mlib_type type;
146
147  MLIB_IMAGE_CHECK(dst);
148  type = mlib_ImageGetType(dst);
149
150  if (type != MLIB_FLOAT && type != MLIB_DOUBLE)
151    return MLIB_FAILURE;
152
153  return mlib_ImageConvMxN_f(dst, src, kernel, m, n, dm, dn, 0, cmask, edge);
154}
155
156/***************************************************************/
157void mlib_ImageConvMxNMulAdd_F32(mlib_f32       *dst,
158                                 const mlib_f32 *src,
159                                 const mlib_d64 *kernel,
160                                 mlib_s32       n,
161                                 mlib_s32       m,
162                                 mlib_s32       nch,
163                                 mlib_s32       dnch)
164{
165  mlib_f32 *hdst1 = dst + dnch;
166  mlib_s32 i, j;
167
168  for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
169    const mlib_f32 *src2 = src + 2 * nch;
170    mlib_f32 hval0 = (mlib_f32) kernel[0];
171    mlib_f32 hval1 = (mlib_f32) kernel[1];
172    mlib_f32 hval2 = (mlib_f32) kernel[2];
173    mlib_f32 val0 = src[0];
174    mlib_f32 val1 = src[nch];
175    mlib_f32 hdvl = dst[0];
176
177#ifdef __SUNPRO_C
178#pragma pipeloop(0)
179#endif /* __SUNPRO_C */
180    for (i = 0; i < n; i++) {
181      mlib_f32 hdvl0 = val0 * hval0 + hdvl;
182      mlib_f32 val2 = src2[i * nch];
183
184      hdvl = hdst1[i * dnch];
185      hdvl0 += val1 * hval1;
186      hdvl0 += val2 * hval2;
187      val0 = val1;
188      val1 = val2;
189
190      dst[i * dnch] = hdvl0;
191    }
192  }
193
194  if (j < m - 1) {
195    const mlib_f32 *src2 = src + 2 * nch;
196    mlib_f32 hval0 = (mlib_f32) kernel[0];
197    mlib_f32 hval1 = (mlib_f32) kernel[1];
198    mlib_f32 val0 = src[0];
199    mlib_f32 val1 = src[nch];
200    mlib_f32 hdvl = dst[0];
201#ifdef __SUNPRO_C
202#pragma pipeloop(0)
203#endif /* __SUNPRO_C */
204    for (i = 0; i < n; i++) {
205      mlib_f32 hdvl0 = val0 * hval0 + hdvl;
206      mlib_f32 val2 = src2[i * nch];
207
208      hdvl = hdst1[i * dnch];
209      hdvl0 += val1 * hval1;
210      val0 = val1;
211      val1 = val2;
212
213      dst[i * dnch] = hdvl0;
214    }
215
216  }
217  else if (j < m) {
218    const mlib_f32 *src2 = src + 2 * nch;
219    mlib_f32 hval0 = (mlib_f32) kernel[0];
220    mlib_f32 val0 = src[0];
221    mlib_f32 val1 = src[nch];
222    mlib_f32 hdvl = dst[0];
223
224#ifdef __SUNPRO_C
225#pragma pipeloop(0)
226#endif /* __SUNPRO_C */
227    for (i = 0; i < n; i++) {
228      mlib_f32 hdvl0 = val0 * hval0 + hdvl;
229      mlib_f32 val2 = src2[i * nch];
230
231      hdvl = hdst1[i * dnch];
232      val0 = val1;
233      val1 = val2;
234
235      dst[i * dnch] = hdvl0;
236    }
237  }
238}
239
240/***************************************************************/
241void mlib_ImageConvMxNF322F32_ext(mlib_f32       *dst,
242                                  const mlib_f32 *src,
243                                  mlib_s32       n,
244                                  mlib_s32       nch,
245                                  mlib_s32       dx_l,
246                                  mlib_s32       dx_r)
247{
248  mlib_s32 i;
249  mlib_f32 val = src[0];
250
251  for (i = 0; i < dx_l; i++)
252    dst[i] = val;
253#ifdef __SUNPRO_C
254#pragma pipeloop(0)
255#endif /* __SUNPRO_C */
256  for (; i < n - dx_r; i++)
257    dst[i] = src[nch * (i - dx_l)];
258  val = dst[n - dx_r - 1];
259  for (; i < n; i++)
260    dst[i] = val;
261}
262
263/***************************************************************/
264mlib_status mlib_convMxNext_f32(mlib_image       *dst,
265                                const mlib_image *src,
266                                const mlib_d64   *kernel,
267                                mlib_s32         m,
268                                mlib_s32         n,
269                                mlib_s32         dx_l,
270                                mlib_s32         dx_r,
271                                mlib_s32         dy_t,
272                                mlib_s32         dy_b,
273                                mlib_s32         cmask)
274{
275  mlib_d64 dspace[1024], *dsa = dspace;
276  mlib_s32 wid_e = mlib_ImageGetWidth(src);
277  mlib_f32 *fsa;
278  mlib_f32 *da = mlib_ImageGetData(dst);
279  mlib_f32 *sa = mlib_ImageGetData(src);
280  mlib_s32 dlb = mlib_ImageGetStride(dst) >> 2;
281  mlib_s32 slb = mlib_ImageGetStride(src) >> 2;
282  mlib_s32 dw = mlib_ImageGetWidth(dst);
283  mlib_s32 dh = mlib_ImageGetHeight(dst);
284  mlib_s32 nch = mlib_ImageGetChannels(dst);
285  mlib_s32 i, j, j1, k;
286
287  if (3 * wid_e + m > 1024) {
288    dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64));
289
290    if (dsa == NULL)
291      return MLIB_FAILURE;
292  }
293
294  fsa = (mlib_f32 *) dsa;
295
296  for (j = 0; j < dh; j++, da += dlb) {
297    for (k = 0; k < nch; k++)
298      if (cmask & (1 << (nch - 1 - k))) {
299        const mlib_f32 *sa1 = sa + k;
300        mlib_f32 *da1 = da + k;
301        const mlib_d64 *kernel1 = kernel;
302
303        for (i = 0; i < dw; i++)
304          da1[i * nch] = 0.f;
305        for (j1 = 0; j1 < n; j1++, kernel1 += m) {
306          mlib_ImageConvMxNF322F32_ext(fsa, sa1, dw + m - 1, nch, dx_l, dx_r);
307          mlib_ImageConvMxNMulAdd_F32(da1, fsa, kernel1, dw, m, 1, nch);
308
309          if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2))
310            sa1 += slb;
311        }
312      }
313
314    if ((j >= dy_t) && (j < dh + n - dy_b - 2))
315      sa += slb;
316  }
317
318  if (dsa != dspace)
319    mlib_free(dsa);
320  return MLIB_SUCCESS;
321}
322
323/***************************************************************/
324#if 0
325
326void mlib_ImageConvMxNMulAdd2_F32(mlib_f32       *hdst,
327                                  mlib_f32       *vdst,
328                                  const mlib_f32 *src,
329                                  const mlib_d64 *hfilter,
330                                  const mlib_d64 *vfilter,
331                                  mlib_s32       n,
332                                  mlib_s32       m,
333                                  mlib_s32       nch,
334                                  mlib_s32       dnch)
335{
336  mlib_f32 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
337  mlib_s32 i, j;
338
339  for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
340    mlib_f32 *src2 = src + 2 * nch;
341    mlib_f32 hval0 = (mlib_f32) hfilter[0];
342    mlib_f32 vval0 = (mlib_f32) vfilter[0];
343    mlib_f32 hval1 = (mlib_f32) hfilter[1];
344    mlib_f32 vval1 = (mlib_f32) vfilter[1];
345    mlib_f32 hval2 = (mlib_f32) hfilter[2];
346    mlib_f32 vval2 = (mlib_f32) vfilter[2];
347    mlib_f32 val0 = src[0];
348    mlib_f32 val1 = src[nch];
349    mlib_f32 hdvl = hdst[0];
350    mlib_f32 vdvl = vdst[0];
351
352#ifdef __SUNPRO_C
353#pragma pipeloop(0)
354#endif /* __SUNPRO_C */
355    for (i = 0; i < n; i++) {
356      mlib_f32 hdvl0 = val0 * hval0 + hdvl;
357      mlib_f32 vdvl0 = val0 * vval0 + vdvl;
358      mlib_f32 val2 = src2[i * nch];
359
360      hdvl = hdst1[i * dnch];
361      vdvl = vdst1[i * dnch];
362      hdvl0 += val1 * hval1;
363      vdvl0 += val1 * vval1;
364      hdvl0 += val2 * hval2;
365      vdvl0 += val2 * vval2;
366      val0 = val1;
367      val1 = val2;
368
369      hdst[i * dnch] = hdvl0;
370      vdst[i * dnch] = vdvl0;
371    }
372  }
373
374  if (j < m - 1) {
375    mlib_f32 *src2 = src + 2 * nch;
376    mlib_f32 hval0 = (mlib_f32) hfilter[0];
377    mlib_f32 vval0 = (mlib_f32) vfilter[0];
378    mlib_f32 hval1 = (mlib_f32) hfilter[1];
379    mlib_f32 vval1 = (mlib_f32) vfilter[1];
380    mlib_f32 val0 = src[0];
381    mlib_f32 val1 = src[nch];
382    mlib_f32 hdvl = hdst[0];
383    mlib_f32 vdvl = vdst[0];
384
385#ifdef __SUNPRO_C
386#pragma pipeloop(0)
387#endif /* __SUNPRO_C */
388    for (i = 0; i < n; i++) {
389      mlib_f32 hdvl0 = val0 * hval0 + hdvl;
390      mlib_f32 vdvl0 = val0 * vval0 + vdvl;
391      mlib_f32 val2 = src2[i * nch];
392
393      hdvl = hdst1[i * dnch];
394      vdvl = vdst1[i * dnch];
395      hdvl0 += val1 * hval1;
396      vdvl0 += val1 * vval1;
397      val0 = val1;
398      val1 = val2;
399
400      hdst[i * dnch] = hdvl0;
401      vdst[i * dnch] = vdvl0;
402    }
403
404  }
405  else if (j < m) {
406    mlib_f32 *src2 = src + 2 * nch;
407    mlib_f32 hval0 = (mlib_f32) hfilter[0];
408    mlib_f32 vval0 = (mlib_f32) vfilter[0];
409    mlib_f32 val0 = src[0];
410    mlib_f32 val1 = src[nch];
411    mlib_f32 hdvl = hdst[0];
412    mlib_f32 vdvl = vdst[0];
413
414#ifdef __SUNPRO_C
415#pragma pipeloop(0)
416#endif /* __SUNPRO_C */
417    for (i = 0; i < n; i++) {
418      mlib_f32 hdvl0 = val0 * hval0 + hdvl;
419      mlib_f32 vdvl0 = val0 * vval0 + vdvl;
420      mlib_f32 val2 = src2[i * nch];
421
422      hdvl = hdst1[i * dnch];
423      vdvl = vdst1[i * dnch];
424      val0 = val1;
425      val1 = val2;
426
427      hdst[i * dnch] = hdvl0;
428      vdst[i * dnch] = vdvl0;
429    }
430  }
431}
432
433/***************************************************************/
434void mlib_ImageConvMxNMulAdd2_D64(mlib_d64       *hdst,
435                                  mlib_d64       *vdst,
436                                  const mlib_d64 *src,
437                                  const mlib_d64 *hfilter,
438                                  const mlib_d64 *vfilter,
439                                  mlib_s32       n,
440                                  mlib_s32       m,
441                                  mlib_s32       nch,
442                                  mlib_s32       dnch)
443{
444  mlib_d64 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
445  mlib_s32 i, j;
446
447  for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
448    mlib_d64 *src2 = src + 2 * nch;
449    mlib_d64 hval0 = hfilter[0];
450    mlib_d64 vval0 = vfilter[0];
451    mlib_d64 hval1 = hfilter[1];
452    mlib_d64 vval1 = vfilter[1];
453    mlib_d64 hval2 = hfilter[2];
454    mlib_d64 vval2 = vfilter[2];
455    mlib_d64 val0 = src[0];
456    mlib_d64 val1 = src[nch];
457    mlib_d64 hdvl = hdst[0];
458    mlib_d64 vdvl = vdst[0];
459
460#ifdef __SUNPRO_C
461#pragma pipeloop(0)
462#endif /* __SUNPRO_C */
463    for (i = 0; i < n; i++) {
464      mlib_d64 hdvl0 = val0 * hval0 + hdvl;
465      mlib_d64 vdvl0 = val0 * vval0 + vdvl;
466      mlib_d64 val2 = src2[i * nch];
467
468      hdvl = hdst1[i * dnch];
469      vdvl = vdst1[i * dnch];
470      hdvl0 += val1 * hval1;
471      vdvl0 += val1 * vval1;
472      hdvl0 += val2 * hval2;
473      vdvl0 += val2 * vval2;
474      val0 = val1;
475      val1 = val2;
476
477      hdst[i * dnch] = hdvl0;
478      vdst[i * dnch] = vdvl0;
479    }
480  }
481
482  if (j < m - 1) {
483    mlib_d64 *src2 = src + 2 * nch;
484    mlib_d64 hval0 = hfilter[0];
485    mlib_d64 vval0 = vfilter[0];
486    mlib_d64 hval1 = hfilter[1];
487    mlib_d64 vval1 = vfilter[1];
488    mlib_d64 val0 = src[0];
489    mlib_d64 val1 = src[nch];
490    mlib_d64 hdvl = hdst[0];
491    mlib_d64 vdvl = vdst[0];
492
493#ifdef __SUNPRO_C
494#pragma pipeloop(0)
495#endif /* __SUNPRO_C */
496    for (i = 0; i < n; i++) {
497      mlib_d64 hdvl0 = val0 * hval0 + hdvl;
498      mlib_d64 vdvl0 = val0 * vval0 + vdvl;
499      mlib_d64 val2 = src2[i * nch];
500
501      hdvl = hdst1[i * dnch];
502      vdvl = vdst1[i * dnch];
503      hdvl0 += val1 * hval1;
504      vdvl0 += val1 * vval1;
505      val0 = val1;
506      val1 = val2;
507
508      hdst[i * dnch] = hdvl0;
509      vdst[i * dnch] = vdvl0;
510    }
511
512  }
513  else if (j < m) {
514    mlib_d64 *src2 = src + 2 * nch;
515    mlib_d64 hval0 = hfilter[0];
516    mlib_d64 vval0 = vfilter[0];
517    mlib_d64 val0 = src[0];
518    mlib_d64 val1 = src[nch];
519    mlib_d64 hdvl = hdst[0];
520    mlib_d64 vdvl = vdst[0];
521
522#ifdef __SUNPRO_C
523#pragma pipeloop(0)
524#endif /* __SUNPRO_C */
525    for (i = 0; i < n; i++) {
526      mlib_d64 hdvl0 = val0 * hval0 + hdvl;
527      mlib_d64 vdvl0 = val0 * vval0 + vdvl;
528      mlib_d64 val2 = src2[i * nch];
529
530      hdvl = hdst1[i * dnch];
531      vdvl = vdst1[i * dnch];
532      val0 = val1;
533      val1 = val2;
534
535      hdst[i * dnch] = hdvl0;
536      vdst[i * dnch] = vdvl0;
537    }
538  }
539}
540
541#endif /* 0 */
542
543/***************************************************************/
544void mlib_ImageConvMxNMulAdd_D64(mlib_d64       *dst,
545                                 const mlib_d64 *src,
546                                 const mlib_d64 *kernel,
547                                 mlib_s32       n,
548                                 mlib_s32       m,
549                                 mlib_s32       nch,
550                                 mlib_s32       dnch)
551{
552  mlib_d64 *hdst1 = dst + dnch;
553  mlib_s32 i, j;
554
555  for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
556    const mlib_d64 *src2 = src + 2 * nch;
557    mlib_d64 hval0 = kernel[0];
558    mlib_d64 hval1 = kernel[1];
559    mlib_d64 hval2 = kernel[2];
560    mlib_d64 val0 = src[0];
561    mlib_d64 val1 = src[nch];
562    mlib_d64 hdvl = dst[0];
563
564#ifdef __SUNPRO_C
565#pragma pipeloop(0)
566#endif /* __SUNPRO_C */
567    for (i = 0; i < n; i++) {
568      mlib_d64 hdvl0 = val0 * hval0 + hdvl;
569      mlib_d64 val2 = src2[i * nch];
570
571      hdvl = hdst1[i * dnch];
572      hdvl0 += val1 * hval1;
573      hdvl0 += val2 * hval2;
574      val0 = val1;
575      val1 = val2;
576
577      dst[i * dnch] = hdvl0;
578    }
579  }
580
581  if (j < m - 1) {
582    const mlib_d64 *src2 = src + 2 * nch;
583    mlib_d64 hval0 = kernel[0];
584    mlib_d64 hval1 = kernel[1];
585    mlib_d64 val0 = src[0];
586    mlib_d64 val1 = src[nch];
587    mlib_d64 hdvl = dst[0];
588
589#ifdef __SUNPRO_C
590#pragma pipeloop(0)
591#endif /* __SUNPRO_C */
592    for (i = 0; i < n; i++) {
593      mlib_d64 hdvl0 = val0 * hval0 + hdvl;
594      mlib_d64 val2 = src2[i * nch];
595
596      hdvl = hdst1[i * dnch];
597      hdvl0 += val1 * hval1;
598      val0 = val1;
599      val1 = val2;
600
601      dst[i * dnch] = hdvl0;
602    }
603
604  }
605  else if (j < m) {
606    const mlib_d64 *src2 = src + 2 * nch;
607    mlib_d64 hval0 = kernel[0];
608    mlib_d64 val0 = src[0];
609    mlib_d64 val1 = src[nch];
610    mlib_d64 hdvl = dst[0];
611
612#ifdef __SUNPRO_C
613#pragma pipeloop(0)
614#endif /* __SUNPRO_C */
615    for (i = 0; i < n; i++) {
616      mlib_d64 hdvl0 = val0 * hval0 + hdvl;
617      mlib_d64 val2 = src2[i * nch];
618
619      hdvl = hdst1[i * dnch];
620      val0 = val1;
621      val1 = val2;
622
623      dst[i * dnch] = hdvl0;
624    }
625  }
626}
627
628/***************************************************************/
629void mlib_ImageConvMxND642D64_ext(mlib_d64       *dst,
630                                  const mlib_d64 *src,
631                                  mlib_s32       n,
632                                  mlib_s32       nch,
633                                  mlib_s32       dx_l,
634                                  mlib_s32       dx_r)
635{
636  mlib_s32 i;
637  mlib_d64 val = src[0];
638
639  for (i = 0; i < dx_l; i++)
640    dst[i] = val;
641#ifdef __SUNPRO_C
642#pragma pipeloop(0)
643#endif /* __SUNPRO_C */
644  for (; i < n - dx_r; i++)
645    dst[i] = src[nch * (i - dx_l)];
646  val = dst[n - dx_r - 1];
647  for (; i < n; i++)
648    dst[i] = val;
649}
650
651/***************************************************************/
652mlib_status mlib_convMxNext_d64(mlib_image       *dst,
653                                const mlib_image *src,
654                                const mlib_d64   *kernel,
655                                mlib_s32         m,
656                                mlib_s32         n,
657                                mlib_s32         dx_l,
658                                mlib_s32         dx_r,
659                                mlib_s32         dy_t,
660                                mlib_s32         dy_b,
661                                mlib_s32         cmask)
662{
663  mlib_d64 dspace[1024], *dsa = dspace;
664  mlib_s32 wid_e = mlib_ImageGetWidth(src);
665  mlib_d64 *da = mlib_ImageGetData(dst);
666  mlib_d64 *sa = mlib_ImageGetData(src);
667  mlib_s32 dlb = mlib_ImageGetStride(dst) >> 3;
668  mlib_s32 slb = mlib_ImageGetStride(src) >> 3;
669  mlib_s32 dw = mlib_ImageGetWidth(dst);
670  mlib_s32 dh = mlib_ImageGetHeight(dst);
671  mlib_s32 nch = mlib_ImageGetChannels(dst);
672  mlib_s32 i, j, j1, k;
673
674  if (3 * wid_e + m > 1024) {
675    dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64));
676
677    if (dsa == NULL)
678      return MLIB_FAILURE;
679  }
680
681  for (j = 0; j < dh; j++, da += dlb) {
682    for (k = 0; k < nch; k++)
683      if (cmask & (1 << (nch - 1 - k))) {
684        mlib_d64 *sa1 = sa + k;
685        mlib_d64 *da1 = da + k;
686        const mlib_d64 *kernel1 = kernel;
687
688        for (i = 0; i < dw; i++)
689          da1[i * nch] = 0.;
690        for (j1 = 0; j1 < n; j1++, kernel1 += m) {
691          mlib_ImageConvMxND642D64_ext(dsa, sa1, dw + m - 1, nch, dx_l, dx_r);
692          mlib_ImageConvMxNMulAdd_D64(da1, dsa, kernel1, dw, m, 1, nch);
693
694          if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2))
695            sa1 += slb;
696        }
697      }
698
699    if ((j >= dy_t) && (j < dh + n - dy_b - 2))
700      sa += slb;
701  }
702
703  if (dsa != dspace)
704    mlib_free(dsa);
705  return MLIB_SUCCESS;
706}
707
708/***************************************************************/
709