1/*
2 * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27/*
28 * FUNCTIONS
29 *      mlib_ImageCopy_bit_na     - BIT, non-aligned
30 *      mlib_ImageCopy_bit_na_r   - BIT, non-aligned, reverse
31 *
32 * SYNOPSIS
33 *
34 *      void mlib_ImageCopy_bit_na(const mlib_u8 *sa,
35 *                                 mlib_u8       *da,
36 *                                 mlib_s32      size,
37 *                                 mlib_s32      s_offset,
38 *                                 mlib_s32      d_offset);
39 *      void mlib_ImageCopy_bit_na_r(const mlib_u8 *sa,
40 *                                   mlib_u8       *da,
41 *                                   mlib_s32      size,
42 *                                   mlib_s32      s_offset,
43 *                                   mlib_s32      d_offset);
44 * ARGUMENT
 *      sa       pointer to source image data
 *      da       pointer to destination image data
 *      size     number of bits to copy
48 *      width    image width in 8-bytes
49 *      height   image height in lines
50 *      stride   source image line stride in 8-bytes
51 *      dstride  destination image line stride in 8-bytes
52 *      s_offset source image line bit offset
53 *      d_offset destination image line bit offset
54 *
55 * DESCRIPTION
56 *      Direct copy from one image to another -- C version low level
57 *      functions.
58 */
59
60#include <stdlib.h>
61#include "mlib_image.h"
62#include "mlib_ImageCopy.h"
63
64/***************************************************************/
65/*
66 * Bit offsets of source and distination are not the same
67 */
68
69void mlib_ImageCopy_bit_na(const mlib_u8 *sa,
70                           mlib_u8       *da,
71                           mlib_s32      size,
72                           mlib_s32      s_offset,
73                           mlib_s32      d_offset)
74{
75#ifdef _NO_LONGLONG
76
77  mlib_u32 *dp;          /* 4-byte aligned start points in dst */
78  mlib_u32 *sp;          /* 4-byte aligned start point in src */
79  mlib_s32 j;            /* offset of address in dst */
80  mlib_u32 mask0 = 0xFFFFFFFF;
81  mlib_u32 dmask;
82  mlib_u32 src, src0, src1, dst;
83  mlib_s32 ls_offset, ld_offset, shift;
84
85  if (size <= 0) return;
86
87  /* prepare the destination addresses */
88  dp = (mlib_u32 *)((mlib_addr)da & (~3));
89  sp = (mlib_u32 *)((mlib_addr)sa & (~3));
90  ld_offset = (((mlib_addr)da & 3) << 3) + d_offset;     /* bit d_offset to first mlib_s32 */
91  ls_offset = (((mlib_addr)sa & 3) << 3) + s_offset;     /* bit d_offset to first mlib_s32 */
92
93  if (ld_offset > ls_offset) {
94    src0 = sp[0];
95    dst = dp[0];
96    if (ld_offset + size < 32) {
97      dmask = (mask0 << (32 - size)) >> ld_offset;
98#ifdef _LITTLE_ENDIAN
99      src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
100      src = (src0 >> (ld_offset - ls_offset));
101      dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
102      dst = (dst & (~dmask)) | (src & dmask);
103      dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
104#else
105      src = (src0 >> (ld_offset - ls_offset));
106      dp[0] = (dst & (~dmask)) | (src & dmask);
107#endif /* _LITTLE_ENDIAN */
108      return;
109    }
110
111    dmask = mask0 >> ld_offset;
112#ifdef _LITTLE_ENDIAN
113    src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
114    src = (src0 >> (ld_offset - ls_offset));
115    dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
116    dst = (dst & ~dmask) | (src & dmask);
117    dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
118#else
119    src = (src0 >> (ld_offset - ls_offset));
120    dp[0] = (dst & ~dmask) | (src & dmask);
121#endif /* _LITTLE_ENDIAN */
122    j = 32 - ld_offset;
123    dp++;
124    ls_offset += j;
125  } else {
126
127    shift = ls_offset - ld_offset;
128    src0 = sp[0];
129    if (ls_offset + size > 32) src1 = sp[1];
130    dst = dp[0];
131
132    if (ld_offset + size < 32) {
133      dmask = (mask0 << (32 - size)) >> ld_offset;
134#ifdef _LITTLE_ENDIAN
135      src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
136      src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
137      src = (src0 << shift) | (src1 >> (32 - shift));
138      dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
139      dst = (dst & ~dmask) | (src & dmask);
140      dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
141#else
142      src = (src0 << shift) | (src1 >> (32 - shift));
143      dp[0] = (dst & ~dmask) | (src & dmask);
144#endif /* _LITTLE_ENDIAN */
145      return;
146    }
147
148    dmask = mask0 >> ld_offset;
149#ifdef _LITTLE_ENDIAN
150    src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
151    src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
152    src = (src0 << shift) | (src1 >> (32 - shift));
153    dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
154    dst = (dst & ~dmask) | (src & dmask);
155    dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
156#else
157    src = (src0 << shift) | (src1 >> (32 - shift));
158    dp[0] = (dst & ~dmask) | (src & dmask);
159#endif /* _LITTLE_ENDIAN */
160    j = 32 - ld_offset;
161    dp++;
162    sp++;
163    ls_offset = ls_offset + j - 32;
164  }
165
166  if (j < size) src1 = sp[0];
167#ifdef _LITTLE_ENDIAN
168  src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
169#endif /* _LITTLE_ENDIAN */
170  for (; j <= size - 32; j += 32) {
171    src0 = src1;
172    src1 = sp[1];
173#ifdef _LITTLE_ENDIAN
174    src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
175    src = (src0 << ls_offset) | (src1 >> (32 - ls_offset));
176    dp[0] = (src << 24) | ((src & 0xFF00) << 8) | ((src >> 8) & 0xFF00) | (src >> 24);
177#else
178    dp[0] = (src0 << ls_offset) | (src1 >> (32 - ls_offset));
179#endif /* _LITTLE_ENDIAN */
180    sp++;
181    dp++;
182  }
183
184  if (j < size) {
185    j = size - j;
186    src0 = src1;
187    if (ls_offset + j > 32) src1 = sp[1];
188    dst = dp[0];
189    dmask = mask0 << (32 - j);
190#ifdef _LITTLE_ENDIAN
191    src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
192    src = (src0 << ls_offset) | (src1 >> (32 - ls_offset));
193    dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
194    dst = (dst & ~dmask) | (src & dmask);
195    dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
196#else
197    src = (src0 << ls_offset) | (src1 >> (32 - ls_offset));
198    dp[0] = (dst & ~dmask) | (src & dmask);
199#endif /* _LITTLE_ENDIAN */
200  }
201
202#else /* _LONGLONG */
203
204  mlib_u64 *dp;          /* 8-byte aligned start points in dst */
205  mlib_u64 *sp;          /* 8-byte aligned start point in src */
206  mlib_s32 j;            /* offset of address in dst */
207  mlib_u64 lmask0 = 0xFFFFFFFFFFFFFFFFULL;
208  mlib_u64 dmask;
209  mlib_u64 lsrc, lsrc0, lsrc1 = 0ULL, ldst;
210  mlib_s32 ls_offset, ld_offset, shift;
211
212  if (size <= 0) return;
213
214  /* prepare the destination addresses */
215  dp = (mlib_u64 *)((mlib_addr)da & (~7));
216  sp = (mlib_u64 *)((mlib_addr)sa & (~7));
217  /* we can explicitly cast ro mlib_s32 here because value is in [0,64] range */
218  ld_offset = (((mlib_s32) ((mlib_addr)da & 7)) << 3) + d_offset;     /* bit d_offset to first mlib_d64 */
219  ls_offset = (((mlib_s32) ((mlib_addr)sa & 7)) << 3) + s_offset;     /* bit d_offset to first mlib_d64 */
220
221  if (ld_offset > ls_offset) {
222    lsrc0 = sp[0];
223    ldst = dp[0];
224    lsrc = (lsrc0 >> (ld_offset - ls_offset));
225    if (ld_offset + size < 64) {
226      dmask = (lmask0 << (64 - size)) >> ld_offset;
227      dp[0] = (ldst & (~dmask)) | (lsrc & dmask);
228      return;
229    }
230
231    dmask = lmask0 >> ld_offset;
232    dp[0] = (ldst & ~dmask) | (lsrc & dmask);
233    j = 64 - ld_offset;
234    dp++;
235    ls_offset += j;
236  } else {
237
238    shift = ls_offset - ld_offset;
239    lsrc0 = sp[0];
240    if (ls_offset + size > 64) lsrc1 = sp[1];
241    ldst = dp[0];
242    lsrc = (lsrc0 << shift) | (lsrc1 >> (64 - shift));
243
244    if (ld_offset + size < 64) {
245      dmask = (lmask0 << (64 - size)) >> ld_offset;
246      dp[0] = (ldst & ~dmask) | (lsrc & dmask);
247      return;
248    }
249
250    dmask = lmask0 >> ld_offset;
251    dp[0] = (ldst & ~dmask) | (lsrc & dmask);
252    j = 64 - ld_offset;
253    dp++;
254    sp++;
255    ls_offset = ls_offset + j - 64;
256  }
257
258  if (j < size) lsrc1 = sp[0];
259#ifdef __SUNPRO_C
260#pragma pipeloop(0)
261#endif /* __SUNPRO_C */
262  for (; j <= size - 64; j += 64) {
263    lsrc0 = lsrc1;
264    lsrc1 = sp[1];
265    lsrc = (lsrc0 << ls_offset) | (lsrc1 >> (64 - ls_offset));
266    dp[0] = lsrc;
267    sp++;
268    dp++;
269  }
270
271  if (j < size) {
272    j = size - j;
273    lsrc0 = lsrc1;
274    if (ls_offset + j > 64) lsrc1 = sp[1];
275    ldst = dp[0];
276    dmask = lmask0 << (64 - j);
277    lsrc = (lsrc0 << ls_offset) | (lsrc1 >> (64 - ls_offset));
278    dp[0] = (ldst & ~dmask) | (lsrc & dmask);
279  }
280#endif /* _NO_LONGLONG */
281}
282
283/***************************************************************/
284/*
285 * Bit offsets of source and distination are not the same
286 * This function is both for C and VIS version (LONGLONG case)
287 */
288
289void mlib_ImageCopy_bit_na_r(const mlib_u8 *sa,
290                             mlib_u8       *da,
291                             mlib_s32      size,
292                             mlib_s32      s_offset,
293                             mlib_s32      d_offset)
294{
295#ifdef _NO_LONGLONG
296
297  mlib_u32 *dp;          /* 4-byte aligned start points in dst */
298  mlib_u32 *sp;          /* 4-byte aligned start point in src */
299  mlib_s32 j;            /* offset of address in dst */
300  mlib_u32 lmask0 = 0xFFFFFFFF;
301  mlib_u32 dmask;
302  mlib_u32 src, src0, src1, dst;
303  mlib_s32 ls_offset, ld_offset, shift;
304
305  if (size <= 0) return;
306
307  /* prepare the destination addresses */
308  dp = (mlib_u32 *)((mlib_addr)da & (~3));
309  sp = (mlib_u32 *)((mlib_addr)sa & (~3));
310  ld_offset = (((mlib_addr)da & 3) << 3) + d_offset;     /* bit d_offset to first mlib_s32 */
311  ls_offset = (((mlib_addr)sa & 3) << 3) + s_offset;     /* bit d_offset to first mlib_s32 */
312
313  if (ld_offset < ls_offset) {
314    src0 = sp[0];
315    dst = dp[0];
316    if (ld_offset >= size) {
317      dmask = (lmask0 << (32 - size)) >> (ld_offset - size);
318#ifdef _LITTLE_ENDIAN
319      src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
320      src = (src0 << (ls_offset - ld_offset));
321      dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
322      dst = (dst & (~dmask)) | (src & dmask);
323      dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
324#else
325      src = (src0 << (ls_offset - ld_offset));
326      dp[0] = (dst & (~dmask)) | (src & dmask);
327#endif /* _LITTLE_ENDIAN */
328      return;
329    }
330
331    dmask = lmask0 << (32 - ld_offset);
332#ifdef _LITTLE_ENDIAN
333    src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
334    src = (src0 << (ls_offset - ld_offset));
335    dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
336    dst = (dst & ~dmask) | (src & dmask);
337    dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
338#else
339    src = (src0 << (ls_offset - ld_offset));
340    dp[0] = (dst & ~dmask) | (src & dmask);
341#endif /* _LITTLE_ENDIAN */
342    j = ld_offset;
343    dp--;
344    ls_offset -= j;
345  } else {
346
347    shift = ld_offset - ls_offset;
348    src0 = sp[0];
349    if (ls_offset < size) src1 = sp[-1];
350    dst = dp[0];
351
352    if (ld_offset >= size) {
353      dmask = (lmask0 << (32 - size)) >> (ld_offset - size);
354#ifdef _LITTLE_ENDIAN
355      src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
356      src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
357      src = (src0 >> shift) | (src1 << (32 - shift));
358      dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
359      dst = (dst & ~dmask) | (src & dmask);
360      dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
361#else
362      src = (src0 >> shift) | (src1 << (32 - shift));
363      dp[0] = (dst & ~dmask) | (src & dmask);
364#endif /* _LITTLE_ENDIAN */
365      return;
366    }
367
368    dmask = lmask0 << (32 - ld_offset);
369#ifdef _LITTLE_ENDIAN
370    src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
371    src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
372    src = (src0 >> shift) | (src1 << (32 - shift));
373    dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
374    dst = (dst & ~dmask) | (src & dmask);
375    dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
376#else
377    src = (src0 >> shift) | (src1 << (32 - shift));
378    dp[0] = (dst & ~dmask) | (src & dmask);
379#endif /* _LITTLE_ENDIAN */
380    j = ld_offset;
381    dp--;
382    sp--;
383    ls_offset = ls_offset - j + 32;
384  }
385
386  if (j < size) src1 = sp[0];
387#ifdef _LITTLE_ENDIAN
388  src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
389#endif /* _LITTLE_ENDIAN */
390#ifdef __SUNPRO_C
391#pragma pipeloop(0)
392#endif /* __SUNPRO_C */
393  for (; j <= size - 32; j += 32) {
394    src0 = src1;
395    src1 = sp[-1];
396#ifdef _LITTLE_ENDIAN
397    src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
398    src = (src0 >> (32 - ls_offset)) | (src1 << ls_offset);
399    dp[0] = (src << 24) | ((src & 0xFF00) << 8) | ((src >> 8) & 0xFF00) | (src >> 24);
400#else
401    dp[0] = (src0 >> (32 - ls_offset)) | (src1 << ls_offset);
402#endif /* _LITTLE_ENDIAN */
403    sp--;
404    dp--;
405  }
406
407  if (j < size) {
408    j = size - j;
409    src0 = src1;
410    if (ls_offset < j) src1 = sp[-1];
411    dst = dp[0];
412    dmask = lmask0 >> (32 - j);
413#ifdef _LITTLE_ENDIAN
414    src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
415    src = (src0 >> (32 - ls_offset)) | (src1 << ls_offset);
416    dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
417    dst = (dst & ~dmask) | (src & dmask);
418    dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
419#else
420    src = (src0 >> (32 - ls_offset)) | (src1 << ls_offset);
421    dp[0] = (dst & ~dmask) | (src & dmask);
422#endif /* _LITTLE_ENDIAN */
423  }
424
425#else  /* _LONGLONG */
426
427  mlib_u64 *dp;          /* 8-byte aligned start points in dst */
428  mlib_u64 *sp;          /* 8-byte aligned start point in src */
429  mlib_s32 j;            /* offset of address in dst */
430  mlib_u64 lmask0 = 0xFFFFFFFFFFFFFFFFULL;
431  mlib_u64 dmask;
432  mlib_u64 lsrc, lsrc0, lsrc1 = 0ULL, ldst;
433  mlib_s32 ls_offset, ld_offset, shift;
434
435  if (size <= 0) return;
436
437  /* prepare the destination addresses */
438  dp = (mlib_u64 *)((mlib_addr)da & (~7));
439  sp = (mlib_u64 *)((mlib_addr)sa & (~7));
440  /* we can explicitly cast ro mlib_s32 here because value is in [0,64] range */
441  ld_offset = (((mlib_s32) ((mlib_addr)da & 7)) << 3) + d_offset;     /* bit d_offset to first mlib_d64 */
442  ls_offset = (((mlib_s32) ((mlib_addr)sa & 7)) << 3) + s_offset;     /* bit d_offset to first mlib_d64 */
443
444  if (ld_offset < ls_offset) {
445    lsrc0 = sp[0];
446    ldst = dp[0];
447    lsrc = (lsrc0 << (ls_offset - ld_offset));
448    if (ld_offset >= size) {
449      dmask = (lmask0 << (64 - size)) >> (ld_offset - size);
450      dp[0] = (ldst & (~dmask)) | (lsrc & dmask);
451      return;
452    }
453
454    dmask = lmask0 << (64 - ld_offset);
455    dp[0] = (ldst & ~dmask) | (lsrc & dmask);
456    j = ld_offset;
457    dp--;
458    ls_offset -= j;
459  } else {
460
461    shift = ld_offset - ls_offset;
462    lsrc0 = sp[0];
463    if (ls_offset < size) lsrc1 = sp[-1];
464    ldst = dp[0];
465    lsrc = (lsrc0 >> shift) | (lsrc1 << (64 - shift));
466    if (ld_offset >= size) {
467      dmask = (lmask0 << (64 - size)) >> (ld_offset - size);
468      dp[0] = (ldst & ~dmask) | (lsrc & dmask);
469      return;
470    }
471
472    dmask = lmask0 << (64 - ld_offset);
473    dp[0] = (ldst & ~dmask) | (lsrc & dmask);
474    j = ld_offset;
475    dp--;
476    sp--;
477    ls_offset = ls_offset - j + 64;
478  }
479
480  if (j < size) lsrc1 = sp[0];
481#ifdef __SUNPRO_C
482#pragma pipeloop(0)
483#endif /* __SUNPRO_C */
484  for (; j <= size - 64; j += 64) {
485    lsrc0 = lsrc1;
486    lsrc1 = sp[-1];
487    dp[0] = (lsrc0 >> (64 - ls_offset)) | (lsrc1 << ls_offset);
488    sp--;
489    dp--;
490  }
491
492  if (j < size) {
493    j = size - j;
494    lsrc0 = lsrc1;
495    if (ls_offset < j) lsrc1 = sp[-1];
496    ldst = dp[0];
497    dmask = lmask0 >> (64 - j);
498    lsrc = (lsrc0 >> (64 - ls_offset)) | (lsrc1 << ls_offset);
499    dp[0] = (ldst & ~dmask) | (lsrc & dmask);
500  }
501#endif /* _NO_LONGLONG */
502}
503
504/***************************************************************/
505