1296177Sjhibbits/* Copyright (c) 2008-2011 Freescale Semiconductor, Inc.
2296177Sjhibbits * All rights reserved.
3296177Sjhibbits *
4296177Sjhibbits * Redistribution and use in source and binary forms, with or without
5296177Sjhibbits * modification, are permitted provided that the following conditions are met:
6296177Sjhibbits *     * Redistributions of source code must retain the above copyright
7296177Sjhibbits *       notice, this list of conditions and the following disclaimer.
8296177Sjhibbits *     * Redistributions in binary form must reproduce the above copyright
9296177Sjhibbits *       notice, this list of conditions and the following disclaimer in the
10296177Sjhibbits *       documentation and/or other materials provided with the distribution.
11296177Sjhibbits *     * Neither the name of Freescale Semiconductor nor the
12296177Sjhibbits *       names of its contributors may be used to endorse or promote products
13296177Sjhibbits *       derived from this software without specific prior written permission.
14296177Sjhibbits *
15296177Sjhibbits *
16296177Sjhibbits * ALTERNATIVELY, this software may be distributed under the terms of the
17296177Sjhibbits * GNU General Public License ("GPL") as published by the Free Software
18296177Sjhibbits * Foundation, either version 2 of that License or (at your option) any
19296177Sjhibbits * later version.
20296177Sjhibbits *
21296177Sjhibbits * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
22296177Sjhibbits * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23296177Sjhibbits * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24296177Sjhibbits * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
25296177Sjhibbits * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26296177Sjhibbits * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27296177Sjhibbits * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28296177Sjhibbits * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29296177Sjhibbits * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30296177Sjhibbits * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31296177Sjhibbits */
32296177Sjhibbits
33296177Sjhibbits
34296177Sjhibbits#include "std_ext.h"
35296177Sjhibbits#include "xx_ext.h"
36296177Sjhibbits#include "memcpy_ext.h"
37296177Sjhibbits
38296177Sjhibbits
39296177Sjhibbits#ifdef CORE_8BIT_ACCESS_ERRATA
/*
 * Store one byte without issuing an 8-bit access: the enclosing 32-bit
 * word is read with GET_UINT32, the selected byte is replaced, and the
 * word is written back with WRITE_UINT32.
 *
 * addr - address of the byte to modify (any alignment)
 * val  - value to store
 *
 * Byte offset 0 inside the word maps to bits 31..24 and offset 3 to
 * bits 7..0.  The read-modify-write is not atomic.
 */
static void MY_MY_WRITE_UINT8(uint8_t *addr, uint8_t val)
{
    /* Position of the byte inside its word (0..3).  The full pointer value
     * is used here; narrowing the pointer to uint32_t and casting it back
     * would drop the upper address bits where pointers exceed 32 bits. */
    uint32_t byteIdx = (uint32_t)(PTR_TO_UINT(addr) & 3);
    uint32_t *p_Word = (uint32_t *)(addr - byteIdx);
    uint32_t shift   = (3 - byteIdx) << 3;
    uint32_t word;

    word = GET_UINT32(*p_Word);
    word = (word & ~((uint32_t)0xff << shift)) | (((uint32_t)val) << shift);
    WRITE_UINT32(*p_Word, word);
}
68296177Sjhibbits
/*
 * Fetch one byte without issuing an 8-bit access: the enclosing 32-bit
 * word is read with GET_UINT32 and the requested byte is extracted.
 *
 * addr - address of the byte to read (any alignment)
 *
 * Returns the byte; offset 0 inside the word is taken from bits 31..24
 * and offset 3 from bits 7..0.
 */
static uint8_t MY_MY_GET_UINT8(uint8_t *addr)
{
    /* Position of the byte inside its word (0..3).  The full pointer value
     * is used here; narrowing the pointer to uint32_t and casting it back
     * would drop the upper address bits where pointers exceed 32 bits. */
    uint32_t byteIdx = (uint32_t)(PTR_TO_UINT(addr) & 3);
    uint32_t *p_Word = (uint32_t *)(addr - byteIdx);
    uint32_t shift   = (3 - byteIdx) << 3;

    return (uint8_t)((GET_UINT32(*p_Word) >> shift) & 0xff);
}
95296177Sjhibbits
96296177Sjhibbits#define MY_WRITE_UINT8(addr,val) MY_MY_WRITE_UINT8(&addr,val)
97296177Sjhibbits#define MY_GET_UINT8(addr) MY_MY_GET_UINT8(&addr)
98296177Sjhibbits#else
99296177Sjhibbits#define MY_WRITE_UINT8 WRITE_UINT8
100296177Sjhibbits#define MY_GET_UINT8   GET_UINT8
101296177Sjhibbits#endif /* CORE_8BIT_ACCESS_ERRATA */
102296177Sjhibbits
103296177Sjhibbits
/*
 * Copy 'size' bytes from pSrc to pDst, moving the bulk of the data with
 * 32-bit loads and stores.
 *
 * pDst - destination buffer (any alignment)
 * pSrc - source buffer (any alignment)
 * size - number of bytes to copy
 *
 * Returns pDst.  The buffers are walked forward only, so overlapping
 * regions are not handled.
 */
void * MemCpy32(void* pDst,void* pSrc, uint32_t size)
{
    uint32_t leftAlign;
    uint32_t rightAlign;
    uint32_t lastWord;
    uint32_t currWord;
    uint32_t *p_Src32;
    uint32_t *p_Dst32;
    uint8_t  *p_Src8;
    uint8_t  *p_Dst8;
    /* Byte-order probe: the first byte of the value 1 is 1 only when the
     * least significant byte is stored at the lowest address. */
    const uint32_t orderProbe = 1;
    const int littleEndian = (*(const uint8_t *)&orderProbe == 1);

    p_Src8 = (uint8_t*)(pSrc);
    p_Dst8 = (uint8_t*)(pDst);
    /* first copy byte by byte till the source first alignment
     * this step is necessary to ensure we do not even try to access
     * data which is before the source buffer, hence it is not ours.
     */
    while((PTR_TO_UINT(p_Src8) & 3) && size) /* (pSrc mod 4) > 0 and size > 0 */
    {
        *p_Dst8++ = *p_Src8++;
        size--;
    }

    /* align destination (possibly disaligning source)*/
    while((PTR_TO_UINT(p_Dst8) & 3) && size) /* (pDst mod 4) > 0 and size > 0 */
    {
        *p_Dst8++ = *p_Src8++;
        size--;
    }

    /* dest is aligned and source is not necessarily aligned */
    leftAlign = (uint32_t)((PTR_TO_UINT(p_Src8) & 3) << 3); /* leftAlign = (pSrc mod 4)*8 */
    rightAlign = 32 - leftAlign;

    if (leftAlign == 0)
    {
        /* source is also aligned */
        p_Src32 = (uint32_t*)(p_Src8);
        p_Dst32 = (uint32_t*)(p_Dst8);
        while (size >> 2) /* size >= 4 */
        {
            *p_Dst32++ = *p_Src32++;
            size -= 4;
        }
        p_Src8 = (uint8_t*)(p_Src32);
        p_Dst8 = (uint8_t*)(p_Dst32);
    }
    else if (size >> 3) /* size >= 8 */
    {
        /* source is not aligned (destination is aligned).
         * The branch is entered only when at least one merged word will be
         * produced; priming lastWord with fewer bytes left would read
         * source bytes beyond the requested range.
         */
        p_Src32 = (uint32_t*)(p_Src8 - (leftAlign >> 3));
        p_Dst32 = (uint32_t*)(p_Dst8);
        lastWord = *p_Src32++;
        while(size >> 3) /* size >= 8 */
        {
            currWord = *p_Src32;
            /* Build one destination word from the tail of lastWord and the
             * head of currWord; which end of a word holds the lower
             * addresses depends on the byte order. */
            if (littleEndian)
                *p_Dst32 = (lastWord >> leftAlign) | (currWord << rightAlign);
            else
                *p_Dst32 = (lastWord << leftAlign) | (currWord >> rightAlign);
            lastWord = currWord;
            p_Src32++;
            p_Dst32++;
            size -= 4;
        }
        p_Dst8 = (uint8_t*)(p_Dst32);
        /* p_Src32 is one word ahead of the data consumed so far */
        p_Src8 = (uint8_t*)(p_Src32) - 4 + (leftAlign >> 3);
    }

    /* complete the left overs */
    while (size--)
        *p_Dst8++ = *p_Src8++;

    return pDst;
}
177296177Sjhibbits
/*
 * Copy 'size' bytes from pSrc to pDst where both sides are accessed only
 * through the accessor macros: GET_UINT32/WRITE_UINT32 for whole words and
 * MY_GET_UINT8/MY_WRITE_UINT8 for single bytes.
 *
 * pDst - destination region (any alignment)
 * pSrc - source region (any alignment)
 * size - number of bytes to copy
 *
 * Returns pDst.  The regions are walked forward only.
 */
void * IO2IOCpy32(void* pDst,void* pSrc, uint32_t size)
{
    uint32_t leftAlign;
    uint32_t rightAlign;
    uint32_t lastWord;
    uint32_t currWord;
    uint32_t *p_Src32;
    uint32_t *p_Dst32;
    uint8_t  *p_Src8;
    uint8_t  *p_Dst8;

    p_Src8 = (uint8_t*)(pSrc);
    p_Dst8 = (uint8_t*)(pDst);
    /* first copy byte by byte till the source first alignment
     * this step is necessary to ensure we do not even try to access
     * data which is before the source buffer, hence it is not ours.
     */
    while((PTR_TO_UINT(p_Src8) & 3) && size) /* (pSrc mod 4) > 0 and size > 0 */
    {
        MY_WRITE_UINT8(*p_Dst8, MY_GET_UINT8(*p_Src8));
        p_Dst8++;p_Src8++;
        size--;
    }

    /* align destination (possibly disaligning source)*/
    while((PTR_TO_UINT(p_Dst8) & 3) && size) /* (pDst mod 4) > 0 and size > 0 */
    {
        MY_WRITE_UINT8(*p_Dst8, MY_GET_UINT8(*p_Src8));
        p_Dst8++;p_Src8++;
        size--;
    }

    /* dest is aligned and source is not necessarily aligned */
    leftAlign = (uint32_t)((PTR_TO_UINT(p_Src8) & 3) << 3); /* leftAlign = (pSrc mod 4)*8 */
    rightAlign = 32 - leftAlign;

    if (leftAlign == 0)
    {
        /* source is also aligned */
        p_Src32 = (uint32_t*)(p_Src8);
        p_Dst32 = (uint32_t*)(p_Dst8);
        while (size >> 2) /* size >= 4 */
        {
            WRITE_UINT32(*p_Dst32, GET_UINT32(*p_Src32));
            p_Dst32++;p_Src32++;
            size -= 4;
        }
        p_Src8 = (uint8_t*)(p_Src32);
        p_Dst8 = (uint8_t*)(p_Dst32);
    }
    else if (size >> 3) /* size >= 8 */
    {
        /* source is not aligned (destination is aligned).
         * The branch is entered only when at least one merged word will be
         * produced, so no source word is read unless its data is needed;
         * with fewer bytes left that read could reach past the requested
         * range.
         */
        p_Src32 = (uint32_t*)(p_Src8 - (leftAlign >> 3));
        p_Dst32 = (uint32_t*)(p_Dst8);
        lastWord = GET_UINT32(*p_Src32);
        p_Src32++;
        while(size >> 3) /* size >= 8 */
        {
            currWord = GET_UINT32(*p_Src32);
            /* NOTE(review): this merge puts the earlier source bytes in the
             * high-order bits, i.e. it presumes GET_UINT32/WRITE_UINT32
             * present the lowest address as the most significant byte -
             * confirm for little-endian configurations. */
            WRITE_UINT32(*p_Dst32, (lastWord << leftAlign) | (currWord >> rightAlign));
            lastWord = currWord;
            p_Src32++;p_Dst32++;
            size -= 4;
        }
        p_Dst8 = (uint8_t*)(p_Dst32);
        /* p_Src32 is one word ahead of the data consumed so far */
        p_Src8 = (uint8_t*)(p_Src32) - 4 + (leftAlign >> 3);
    }

    /* complete the left overs */
    while (size--)
    {
        MY_WRITE_UINT8(*p_Dst8, MY_GET_UINT8(*p_Src8));
        p_Dst8++;p_Src8++;
    }

    return pDst;
}
256296177Sjhibbits
/*
 * Copy 'size' bytes from pSrc to pDst where the source is read with plain
 * loads and the destination is written only through the accessor macros
 * (WRITE_UINT32 for whole words, MY_WRITE_UINT8 for single bytes).
 *
 * pDst - destination region (any alignment)
 * pSrc - source buffer (any alignment)
 * size - number of bytes to copy
 *
 * Returns pDst.  The regions are walked forward only.
 */
void * Mem2IOCpy32(void* pDst,void* pSrc, uint32_t size)
{
    uint32_t leftAlign;
    uint32_t rightAlign;
    uint32_t lastWord;
    uint32_t currWord;
    uint32_t *p_Src32;
    uint32_t *p_Dst32;
    uint8_t  *p_Src8;
    uint8_t  *p_Dst8;

    p_Src8 = (uint8_t*)(pSrc);
    p_Dst8 = (uint8_t*)(pDst);
    /* first copy byte by byte till the source first alignment
     * this step is necessary to ensure we do not even try to access
     * data which is before the source buffer, hence it is not ours.
     */
    while((PTR_TO_UINT(p_Src8) & 3) && size) /* (pSrc mod 4) > 0 and size > 0 */
    {
        MY_WRITE_UINT8(*p_Dst8, *p_Src8);
        p_Dst8++;p_Src8++;
        size--;
    }

    /* align destination (possibly disaligning source)*/
    while((PTR_TO_UINT(p_Dst8) & 3) && size) /* (pDst mod 4) > 0 and size > 0 */
    {
        MY_WRITE_UINT8(*p_Dst8, *p_Src8);
        p_Dst8++;p_Src8++;
        size--;
    }

    /* dest is aligned and source is not necessarily aligned */
    leftAlign = (uint32_t)((PTR_TO_UINT(p_Src8) & 3) << 3); /* leftAlign = (pSrc mod 4)*8 */
    rightAlign = 32 - leftAlign;

    if (leftAlign == 0)
    {
        /* source is also aligned */
        p_Src32 = (uint32_t*)(p_Src8);
        p_Dst32 = (uint32_t*)(p_Dst8);
        while (size >> 2) /* size >= 4 */
        {
            WRITE_UINT32(*p_Dst32, *p_Src32);
            p_Dst32++;p_Src32++;
            size -= 4;
        }
        p_Src8 = (uint8_t*)(p_Src32);
        p_Dst8 = (uint8_t*)(p_Dst32);
    }
    else if (size >> 3) /* size >= 8 */
    {
        /* source is not aligned (destination is aligned).
         * The branch is entered only when at least one merged word will be
         * produced; priming lastWord with fewer bytes left would read
         * source bytes beyond the requested range.
         */
        p_Src32 = (uint32_t*)(p_Src8 - (leftAlign >> 3));
        p_Dst32 = (uint32_t*)(p_Dst8);
        lastWord = *p_Src32++;
        while(size >> 3) /* size >= 8 */
        {
            currWord = *p_Src32;
            /* NOTE(review): this merge puts the earlier source bytes in the
             * high-order bits, which matches big-endian loads of the source
             * - confirm the intended behaviour on little-endian cores. */
            WRITE_UINT32(*p_Dst32, (lastWord << leftAlign) | (currWord >> rightAlign));
            lastWord = currWord;
            p_Src32++;p_Dst32++;
            size -= 4;
        }
        p_Dst8 = (uint8_t*)(p_Dst32);
        /* p_Src32 is one word ahead of the data consumed so far */
        p_Src8 = (uint8_t*)(p_Src32) - 4 + (leftAlign >> 3);
    }

    /* complete the left overs */
    while (size--)
    {
        MY_WRITE_UINT8(*p_Dst8, *p_Src8);
        p_Dst8++;p_Src8++;
    }

    return pDst;
}
334296177Sjhibbits
/*
 * Copy 'size' bytes from pSrc to pDst where the source is read only
 * through the accessor macros (GET_UINT32 for whole words, MY_GET_UINT8
 * for single bytes) and the destination is written with plain stores.
 *
 * pDst - destination buffer (any alignment)
 * pSrc - source region (any alignment)
 * size - number of bytes to copy
 *
 * Returns pDst.  The regions are walked forward only.
 */
void * IO2MemCpy32(void* pDst,void* pSrc, uint32_t size)
{
    uint32_t leftAlign;
    uint32_t rightAlign;
    uint32_t lastWord;
    uint32_t currWord;
    uint32_t *p_Src32;
    uint32_t *p_Dst32;
    uint8_t  *p_Src8;
    uint8_t  *p_Dst8;

    p_Src8 = (uint8_t*)(pSrc);
    p_Dst8 = (uint8_t*)(pDst);
    /* first copy byte by byte till the source first alignment
     * this step is necessary to ensure we do not even try to access
     * data which is before the source buffer, hence it is not ours.
     */
    while((PTR_TO_UINT(p_Src8) & 3) && size) /* (pSrc mod 4) > 0 and size > 0 */
    {
        *p_Dst8 = MY_GET_UINT8(*p_Src8);
        p_Dst8++;p_Src8++;
        size--;
    }

    /* align destination (possibly disaligning source)*/
    while((PTR_TO_UINT(p_Dst8) & 3) && size) /* (pDst mod 4) > 0 and size > 0 */
    {
        *p_Dst8 = MY_GET_UINT8(*p_Src8);
        p_Dst8++;p_Src8++;
        size--;
    }

    /* dest is aligned and source is not necessarily aligned */
    leftAlign = (uint32_t)((PTR_TO_UINT(p_Src8) & 3) << 3); /* leftAlign = (pSrc mod 4)*8 */
    rightAlign = 32 - leftAlign;

    if (leftAlign == 0)
    {
        /* source is also aligned */
        p_Src32 = (uint32_t*)(p_Src8);
        p_Dst32 = (uint32_t*)(p_Dst8);
        while (size >> 2) /* size >= 4 */
        {
            *p_Dst32 = GET_UINT32(*p_Src32);
            p_Dst32++;p_Src32++;
            size -= 4;
        }
        p_Src8 = (uint8_t*)(p_Src32);
        p_Dst8 = (uint8_t*)(p_Dst32);
    }
    else if (size >> 3) /* size >= 8 */
    {
        /* source is not aligned (destination is aligned).
         * The branch is entered only when at least one merged word will be
         * produced, so no source word is read unless its data is needed;
         * with fewer bytes left that read could reach past the requested
         * range.
         */
        p_Src32 = (uint32_t*)(p_Src8 - (leftAlign >> 3));
        p_Dst32 = (uint32_t*)(p_Dst8);
        lastWord = GET_UINT32(*p_Src32);
        p_Src32++;
        while(size >> 3) /* size >= 8 */
        {
            currWord = GET_UINT32(*p_Src32);
            /* NOTE(review): this merge puts the earlier source bytes in the
             * high-order bits, which matches big-endian stores to the
             * destination - confirm the intended behaviour on little-endian
             * cores. */
            *p_Dst32 = (lastWord << leftAlign) | (currWord >> rightAlign);
            lastWord = currWord;
            p_Src32++;p_Dst32++;
            size -= 4;
        }
        p_Dst8 = (uint8_t*)(p_Dst32);
        /* p_Src32 is one word ahead of the data consumed so far */
        p_Src8 = (uint8_t*)(p_Src32) - 4 + (leftAlign >> 3);
    }

    /* complete the left overs */
    while (size--)
    {
        *p_Dst8 = MY_GET_UINT8(*p_Src8);
        p_Dst8++;p_Src8++;
    }

    return pDst;
}
413296177Sjhibbits
/*
 * Copy 'size' bytes from pSrc to pDst, moving the bulk of the data with
 * 64-bit loads and stores.
 *
 * pDst - destination buffer (any alignment)
 * pSrc - source buffer (any alignment)
 * size - number of bytes to copy
 *
 * Returns pDst.  The buffers are walked forward only, so overlapping
 * regions are not handled.
 */
void * MemCpy64(void* pDst,void* pSrc, uint32_t size)
{
    uint32_t leftAlign;
    uint32_t rightAlign;
    uint64_t lastWord;
    uint64_t currWord;
    uint64_t *pSrc64;
    uint64_t *pDst64;
    uint8_t  *p_Src8;
    uint8_t  *p_Dst8;
    /* Byte-order probe: the first byte of the value 1 is 1 only when the
     * least significant byte is stored at the lowest address. */
    const uint32_t orderProbe = 1;
    const int littleEndian = (*(const uint8_t *)&orderProbe == 1);

    p_Src8 = (uint8_t*)(pSrc);
    p_Dst8 = (uint8_t*)(pDst);
    /* first copy byte by byte till the source first alignment
     * this step is necessary to ensure we do not even try to access
     * data which is before the source buffer, hence it is not ours.
     */
    while((PTR_TO_UINT(p_Src8) & 7) && size) /* (pSrc mod 8) > 0 and size > 0 */
    {
        *p_Dst8++ = *p_Src8++;
        size--;
    }

    /* align destination (possibly disaligning source)*/
    while((PTR_TO_UINT(p_Dst8) & 7) && size) /* (pDst mod 8) > 0 and size > 0 */
    {
        *p_Dst8++ = *p_Src8++;
        size--;
    }

    /* dest is aligned and source is not necessarily aligned */
    leftAlign = (uint32_t)((PTR_TO_UINT(p_Src8) & 7) << 3); /* leftAlign = (pSrc mod 8)*8 */
    rightAlign = 64 - leftAlign;

    if (leftAlign == 0)
    {
        /* source is also aligned */
        pSrc64 = (uint64_t*)(p_Src8);
        pDst64 = (uint64_t*)(p_Dst8);
        while (size >> 3) /* size >= 8 */
        {
            *pDst64++ = *pSrc64++;
            size -= 8;
        }
        p_Src8 = (uint8_t*)(pSrc64);
        p_Dst8 = (uint8_t*)(pDst64);
    }
    else if (size >> 4) /* size >= 16 */
    {
        /* source is not aligned (destination is aligned).
         * The branch is entered only when at least one merged word will be
         * produced; priming lastWord with fewer bytes left would read
         * source bytes beyond the requested range.
         */
        pSrc64 = (uint64_t*)(p_Src8 - (leftAlign >> 3));
        pDst64 = (uint64_t*)(p_Dst8);
        lastWord = *pSrc64++;
        while(size >> 4) /* size >= 16 */
        {
            currWord = *pSrc64;
            /* Build one destination word from the tail of lastWord and the
             * head of currWord; which end of a word holds the lower
             * addresses depends on the byte order. */
            if (littleEndian)
                *pDst64 = (lastWord >> leftAlign) | (currWord << rightAlign);
            else
                *pDst64 = (lastWord << leftAlign) | (currWord >> rightAlign);
            lastWord = currWord;
            pSrc64++;
            pDst64++;
            size -= 8;
        }
        p_Dst8 = (uint8_t*)(pDst64);
        /* pSrc64 is one word ahead of the data consumed so far */
        p_Src8 = (uint8_t*)(pSrc64) - 8 + (leftAlign >> 3);
    }

    /* complete the left overs */
    while (size--)
        *p_Dst8++ = *p_Src8++;

    return pDst;
}
487296177Sjhibbits
/*
 * Fill 'size' bytes starting at pDst with 'val', using 32-bit stores for
 * the aligned middle part of the range.
 *
 * pDst - start of the range to fill (any alignment)
 * val  - byte value to store
 * size - number of bytes to fill
 *
 * Returns pDst.
 */
void * MemSet32(void* pDst, uint8_t val, uint32_t size)
{
    uint8_t  *p_Byte = (uint8_t*)(pDst);
    uint32_t *p_Word;
    /* 'val' repeated in each of the four bytes of a word */
    uint32_t  pattern = (uint32_t)val * 0x01010101U;

    /* head: single bytes until the pointer is 4-byte aligned */
    for (; size && (PTR_TO_UINT(p_Byte) & 3); size--)
        *p_Byte++ = val;

    /* body: whole words */
    p_Word = (uint32_t*)(p_Byte);
    for (; size >= 4; size -= 4)
        *p_Word++ = pattern;

    /* tail: the 0..3 bytes that are left */
    p_Byte = (uint8_t*)(p_Word);
    for (; size; size--)
        *p_Byte++ = val;

    return pDst;
}
523296177Sjhibbits
/*
 * Fill 'size' bytes starting at pDst with 'val'.  All stores go through
 * the accessor macros: MY_WRITE_UINT8 for the unaligned head and tail,
 * WRITE_UINT32 for the aligned middle part.
 *
 * pDst - start of the range to fill (any alignment)
 * val  - byte value to store
 * size - number of bytes to fill
 *
 * Returns pDst.
 */
void * IOMemSet32(void* pDst, uint8_t val, uint32_t size)
{
    uint8_t  *p_Byte = (uint8_t*)(pDst);
    uint32_t *p_Word;
    /* 'val' repeated in each of the four bytes of a word */
    uint32_t  pattern = (uint32_t)val * 0x01010101U;

    /* head: single bytes until the pointer is 4-byte aligned */
    for (; size && (PTR_TO_UINT(p_Byte) & 3); size--)
    {
        MY_WRITE_UINT8(*p_Byte, val);
        p_Byte++;
    }

    /* body: whole words */
    p_Word = (uint32_t*)(p_Byte);
    for (; size >= 4; size -= 4)
    {
        WRITE_UINT32(*p_Word, pattern);
        p_Word++;
    }

    /* tail: the 0..3 bytes that are left */
    p_Byte = (uint8_t*)(p_Word);
    for (; size; size--)
    {
        MY_WRITE_UINT8(*p_Byte, val);
        p_Byte++;
    }

    return pDst;
}
564296177Sjhibbits
/*
 * Fill 'size' bytes starting at pDst with 'val', using 64-bit stores for
 * the aligned middle part of the range.
 *
 * pDst - start of the range to fill (any alignment)
 * val  - byte value to store
 * size - number of bytes to fill
 *
 * Returns pDst.
 */
void * MemSet64(void* pDst, uint8_t val, uint32_t size)
{
    uint64_t val64;
    uint64_t *pDst64;
    uint8_t  *p_Dst8;

    p_Dst8 = (uint8_t*)(pDst);

    /* generate eight 8-bit val's in 64-bit container */
    val64  = (uint64_t) val;
    val64 |= (val64 <<  8);
    val64 |= (val64 << 16);
    val64 |= (val64 << 32);

    /* align destination to 64 */
    while((PTR_TO_UINT(p_Dst8) & 7) && size) /* (pDst mod 8) > 0 and size > 0 */
    {
        *p_Dst8++ = val;
        size--;
    }

    /* 64-bit chunks: one full word is stored whenever 8 or more bytes
     * remain, so at most 7 bytes are left for the byte loop below */
    pDst64 = (uint64_t*)(p_Dst8);
    while (size >> 3) /* size >= 8 */
    {
        *pDst64++ = val64;
        size -= 8;
    }

    /* complete the leftovers */
    p_Dst8 = (uint8_t*)(pDst64);
    while (size--)
        *p_Dst8++ = val;

    return pDst;
}
602296177Sjhibbits
/*
 * Hex-dump 'size' bytes starting at 'p' through XX_Print.
 *
 * p    - first byte to display (any alignment)
 * size - number of bytes to display; nothing is printed when negative
 *
 * Each output line starts with an address followed by the data, grouped
 * as 32-bit words where possible.  When 'p' is not 4-byte aligned, the
 * first line starts at the preceding aligned address and shows "--" for
 * every byte in front of 'p'.
 */
void MemDisp(uint8_t *p, int size)
{
    uint32_t    space = (uint32_t)(PTR_TO_UINT(p) & 0x3);
    uint8_t     *p_Limit;

    /* The byte loops below count 'size' down to zero; a negative count
     * would never terminate them. */
    if (size < 0)
        return;

    if (space)
    {
        p_Limit = (p - space + 4);

        /* The address is converted to unsigned int to match the %X
         * conversion (assuming XX_Print follows printf conventions);
         * only the low-order bits of wider addresses are shown. */
        XX_Print("0x%08X: ", (unsigned int)PTR_TO_UINT(p - space));

        /* placeholders for the bytes in front of p */
        while (space--)
        {
            XX_Print("--");
        }
        /* single bytes up to the next 4-byte boundary */
        while (size  && (p < p_Limit))
        {
            XX_Print("%02x", *(uint8_t*)p);
            size--;
            p++;
        }

        XX_Print(" ");
        /* the first line covers 16 bytes from the aligned-down address */
        p_Limit += 12;

        while ((size > 3) && (p < p_Limit))
        {
            XX_Print("%08x ", *(uint32_t*)p);
            size -= 4;
            p += 4;
        }
        XX_Print("\r\n");
    }

    /* full lines of four words */
    while (size > 15)
    {
        XX_Print("0x%08X: %08x %08x %08x %08x\r\n",
                 (unsigned int)PTR_TO_UINT(p), *(uint32_t *)p, *(uint32_t *)(p + 4),
                 *(uint32_t *)(p + 8), *(uint32_t *)(p + 12));
        size -= 16;
        p += 16;
    }

    /* last, partial line: whole words first, then single bytes */
    if (size)
    {
        XX_Print("0x%08X: ", (unsigned int)PTR_TO_UINT(p));

        while (size > 3)
        {
            XX_Print("%08x ", *(uint32_t *)p);
            size -= 4;
            p += 4;
        }
        while (size)
        {
            XX_Print("%02x", *(uint8_t *)p);
            size--;
            p++;
        }

        XX_Print("\r\n");
    }
}
666