#include <string.h>
#include <stdint.h>
#include <endian.h>

/*
 * Copy n bytes from src to dest and return dest.
 *
 * The regions must not overlap (both pointers are restrict-qualified).
 *
 * When built with a GNU-compatible compiler the copy is done mostly in
 * 32-bit words:
 *   1. Bytes are copied one at a time until the source pointer is
 *      4-byte aligned (or n runs out).
 *   2. If the destination is then 4-byte aligned as well, whole words
 *      are copied directly, followed by an 8/4/2/1-byte tail.
 *   3. Otherwise, for n >= 32, aligned source words are loaded and
 *      recombined with shifts so that every store to the destination
 *      is also word-aligned; the remaining bytes (or the whole copy
 *      when n < 32) are moved bytewise.
 *
 * Without __GNUC__ a plain byte loop is used.
 */
void *memcpy(void *restrict dest, const void *restrict src, size_t n)
{
	unsigned char *d = dest;
	const unsigned char *s = src;

#ifdef __GNUC__

/*
 * LS shifts the not-yet-stored bytes of the previous source word into
 * the lowest-address positions of the output word; RS shifts the bytes
 * of the next source word into the remaining higher-address positions.
 * Which C shift operator does that depends on the byte order.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif

	/* may_alias lets these word accesses alias the byte buffers. */
	typedef uint32_t __attribute__((__may_alias__)) u32;
	uint32_t w, x;

	/* Step 1: bring the source pointer to a 4-byte boundary. */
	for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++;

	/* Step 2: source and destination are both word-aligned. */
	if ((uintptr_t)d % 4 == 0) {
		for (; n>=16; s+=16, d+=16, n-=16) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			*(u32 *)(d+4) = *(u32 *)(s+4);
			*(u32 *)(d+8) = *(u32 *)(s+8);
			*(u32 *)(d+12) = *(u32 *)(s+12);
		}
		/* Fewer than 16 bytes remain; the bits of n pick the pieces. */
		if (n&8) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			*(u32 *)(d+4) = *(u32 *)(s+4);
			d += 8; s += 8;
		}
		if (n&4) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			d += 4; s += 4;
		}
		if (n&2) {
			*d++ = *s++; *d++ = *s++;
		}
		if (n&1) {
			*d = *s;
		}
		return dest;
	}

	/*
	 * Step 3: the source is aligned but the destination is off by
	 * 1, 2 or 3 bytes (0 was handled above).  Each case loads the
	 * aligned word at s, copies just enough leading bytes to align d,
	 * and then emits aligned destination words built from the tail of
	 * one source word and the head of the next.  Inside the loops s is
	 * no longer aligned, so the loads use s+k with k chosen to land on
	 * the next aligned source word.  The loop bounds (17/18/19) keep
	 * the furthest 4-byte load within the n bytes still to be copied.
	 */
	if (n >= 32) switch ((uintptr_t)d % 4) {
	case 1:
		w = *(u32 *)s;
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
		n -= 3;
		for (; n>=17; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+1);
			*(u32 *)(d+0) = (w LS 24) | (x RS 8);
			w = *(u32 *)(s+5);
			*(u32 *)(d+4) = (x LS 24) | (w RS 8);
			x = *(u32 *)(s+9);
			*(u32 *)(d+8) = (w LS 24) | (x RS 8);
			w = *(u32 *)(s+13);
			*(u32 *)(d+12) = (x LS 24) | (w RS 8);
		}
		break;
	case 2:
		w = *(u32 *)s;
		*d++ = *s++;
		*d++ = *s++;
		n -= 2;
		for (; n>=18; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+2);
			*(u32 *)(d+0) = (w LS 16) | (x RS 16);
			w = *(u32 *)(s+6);
			*(u32 *)(d+4) = (x LS 16) | (w RS 16);
			x = *(u32 *)(s+10);
			*(u32 *)(d+8) = (w LS 16) | (x RS 16);
			w = *(u32 *)(s+14);
			*(u32 *)(d+12) = (x LS 16) | (w RS 16);
		}
		break;
	case 3:
		w = *(u32 *)s;
		*d++ = *s++;
		n -= 1;
		for (; n>=19; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+3);
			*(u32 *)(d+0) = (w LS 8) | (x RS 24);
			w = *(u32 *)(s+7);
			*(u32 *)(d+4) = (x LS 8) | (w RS 24);
			x = *(u32 *)(s+11);
			*(u32 *)(d+8) = (w LS 8) | (x RS 24);
			w = *(u32 *)(s+15);
			*(u32 *)(d+12) = (x LS 8) | (w RS 24);
		}
		break;
	}

	/*
	 * Bytewise tail.  At this point n < 32 (either it started that
	 * small or the loops above reduced it), so the five low bits of n
	 * describe exactly what is left.
	 */
	if (n&16) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&8) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&4) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&2) {
		*d++ = *s++; *d++ = *s++;
	}
	if (n&1) {
		*d = *s;
	}
	return dest;

/* Keep the shift helpers private to this function. */
#undef LS
#undef RS
#endif

	/*
	 * Plain byte loop.  The __GNUC__ path above always returns, so
	 * this only runs when that path is not compiled in.
	 */
	for (; n; n--) *d++ = *s++;
	return dest;
}