1/*
2 *  cc_priv.h
3 *  corecrypto
4 *
5 *  Created by Michael Brouwer on 12/1/10.
6 *  Copyright 2010,2011 Apple Inc. All rights reserved.
7 *
8 */
9
10#ifndef _CORECRYPTO_CC_PRIV_H_
11#define _CORECRYPTO_CC_PRIV_H_
12
13#include <corecrypto/cc.h>
14#include <stdint.h>
15
16/* defines the following macros :
17
18 CC_MEMCPY  : optimized memcpy.
19 CC_MEMMOVE : optimized memmove.
20 CC_MEMSET  : optimized memset.
21 CC_BZERO   : optimized bzero.
22
23 CC_STORE32_BE : store 32 bit value in big endian in unaligned buffer.
24 CC_STORE32_LE : store 32 bit value in little endian in unaligned buffer.
25 CC_STORE64_BE : store 64 bit value in big endian in unaligned buffer.
26 CC_STORE64_LE : store 64 bit value in little endian in unaligned buffer.
27
28 CC_LOAD32_BE : load 32 bit value in big endian from unaligned buffer.
29 CC_LOAD32_LE : load 32 bit value in little endian from unaligned buffer.
30 CC_LOAD64_BE : load 64 bit value in big endian from unaligned buffer.
31 CC_LOAD64_LE : load 64 bit value in little endian from unaligned buffer.
32
33 CC_ROR  : Rotate Right 32 bits. Rotate count can be a variable.
34 CC_ROL  : Rotate Left 32 bits. Rotate count can be a variable.
35 CC_RORc : Rotate Right 32 bits. Rotate count must be a constant.
36 CC_ROLc : Rotate Left 32 bits. Rotate count must be a constant.
37
38 CC_ROR64  : Rotate Right 64 bits. Rotate count can be a variable.
39 CC_ROL64  : Rotate Left 64 bits. Rotate count can be a variable.
40 CC_ROR64c : Rotate Right 64 bits. Rotate count must be a constant.
41 CC_ROL64c : Rotate Left 64 bits. Rotate count must be a constant.
42
43 CC_BSWAP  : byte swap a 32 bits variable.
44
45 CC_H2BE32 : convert a 32 bits value between host and big endian order.
46 CC_H2LE32 : convert a 32 bits value between host and little endian order.
47
48The following are not defined yet... define them if needed.
49
50 CC_BSWAPc   : byte swap a 32 bits constant
51
 CC_BSWAP64  : byte swap a 64 bits variable (note: this one is already defined further down in this header)
53 CC_BSWAP64c : byte swap a 64 bits constant
54
55 CC_READ_LE32 : read a 32 bits little endian value
56 CC_READ_LE64 : read a 64 bits little endian value
57 CC_READ_BE32 : read a 32 bits big endian value
58 CC_READ_BE64 : read a 64 bits big endian value
59
60 CC_WRITE_LE32 : write a 32 bits little endian value
61 CC_WRITE_LE64 : write a 64 bits little endian value
62 CC_WRITE_BE32 : write a 32 bits big endian value
63 CC_WRITE_BE64 : write a 64 bits big endian value
64
65 CC_H2BE64 : convert a 64 bits value between host and big endian order
66 CC_H2LE64 : convert a 64 bits value between host and little endian order
67
68*/
69
/*
 * Memory primitives used throughout the library. For now each one simply
 * forwards to the matching C library routine.
 * TODO: optimized versions
 */
#define CC_MEMCPY(dst, src, len)  memcpy((dst), (src), (len))
#define CC_MEMMOVE(dst, src, len) memmove((dst), (src), (len))
#define CC_MEMSET(dst, val, len)  memset((dst), (val), (len))
#define CC_BZERO(dst, len)        memset((dst), 0, (len))
75
76
// MARK: - Loads and Stores
78
79// MARK: -- 32 bits - little endian
80
81// MARK: --- Default version
82
/*
 * CC_STORE32_LE(x, y): write the low 32 bits of x to the 4 bytes starting at
 * y, least significant byte first. The bytes are written one at a time, so y
 * may have any alignment.
 *
 * Both arguments are evaluated exactly once, so an expression with side
 * effects (for example a post-incremented pointer) can be passed safely.
 */
#define CC_STORE32_LE(x, y) do {                                        \
    const uint32_t cc_st32le_v_ = (uint32_t)(x);                        \
    unsigned char *const cc_st32le_p_ = (unsigned char *)(y);           \
    cc_st32le_p_[0] = (unsigned char)(cc_st32le_v_ & 255);              \
    cc_st32le_p_[1] = (unsigned char)((cc_st32le_v_ >> 8) & 255);       \
    cc_st32le_p_[2] = (unsigned char)((cc_st32le_v_ >> 16) & 255);      \
    cc_st32le_p_[3] = (unsigned char)((cc_st32le_v_ >> 24) & 255);      \
} while(0)
89
/*
 * CC_LOAD32_LE(x, y): read the 4 bytes starting at y as a little endian
 * 32-bit value and assign it to x. The bytes are read one at a time, so y
 * may have any alignment.
 *
 * y is evaluated exactly once and is only read through a pointer to const.
 */
#define CC_LOAD32_LE(x, y) do {                                             \
    const unsigned char *const cc_ld32le_p_ = (const unsigned char *)(y);   \
    (x) = ((uint32_t)cc_ld32le_p_[3] << 24) |                               \
          ((uint32_t)cc_ld32le_p_[2] << 16) |                               \
          ((uint32_t)cc_ld32le_p_[1] << 8)  |                               \
          ((uint32_t)cc_ld32le_p_[0]);                                      \
} while(0)
96
97// MARK: -- 64 bits - little endian
98
/*
 * CC_STORE64_LE(x, y): write x as a 64-bit value to the 8 bytes starting at
 * y, least significant byte first. The bytes are written one at a time, so y
 * may have any alignment.
 *
 * x is converted to uint64_t before any shifting, so passing a narrower
 * integer does not shift by more than its width. Both arguments are
 * evaluated exactly once.
 */
#define CC_STORE64_LE(x, y) do {                                        \
    const uint64_t cc_st64le_v_ = (uint64_t)(x);                        \
    unsigned char *const cc_st64le_p_ = (unsigned char *)(y);           \
    cc_st64le_p_[0] = (unsigned char)(cc_st64le_v_ & 255);              \
    cc_st64le_p_[1] = (unsigned char)((cc_st64le_v_ >> 8) & 255);       \
    cc_st64le_p_[2] = (unsigned char)((cc_st64le_v_ >> 16) & 255);      \
    cc_st64le_p_[3] = (unsigned char)((cc_st64le_v_ >> 24) & 255);      \
    cc_st64le_p_[4] = (unsigned char)((cc_st64le_v_ >> 32) & 255);      \
    cc_st64le_p_[5] = (unsigned char)((cc_st64le_v_ >> 40) & 255);      \
    cc_st64le_p_[6] = (unsigned char)((cc_st64le_v_ >> 48) & 255);      \
    cc_st64le_p_[7] = (unsigned char)((cc_st64le_v_ >> 56) & 255);      \
} while(0)
109
/*
 * CC_LOAD64_LE(x, y): read the 8 bytes starting at y as a little endian
 * 64-bit value and assign it to x. The bytes are read one at a time, so y
 * may have any alignment.
 *
 * y is evaluated exactly once and is only read through a pointer to const.
 */
#define CC_LOAD64_LE(x, y) do {                                             \
    const unsigned char *const cc_ld64le_p_ = (const unsigned char *)(y);   \
    (x) = ((uint64_t)cc_ld64le_p_[7] << 56) |                               \
          ((uint64_t)cc_ld64le_p_[6] << 48) |                               \
          ((uint64_t)cc_ld64le_p_[5] << 40) |                               \
          ((uint64_t)cc_ld64le_p_[4] << 32) |                               \
          ((uint64_t)cc_ld64le_p_[3] << 24) |                               \
          ((uint64_t)cc_ld64le_p_[2] << 16) |                               \
          ((uint64_t)cc_ld64le_p_[1] << 8)  |                               \
          ((uint64_t)cc_ld64le_p_[0]);                                      \
} while(0)
120
121// MARK: -- 32 bits - big endian
122// MARK: --- intel version
123
#if (defined(__i386__) || defined(__x86_64__))

/*
 * CC_STORE32_BE(x, y): write the 32-bit value x to the 4 bytes starting at
 * y, most significant byte first.
 *
 * x is byte-swapped in its register, stored with a single movl, then swapped
 * back so the input operand holds its original value again when the asm
 * statement ends. The "memory" clobber tells the compiler that the statement
 * writes memory that is not listed among its operands.
 */
#define CC_STORE32_BE(x, y)     \
    __asm__ __volatile__ (      \
    "bswapl %0     \n\t"        \
    "movl   %0,(%1)\n\t"        \
    "bswapl %0     \n\t"        \
    ::"r"(x), "r"(y):"memory")

/*
 * CC_LOAD32_BE(x, y): read the 4 bytes starting at y as a big endian 32-bit
 * value into x (a single movl followed by bswapl).
 *
 * The "memory" clobber tells the compiler that the statement reads memory
 * that is not listed among its operands, so earlier stores to the buffer
 * must be completed before it runs.
 */
#define CC_LOAD32_BE(x, y)      \
    __asm__ __volatile__ (      \
    "movl (%1),%0\n\t"          \
    "bswapl %0\n\t"             \
    :"=r"(x): "r"(y):"memory")

#else
// MARK: --- default version

/*
 * CC_STORE32_BE(x, y): write the low 32 bits of x to the 4 bytes starting at
 * y, most significant byte first. Byte-wise, so y may have any alignment.
 * Both arguments are evaluated exactly once, like the asm version.
 */
#define CC_STORE32_BE(x, y) do {                                        \
    const uint32_t cc_st32be_v_ = (uint32_t)(x);                        \
    unsigned char *const cc_st32be_p_ = (unsigned char *)(y);           \
    cc_st32be_p_[0] = (unsigned char)((cc_st32be_v_ >> 24) & 255);      \
    cc_st32be_p_[1] = (unsigned char)((cc_st32be_v_ >> 16) & 255);      \
    cc_st32be_p_[2] = (unsigned char)((cc_st32be_v_ >> 8) & 255);       \
    cc_st32be_p_[3] = (unsigned char)(cc_st32be_v_ & 255);              \
} while(0)

/*
 * CC_LOAD32_BE(x, y): read the 4 bytes starting at y as a big endian 32-bit
 * value and assign it to x. Byte-wise, so y may have any alignment.
 * y is evaluated exactly once, like the asm version.
 */
#define CC_LOAD32_BE(x, y) do {                                             \
    const unsigned char *const cc_ld32be_p_ = (const unsigned char *)(y);   \
    (x) = ((uint32_t)cc_ld32be_p_[0] << 24) |                               \
          ((uint32_t)cc_ld32be_p_[1] << 16) |                               \
          ((uint32_t)cc_ld32be_p_[2] << 8)  |                               \
          ((uint32_t)cc_ld32be_p_[3]);                                      \
} while(0)

#endif
156
157// MARK: -- 64 bits - big endian
158
159// MARK: --- intel 64 bits version
160
#if defined(__x86_64__)

/*
 * CC_STORE64_BE(x, y): write the 64-bit value x to the 8 bytes starting at
 * y, most significant byte first.
 *
 * x is byte-swapped in its register, stored with a single movq, then swapped
 * back so the input operand holds its original value again when the asm
 * statement ends. The "memory" clobber tells the compiler that the statement
 * writes memory that is not listed among its operands.
 */
#define CC_STORE64_BE(x, y)   \
__asm__ __volatile__ (        \
"bswapq %0     \n\t"          \
"movq   %0,(%1)\n\t"          \
"bswapq %0     \n\t"          \
::"r"(x), "r"(y):"memory")

/*
 * CC_LOAD64_BE(x, y): read the 8 bytes starting at y as a big endian 64-bit
 * value into x (a single movq followed by bswapq).
 *
 * The "memory" clobber tells the compiler that the statement reads memory
 * that is not listed among its operands, so earlier stores to the buffer
 * must be completed before it runs.
 */
#define CC_LOAD64_BE(x, y)    \
__asm__ __volatile__ (        \
"movq (%1),%0\n\t"            \
"bswapq %0\n\t"               \
:"=r"(x): "r"(y):"memory")

#else

// MARK: --- default version

/*
 * CC_STORE64_BE(x, y): write x as a 64-bit value to the 8 bytes starting at
 * y, most significant byte first. Byte-wise, so y may have any alignment.
 *
 * x is converted to uint64_t before any shifting, so a narrower integer is
 * never shifted by more than its width. Both arguments are evaluated exactly
 * once, like the asm version.
 */
#define CC_STORE64_BE(x, y) do {                                        \
    const uint64_t cc_st64be_v_ = (uint64_t)(x);                        \
    unsigned char *const cc_st64be_p_ = (unsigned char *)(y);           \
    cc_st64be_p_[0] = (unsigned char)((cc_st64be_v_ >> 56) & 255);      \
    cc_st64be_p_[1] = (unsigned char)((cc_st64be_v_ >> 48) & 255);      \
    cc_st64be_p_[2] = (unsigned char)((cc_st64be_v_ >> 40) & 255);      \
    cc_st64be_p_[3] = (unsigned char)((cc_st64be_v_ >> 32) & 255);      \
    cc_st64be_p_[4] = (unsigned char)((cc_st64be_v_ >> 24) & 255);      \
    cc_st64be_p_[5] = (unsigned char)((cc_st64be_v_ >> 16) & 255);      \
    cc_st64be_p_[6] = (unsigned char)((cc_st64be_v_ >> 8) & 255);       \
    cc_st64be_p_[7] = (unsigned char)(cc_st64be_v_ & 255);              \
} while(0)

/*
 * CC_LOAD64_BE(x, y): read the 8 bytes starting at y as a big endian 64-bit
 * value and assign it to x. Byte-wise, so y may have any alignment.
 * y is evaluated exactly once, like the asm version.
 */
#define CC_LOAD64_BE(x, y) do {                                             \
    const unsigned char *const cc_ld64be_p_ = (const unsigned char *)(y);   \
    (x) = ((uint64_t)cc_ld64be_p_[0] << 56) |                               \
          ((uint64_t)cc_ld64be_p_[1] << 48) |                               \
          ((uint64_t)cc_ld64be_p_[2] << 40) |                               \
          ((uint64_t)cc_ld64be_p_[3] << 32) |                               \
          ((uint64_t)cc_ld64be_p_[4] << 24) |                               \
          ((uint64_t)cc_ld64be_p_[5] << 16) |                               \
          ((uint64_t)cc_ld64be_p_[6] << 8)  |                               \
          ((uint64_t)cc_ld64be_p_[7]);                                      \
} while(0)

#endif
203
204// MARK: - 32-bit Rotates
205
#if defined(_MSC_VER)
// MARK: -- MSVC version

/* MSVC: the four rotates map directly onto the _lrotr/_lrotl intrinsics. */
#include <stdlib.h>
#pragma intrinsic(_lrotr,_lrotl)
#define	CC_ROR(x,n) _lrotr(x,n)
#define	CC_ROL(x,n) _lrotl(x,n)
#define	CC_RORc(x,n) _lrotr(x,n)
#define	CC_ROLc(x,n) _lrotl(x,n)

#elif (defined(__i386__) || defined(__x86_64__))
// MARK: -- intel asm version

/* Rotate word left by the count held in i; the count is passed in %cl. */
static inline uint32_t CC_ROL(uint32_t word, int i)
{
    __asm__ ("roll %%cl,%0"
         :"=r" (word)
         :"0" (word),"c" (i));
    return word;
}

/* Rotate word right by the count held in i; the count is passed in %cl. */
static inline uint32_t CC_ROR(uint32_t word, int i)
{
    __asm__ ("rorl %%cl,%0"
         :"=r" (word)
         :"0" (word),"c" (i));
    return word;
}

/*
 * Constant-count rotates. These need to be macros because 'i' is encoded as
 * an immediate operand (constraint "I": a constant in the range 0..31).
 * The asm only computes a value from its operands, so it is not marked
 * volatile; this matches the 64-bit constant-count rotates.
 */
#define CC_ROLc(word, i)                    \
({  uint32_t cc_rolc_w_=(word);             \
    __asm__ ("roll %2,%0"                   \
        :"=r" (cc_rolc_w_)                  \
        :"0" (cc_rolc_w_),"I" (i));         \
    cc_rolc_w_;                             \
})


#define CC_RORc(word, i)                    \
({  uint32_t cc_rorc_w_=(word);             \
    __asm__ ("rorl %2,%0"                   \
        :"=r" (cc_rorc_w_)                  \
        :"0" (cc_rorc_w_),"I" (i));         \
    cc_rorc_w_;                             \
})

#else

// MARK: -- default version

/*
 * Rotate word left by i bits (i is taken modulo 32).
 * The complementary shift count is also reduced modulo 32 so that a rotate
 * by 0 never shifts a 32-bit value by 32, which would be undefined.
 */
static inline uint32_t CC_ROL(uint32_t word, int i)
{
    const unsigned int n = (unsigned int)i & 31u;
    return ( (word << n) | (word >> ((32u - n) & 31u)) );
}

/*
 * Rotate word right by i bits (i is taken modulo 32).
 * Same masking of the complementary shift count as CC_ROL.
 */
static inline uint32_t CC_ROR(uint32_t word, int i)
{
    const unsigned int n = (unsigned int)i & 31u;
    return ( (word >> n) | (word << ((32u - n) & 31u)) );
}

/* No immediate form is needed in plain C: reuse the variable-count rotates. */
#define	CC_ROLc(x, y) CC_ROL(x, y)
#define	CC_RORc(x, y) CC_ROR(x, y)

#endif
271
272// MARK: - 64 bits rotates
273
#if defined(__x86_64__)
// MARK: -- intel 64 asm version

/* Rotate word left by the count held in i; the count is passed in %cl. */
static inline uint64_t CC_ROL64(uint64_t word, int i)
{
    __asm__("rolq %%cl,%0"
        :"=r" (word)
        :"0" (word),"c" (i));
    return word;
}

/* Rotate word right by the count held in i; the count is passed in %cl. */
static inline uint64_t CC_ROR64(uint64_t word, int i)
{
    __asm__("rorq %%cl,%0"
        :"=r" (word)
        :"0" (word),"c" (i));
    return word;
}

/*
 * Constant-count rotates. These need to be macros because 'i' is encoded as
 * an immediate operand (constraint "J": a constant in the range 0..63).
 */
#define CC_ROL64c(word, i)          \
({                                  \
    uint64_t cc_rol64c_w_=(word);   \
    __asm__("rolq %2,%0"            \
        :"=r" (cc_rol64c_w_)        \
        :"0" (cc_rol64c_w_),"J" (i)); \
    cc_rol64c_w_;                   \
})

#define CC_ROR64c(word, i)          \
({                                  \
    uint64_t cc_ror64c_w_=(word);   \
    __asm__("rorq %2,%0"            \
        :"=r" (cc_ror64c_w_)        \
        :"0" (cc_ror64c_w_),"J" (i)); \
    cc_ror64c_w_;                   \
})


#else /* Not x86_64  */

// MARK: -- default C version

/*
 * Rotate word left by i bits (i is taken modulo 64).
 * The complementary shift count is also reduced modulo 64 so that a rotate
 * by 0 never shifts a 64-bit value by 64, which would be undefined.
 */
static inline uint64_t CC_ROL64(uint64_t word, int i)
{
    const unsigned int n = (unsigned int)i & 63u;
    return ( (word << n) | (word >> ((64u - n) & 63u)) );
}

/*
 * Rotate word right by i bits (i is taken modulo 64).
 * Same masking of the complementary shift count as CC_ROL64.
 */
static inline uint64_t CC_ROR64(uint64_t word, int i)
{
    const unsigned int n = (unsigned int)i & 63u;
    return ( (word >> n) | (word << ((64u - n) & 63u)) );
}

/* No immediate form is needed in plain C: reuse the variable-count rotates. */
#define	CC_ROL64c(x, y) CC_ROL64(x, y)
#define	CC_ROR64c(x, y) CC_ROR64(x, y)

#endif
331
332
333// MARK: - Byte Swaps
334
/* Reverse the order of the four bytes of x. */
static inline uint32_t CC_BSWAP(uint32_t x)
{
    return (
        ((x>>24)&0x000000FF) |
        ((x<<24)&0xFF000000) |
        ((x>>8) &0x0000FF00) |
        ((x<<8) &0x00FF0000)
    );
}

/*
 * Reverse the order of the eight bytes of x. Kept as a macro built only from
 * casts, masks and shifts; note that x appears (and is evaluated) eight
 * times in the expansion.
 */
#define CC_BSWAP64(x) \
((uint64_t)((((uint64_t)(x) & 0xff00000000000000ULL) >> 56) | \
(((uint64_t)(x) & 0x00ff000000000000ULL) >> 40) | \
(((uint64_t)(x) & 0x0000ff0000000000ULL) >> 24) | \
(((uint64_t)(x) & 0x000000ff00000000ULL) >>  8) | \
(((uint64_t)(x) & 0x00000000ff000000ULL) <<  8) | \
(((uint64_t)(x) & 0x0000000000ff0000ULL) << 24) | \
(((uint64_t)(x) & 0x000000000000ff00ULL) << 40) | \
(((uint64_t)(x) & 0x00000000000000ffULL) << 56)))

/*
 * CC_H2BE32 / CC_H2LE32: convert a 32-bit value between host byte order and
 * big / little endian order (the same macro works in both directions).
 *
 * The host is treated as little endian when __LITTLE_ENDIAN__ is defined, or
 * when the compiler provides __BYTE_ORDER__ and it equals
 * __ORDER_LITTLE_ENDIAN__. Checking the second pair covers GCC/Clang targets
 * that do not define __LITTLE_ENDIAN__. Anything else is treated as big
 * endian, as before.
 */
#if defined(__LITTLE_ENDIAN__) || \
    (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
     (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
#define CC_H2BE32(x) CC_BSWAP(x)
#define CC_H2LE32(x) (x)
#else
#define CC_H2BE32(x) (x)
#define CC_H2LE32(x) CC_BSWAP(x)
#endif
362
363
/*
 * cc_byte(x, n): extract byte number n of x, where byte 0 is the least
 * significant one. The MSVC flavour truncates through a cast to
 * unsigned char; every other compiler masks the shifted value with 0xFF.
 */
#if defined(_MSC_VER)
#define cc_byte(x, n) ((unsigned char)((x) >> ((n) * 8)))
#else
#define cc_byte(x, n) (((x) >> ((n) * 8)) & 0xFF)
#endif
370
/* HEAVISIDE_STEP (shifted by one)
   function f(x): x->0, when x=0
                  x->1, when x>0
   Can also be seen as a bitwise operation:
      f(x): x -> y
        y[0]=(OR x[i]) for all i (all bits)
        y[i]=0 for all i>0
   Each macro overwrites its argument x (which must be an lvalue) with the
   result. The value is folded in half with shift/OR steps
   (log2(<bitsize of x>) of them) and no data-dependent branch, which makes
   it useful for constant time checks.

   The temporaries carry macro-specific names so that a caller variable with
   a short name such as 't' is not captured by the expansion, and every use
   of x is parenthesised.
*/
#define HEAVISIDE_STEP_UINT64(x) {const uint64_t cc_hs64_in_ = (uint64_t)(x); \
    uint32_t cc_hs64_t_ = (uint32_t)(cc_hs64_in_>>32) | (uint32_t)cc_hs64_in_; \
    cc_hs64_t_=((cc_hs64_t_>>16) | cc_hs64_t_); \
    cc_hs64_t_=((cc_hs64_t_>>8) | cc_hs64_t_); \
    cc_hs64_t_=((cc_hs64_t_>>4) | cc_hs64_t_); \
    cc_hs64_t_=((cc_hs64_t_>>2) | cc_hs64_t_); \
    cc_hs64_t_=((cc_hs64_t_>>1) | cc_hs64_t_); \
    (x)=cc_hs64_t_ & 0x1;}

#define HEAVISIDE_STEP_UINT32(x) {const uint32_t cc_hs32_in_ = (uint32_t)(x); \
    uint16_t cc_hs32_t_ = (uint16_t)((cc_hs32_in_>>16) | cc_hs32_in_); \
    cc_hs32_t_=(uint16_t)((cc_hs32_t_>>8) | cc_hs32_t_); \
    cc_hs32_t_=(uint16_t)((cc_hs32_t_>>4) | cc_hs32_t_); \
    cc_hs32_t_=(uint16_t)((cc_hs32_t_>>2) | cc_hs32_t_); \
    cc_hs32_t_=(uint16_t)((cc_hs32_t_>>1) | cc_hs32_t_); \
    (x)=cc_hs32_t_ & 0x1;}

#define HEAVISIDE_STEP_UINT16(x) {const uint16_t cc_hs16_in_ = (uint16_t)(x); \
    uint8_t cc_hs16_t_ = (uint8_t)((cc_hs16_in_>>8) | cc_hs16_in_); \
    cc_hs16_t_=(uint8_t)((cc_hs16_t_>>4) | cc_hs16_t_); \
    cc_hs16_t_=(uint8_t)((cc_hs16_t_>>2) | cc_hs16_t_); \
    cc_hs16_t_=(uint8_t)((cc_hs16_t_>>1) | cc_hs16_t_); \
    (x)=cc_hs16_t_ & 0x1;}

#define HEAVISIDE_STEP_UINT8(x) {const uint8_t cc_hs8_in_ = (uint8_t)(x); \
    uint8_t cc_hs8_t_ = (uint8_t)((cc_hs8_in_>>4) | cc_hs8_in_); \
    cc_hs8_t_=(uint8_t)((cc_hs8_t_>>2) | cc_hs8_t_); \
    cc_hs8_t_=(uint8_t)((cc_hs8_t_>>1) | cc_hs8_t_); \
    (x)=cc_hs8_t_ & 0x1;}

/* Dispatch on sizeof(x) to the matching fixed-width macro. Sizes other than
   1, 2, 4 or 8 bytes fall back to a plain comparison against zero. */
#define CC_HEAVISIDE_STEP(x) { \
    if (sizeof(x) == 1) {HEAVISIDE_STEP_UINT8(x);}  \
    else if (sizeof(x) == 2) {HEAVISIDE_STEP_UINT16(x);} \
    else if (sizeof(x) == 4) {HEAVISIDE_STEP_UINT32(x);} \
    else if (sizeof(x) == 8) {HEAVISIDE_STEP_UINT64(x);} \
    else {(x)=(((x)==0)?0:1);} \
    }
418
419
/*
 * MAX_POWER_OF_2(x): expands to the constant 1, cast to the type of x and
 * shifted left by (number of bits in x) - 1, i.e. the value with only the
 * top bit of that type set. The bit count is computed as 8 * sizeof(x).
 * x itself is not modified; it is only used for its type and size.
 */
#define MAX_POWER_OF_2(x)   (((__typeof__(x))1) << ((sizeof(x) * 8) - 1))
422
423
424#endif /* _CORECRYPTO_CC_PRIV_H_ */
425