/*
 * PAD32byte(n): round a length of n bytes up to the next multiple of 32,
 * the cache-line padding granularity used by this code.  A value that is
 * already a multiple of 32 is returned unchanged.
 *
 * n is expanded exactly once, so an argument with side effects is safe.
 * Intended for non-negative integer n; n + 31 must not overflow n's type.
 */
#define  PAD32byte(n) (((n) + 31) / 32 * 32)
/*
 * PAD32dbl(n): round a count of n doubles up to the next count whose total
 * size is a multiple of 32 bytes (the cache-line padding granularity used by
 * this code), i.e. up to a multiple of 32/sizeof(double) elements.  A count
 * that is already such a multiple is returned unchanged.
 *
 * n is expanded exactly once, so an argument with side effects is safe.
 * Because of the sizeof operand the arithmetic is carried out in an unsigned
 * type (size_t for ordinary int arguments); intended for non-negative n.
 */
#define  PAD32dbl(n)  (((n) + (32/sizeof(double)) - 1) / (32/sizeof(double)) \
                      * (32/sizeof(double)))

/*
 * max(x,y): the larger of x and y; yields y when the two compare equal or
 * when the comparison x > y is false.
 * Caution: each argument may be expanded twice, so do not pass expressions
 * with side effects (e.g. max(i++, j)).
 */
#define max(x,y)      ((x)>(y)? (x) : (y))
/*
 * min(x,y): the smaller of x and y; yields y when the two compare equal or
 * when the comparison x < y is false.
 * Caution: each argument may be expanded twice, so do not pass expressions
 * with side effects (e.g. min(i++, j)).
 */
#define min(x,y)      ((x)<(y)? (x) : (y))
