/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_MD5_BYTESWAP_H
#define	_MD5_BYTESWAP_H

/*
 * definitions for inline functions for little-endian loads.
 *
 * This file has special definitions for UltraSPARC architectures,
 * which have a special address space identifier for loading 32 and 16 bit
 * integers in little-endian byte order.
 *
 * This file and common/crypto/md5/sparc/sun4[uv]/byteswap.il implement the
 * same thing and must be changed together.
 *
 * Every configuration defines LOAD_LITTLE_32(addr), which yields the 32-bit
 * little-endian value stored at addr.  Configurations that implement it
 * with a single 4-byte load also define _MD5_CHECK_ALIGNMENT (except
 * i386/amd64, which tolerate unaligned loads).
 */

#include <sys/types.h>
#if defined(__sparc)
#include <v9/sys/asi.h>
#elif defined(_LITTLE_ENDIAN)
#include <sys/byteorder.h>
#endif

#ifdef	__cplusplus
extern "C" {
#endif

#if defined(_LITTLE_ENDIAN)

/*
 * Little-endian optimization:  I don't need to do any weirdness.   On
 * some little-endian boxen, I'll have to do alignment checks, but I can do
 * that below.
 */

#if !defined(__i386) && !defined(__amd64)
/*
 * i386 and amd64 don't require aligned 4-byte loads.  The symbol
 * _MD5_CHECK_ALIGNMENT indicates below whether the MD5Transform function
 * requires alignment checking.
 */
#define	_MD5_CHECK_ALIGNMENT
#endif /* !__i386 && !__amd64 */

/* Native byte order already is little endian: a plain 4-byte load. */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(void *)(addr))

#else	/* !_LITTLE_ENDIAN */

/*
 * sparc v9/v8plus optimization:
 *
 * on the sparc v9/v8plus, we can load data little endian.  however, since
 * the compiler doesn't have direct support for little endian, we
 * link to an assembly-language routine `load_little_32' to do
 * the magic.  note that special care must be taken to ensure the
 * address is 32-bit aligned -- in the interest of speed, we don't
 * check to make sure, since careful programming can guarantee this
 * for us.
 */
#if defined(sun4u)

/* Define alignment check because we can 4-byte load as little endian. */
#define	_MD5_CHECK_ALIGNMENT
#define	LOAD_LITTLE_32(addr)	load_little_32((uint32_t *)(void *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * Load the 32-bit word at addr through the ASI_PL address space
 * identifier and return it.
 *
 * NOTE(review): the asm statement lists only the pointer value as an
 * input, with no memory operand or clobber; confirm callers do not depend
 * on ordering against earlier stores to *addr.
 */
static __inline__ uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t value;

	__asm__(
	    "lduwa [%1] %2, %0\n\t"
	    : "=r" (value)
	    : "r" (addr), "i" (ASI_PL));

	return (value);
}
#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/* Supplied out of line for non-GCC compilers (see byteswap.il). */
extern	uint32_t load_little_32(uint32_t *);
#endif	/* !__GNUC__ */

/* Placate lint */
#if defined(__lint)
uint32_t
load_little_32(uint32_t *addr)
{
	return (*addr);
}
#endif	/* __lint */

#else
/* big endian -- will work on little endian, but slowly */
/* Since we do byte operations, we don't have to check for alignment. */
/*
 * addr must point to unsigned bytes and is evaluated four times, so it
 * must not have side effects.  Each byte is widened to uint32_t before
 * it is shifted: a bare (addr)[3] << 24 would be computed in signed int,
 * which shifts into the sign bit for bytes >= 0x80 and makes the whole
 * expression a (possibly negative) int instead of a uint32_t.
 */
#define	LOAD_LITTLE_32(addr)	\
	((uint32_t)(addr)[0] | ((uint32_t)(addr)[1] << 8) | \
	((uint32_t)(addr)[2] << 16) | ((uint32_t)(addr)[3] << 24))
#endif	/* sun4u */

#if defined(sun4v)

/*
 * For N1 want to minimize number of arithmetic operations. This is best
 * achieved by using the %asi register to specify ASI for the lduwa operations.
 * Also, have a separate inline template for each word, so can utilize the
 * immediate offset in lduwa, without relying on the compiler to do the right
 * thing.
 *
 * Moving to 64-bit loads might also be beneficial.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * This actually sets the ASI register, not necessarily to ASI_PL.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
	    "wr %%g0, %0, %%asi\n\t"
	    : /* Nothing */
	    : "r" (asi));
}

/*
 * Read back the current contents of the %asi register.
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t asi;

	__asm__ __volatile__(
	    "rd %%asi, %0\n\t"
	    : "=r" (asi));

	return (asi);
}

/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the offset in hex, without the 0x.
 *
 * Each generated function loads the word at byte offset (0x<off> << 2)
 * from addr using whatever ASI is currently held in %asi, so %asi must
 * have been loaded (see set_little()) before any of them is used.
 */
#define	LL_TEMPLATE(__off)			\
static __inline__ uint32_t			\
load_little_32_##__off(uint32_t *addr)		\
{						\
	uint32_t value;				\
	__asm__(				\
	    "lduwa [%1 + %2]%%asi, %0\n\t"	\
	    : "=r" (value)			\
	    : "r" (addr), "i" ((0x##__off) << 2));	\
	return (value);				\
}

LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
#undef	LL_TEMPLATE

#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/*
 * Using the %asi register to achieve little endian loads - register
 * is set using an inline template.
 *
 * Saves a few arithmetic ops as can now use an immediate offset with the
 * lduwa instructions.
 *
 * NOTE(review): these prototypes pass/return the ASI as uint32_t while
 * the GCC inlines use uint8_t; confirm against byteswap.il before
 * unifying them.
 */
extern void set_little(uint32_t);
extern uint32_t get_little(void);

extern	uint32_t load_little_32_0(uint32_t *);
extern	uint32_t load_little_32_1(uint32_t *);
extern	uint32_t load_little_32_2(uint32_t *);
extern	uint32_t load_little_32_3(uint32_t *);
extern	uint32_t load_little_32_4(uint32_t *);
extern	uint32_t load_little_32_5(uint32_t *);
extern	uint32_t load_little_32_6(uint32_t *);
extern	uint32_t load_little_32_7(uint32_t *);
extern	uint32_t load_little_32_8(uint32_t *);
extern	uint32_t load_little_32_9(uint32_t *);
extern	uint32_t load_little_32_a(uint32_t *);
extern	uint32_t load_little_32_b(uint32_t *);
extern	uint32_t load_little_32_c(uint32_t *);
extern	uint32_t load_little_32_d(uint32_t *);
extern	uint32_t load_little_32_e(uint32_t *);
extern	uint32_t load_little_32_f(uint32_t *);
#endif	/* !__GNUC__ */
#endif	/* sun4v */

#endif	/* _LITTLE_ENDIAN */

#ifdef	__cplusplus
}
#endif

#endif	/* !_MD5_BYTESWAP_H */