#ifndef _M68K_STRING_H_
#define _M68K_STRING_H_

#include <asm/setup.h>
#include <asm/page.h>

#define __HAVE_ARCH_STRCPY
static inline char *strcpy(char *dest, const char *src)
{
	char *xdest = dest;

	__asm__ __volatile__
		("1:\tmoveb %1@+,%0@+\n\t"
		 "jne 1b"
		 : "=a" (dest), "=a" (src)
		 : "0" (dest), "1" (src) : "memory");
	return xdest;
}
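
/*
 * Implementation note: moveb sets the condition codes from the byte just
 * moved, so "jne 1b" keeps copying until the NUL terminator has been
 * stored.  The "0"/"1" input constraints tie the incoming pointers to
 * the same registers as the "=a" outputs, letting the asm advance them
 * in place.  Roughly equivalent C for the loop above (a sketch, not
 * what gets compiled):
 *
 *	do {
 *		*dest++ = *src++;
 *	} while (src[-1] != '\0');
 *
 * The same register-tying idiom is used by the other asm routines below.
 */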

#define __HAVE_ARCH_STRNCPY
static inline char *strncpy(char *dest, const char *src, size_t n)
{
	char *xdest = dest;

	if (n == 0)
		return xdest;

	__asm__ __volatile__
		("1:\tmoveb %1@+,%0@+\n\t"
		 "jeq 2f\n\t"
		 "subql #1,%2\n\t"
		 "jne 1b\n\t"
		 "2:"
		 : "=a" (dest), "=a" (src), "=d" (n)
		 : "0" (dest), "1" (src), "2" (n)
		 : "memory");
	return xdest;
}

#define __HAVE_ARCH_STRCAT
static inline char *strcat(char *dest, const char *src)
{
	char *tmp = dest;

	while (*dest)
		dest++;
	while ((*dest++ = *src++))
		;

	return tmp;
}

#define __HAVE_ARCH_STRNCAT
static inline char *strncat(char *dest, const char *src, size_t count)
{
	char *tmp = dest;

	if (count) {
		while (*dest)
			dest++;
		while ((*dest++ = *src++)) {
			if (--count == 0) {
				*dest++ = '\0';
				break;
			}
		}
	}

	return tmp;
}

#define __HAVE_ARCH_STRCHR
static inline char *strchr(const char *s, int c)
{
	const char ch = c;

	for (; *s != ch; ++s)
		if (*s == '\0')
			return NULL;
	return (char *) s;
}

/* strstr !! */

#define __HAVE_ARCH_STRLEN
static inline size_t strlen(const char *s)
{
	const char *sc;

	for (sc = s; *sc != '\0'; ++sc)
		;
	return sc - s;
}

/* strnlen !! */

#define __HAVE_ARCH_STRCMP
static inline int strcmp(const char *cs, const char *ct)
{
	char __res;

	__asm__
		("1:\tmoveb %0@+,%2\n\t"	/* get *cs */
		 "cmpb %1@+,%2\n\t"		/* compare a byte */
		 "jne 2f\n\t"			/* not equal, break out */
		 "tstb %2\n\t"			/* at end of cs? */
		 "jne 1b\n\t"			/* no, keep going */
		 "jra 3f\n\t"			/* strings are equal */
		 "2:\tsubb %1@-,%2\n\t"		/* *cs - *ct */
		 "3:"
		 : "=a" (cs), "=a" (ct), "=d" (__res)
		 : "0" (cs), "1" (ct));
	return __res;
}

#define __HAVE_ARCH_STRNCMP
static inline int strncmp(const char *cs, const char *ct, size_t count)
{
	char __res;

	if (!count)
		return 0;
	__asm__
		("1:\tmoveb %0@+,%3\n\t"	/* get *cs */
		 "cmpb %1@+,%3\n\t"		/* compare a byte */
		 "jne 3f\n\t"			/* not equal, break out */
		 "tstb %3\n\t"			/* at end of cs? */
		 "jeq 4f\n\t"			/* yes, all done */
		 "subql #1,%2\n\t"		/* no, adjust count */
		 "jne 1b\n\t"			/* more to do, keep going */
		 "2:\tmoveq #0,%3\n\t"		/* strings are equal */
		 "jra 4f\n\t"
		 "3:\tsubb %1@-,%3\n\t"		/* *cs - *ct */
		 "4:"
		 : "=a" (cs), "=a" (ct), "=d" (count), "=d" (__res)
		 : "0" (cs), "1" (ct), "2" (count));
	return __res;
}

#define __HAVE_ARCH_MEMSET
/*
 * This is really ugly, but it's highly optimizable by the compiler and
 * is meant as compensation for gcc's missing __builtin_memset().  For
 * the 680[23]0 it might be worth considering the optimal number of
 * misaligned writes compared to the number of tests'n'branches needed
 * to align the destination address.  The 680[46]0 don't really care due
 * to their copy-back caches.
 *						10/09/96 - Jes Sorensen
 */
static inline void *__memset_g(void *s, int c, size_t count)
{
	void *xs = s;
	size_t temp;

	if (!count)
		return xs;

	c &= 0xff;
	c |= c << 8;
	c |= c << 16;

	if (count < 36) {
		long *ls = s;

		/* Unrolled longword stores; each case deliberately falls
		 * through to the next. */
		switch (count) {
		case 32: case 33: case 34: case 35:
			*ls++ = c;
		case 28: case 29: case 30: case 31:
			*ls++ = c;
		case 24: case 25: case 26: case 27:
			*ls++ = c;
		case 20: case 21: case 22: case 23:
			*ls++ = c;
		case 16: case 17: case 18: case 19:
			*ls++ = c;
		case 12: case 13: case 14: case 15:
			*ls++ = c;
		case 8: case 9: case 10: case 11:
			*ls++ = c;
		case 4: case 5: case 6: case 7:
			*ls++ = c;
			break;
		default:
			break;
		}
		s = ls;
		if (count & 0x02) {
			short *ss = s;
			*ss++ = c;
			s = ss;
		}
		if (count & 0x01) {
			char *cs = s;
			*cs++ = c;
			s = cs;
		}
		return xs;
	}

	if ((long) s & 1) {
		char *cs = s;
		*cs++ = c;
		s = cs;
		count--;
	}
	if (count > 2 && (long) s & 2) {
		short *ss = s;
		*ss++ = c;
		s = ss;
		count -= 2;
	}
	temp = count >> 2;
	if (temp) {
		long *ls = s;
		temp--;
		do
			*ls++ = c;
		while (temp--);
		s = ls;
	}
	if (count & 2) {
		short *ss = s;
		*ss++ = c;
		s = ss;
	}
	if (count & 1) {
		char *cs = s;
		*cs = c;
	}
	return xs;
}

/*
 * __memset_page assumes that data is longword aligned.  Most, if not
 * all, of these page-sized memsets are performed on page-aligned areas,
 * thus we do not need to check if the destination is longword aligned.
 * Of course we suffer a serious performance loss if this is not the
 * case, but I think the risk of this ever happening is extremely small.
 * We spend a lot of time clearing pages in get_empty_page() so I think
 * it is worth it anyway.  Besides, the 680[46]0 do not really care
 * about misaligned writes due to their copy-back caches.
 *
 * The optimized case for the 680[46]0 is implemented using the move16
 * instruction.  My tests showed that this implementation is 35-45%
 * faster than the original implementation using movel; the only caveat
 * is that the destination address must be 16-byte aligned.
 *						01/09/96 - Jes Sorensen
 */
static inline void *__memset_page(void *s, int c, size_t count)
{
	unsigned long data, tmp;
	void *xs, *sp;

	xs = sp = s;

	c = c & 255;
	data = c | (c << 8);
	data |= data << 16;

#ifdef CPU_M68040_OR_M68060_ONLY

	if (((unsigned long) s) & 0x0f)
		__memset_g(s, c, count);
	else {
		unsigned long *ls = s;

		/* Prime the first 16 bytes, then let move16 replicate
		 * that line over the rest of the page. */
		*ls++ = data;
		*ls++ = data;
		*ls++ = data;
		*ls++ = data;
		s = ls;

		__asm__ __volatile__("1:\t"
				     ".chip 68040\n\t"
				     "move16 %2@+,%0@+\n\t"
				     ".chip 68k\n\t"
				     "subqw #8,%2\n\t"
				     "subqw #8,%2\n\t"
				     "dbra %1,1b\n\t"
				     : "=a" (s), "=d" (tmp)
				     : "a" (sp), "0" (s), "1" ((count - 16) / 16 - 1)
				     );
	}

#else
	__asm__ __volatile__("1:\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "dbra %1,1b\n\t"
			     : "=a" (s), "=d" (tmp)
			     : "d" (data), "0" (s), "1" (count / 32 - 1)
			     );
#endif

	return xs;
}

extern void *memset(void *, int, __kernel_size_t);

#define __memset_const(s,c,count) \
((count==PAGE_SIZE) ? \
  __memset_page((s),(c),(count)) : \
  __memset_g((s),(c),(count)))

#define memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __memset_const((s),(c),(count)) : \
 __memset_g((s),(c),(count)))
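
/*
 * Dispatch sketch for the macros above: a compile-time-constant count
 * of exactly PAGE_SIZE goes to __memset_page(), every other count goes
 * to __memset_g().  Both first replicate the fill byte into a longword
 * (e.g. c = 0x5a becomes 0x5a5a5a5a) so the unrolled loops can store
 * four bytes per write.  Illustrative only, with placeholder variables:
 *
 *	memset(page, 0, PAGE_SIZE);	// constant PAGE_SIZE -> __memset_page()
 *	memset(buf, 0x5a, len);		// variable length    -> __memset_g()
 */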

#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *, const void *, size_t);
/*
 * __builtin_memcpy() does not handle page-sized memcpys very well,
 * thus following the same assumptions as for the page-sized memsets,
 * this function copies page-sized areas using an unrolled loop, without
 * considering alignment.
 *
 * For 680[46]0-only kernels we use the move16 instruction instead, as
 * it writes through the data cache, invalidating the cache lines
 * touched.  In this way we do not use up the entire data cache (well,
 * half of it on the 68060) by copying a page.  An unrolled loop of two
 * move16 instructions seems to be the fastest.  The only caveat is that
 * both source and destination must be 16-byte aligned; if not, we fall
 * back to the generic memcpy function.  - Jes
 */
static inline void *__memcpy_page(void *to, const void *from, size_t count)
{
	unsigned long tmp;
	void *xto = to;

#ifdef CPU_M68040_OR_M68060_ONLY

	if (((unsigned long) to | (unsigned long) from) & 0x0f)
		return memcpy(to, from, count);

	__asm__ __volatile__("1:\t"
			     ".chip 68040\n\t"
			     "move16 %1@+,%0@+\n\t"
			     "move16 %1@+,%0@+\n\t"
			     ".chip 68k\n\t"
			     "dbra %2,1b\n\t"
			     : "=a" (to), "=a" (from), "=d" (tmp)
			     : "0" (to), "1" (from), "2" (count / 32 - 1)
			     );
#else
	__asm__ __volatile__("1:\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "dbra %2,1b\n\t"
			     : "=a" (to), "=a" (from), "=d" (tmp)
			     : "0" (to), "1" (from), "2" (count / 32 - 1)
			     );
#endif
	return xto;
}

#define __memcpy_const(to, from, n) \
((n==PAGE_SIZE) ? \
  __memcpy_page((to),(from),(n)) : \
  __builtin_memcpy((to),(from),(n)))

#define memcpy(to, from, n) \
(__builtin_constant_p(n) ? \
 __memcpy_const((to),(from),(n)) : \
 memcpy((to),(from),(n)))
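
/*
 * Dispatch sketch for the memcpy macro above: only a compile-time
 * constant n equal to PAGE_SIZE takes the __memcpy_page() path; other
 * constant sizes go to __builtin_memcpy(), and variable sizes call the
 * out-of-line memcpy() (the macro cannot recurse, since a function-like
 * macro is not re-expanded inside its own expansion).  On 680[46]0-only
 * kernels __memcpy_page() itself falls back to the out-of-line memcpy()
 * when either pointer is not 16-byte aligned, as move16 requires
 * 16-byte alignment.  Illustrative only; to_page/from_page are
 * placeholders:
 *
 *	memcpy(to_page, from_page, PAGE_SIZE);	// -> __memcpy_page()
 */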

#define __HAVE_ARCH_MEMMOVE
static inline void *memmove(void *dest, const void *src, size_t n)
{
	void *xdest = dest;
	size_t temp;

	if (!n)
		return xdest;

	if (dest < src) {
		/* Copy forwards, aligning the destination first. */
		if ((long) dest & 1) {
			char *cdest = dest;
			const char *csrc = src;
			*cdest++ = *csrc++;
			dest = cdest;
			src = csrc;
			n--;
		}
		if (n > 2 && (long) dest & 2) {
			short *sdest = dest;
			const short *ssrc = src;
			*sdest++ = *ssrc++;
			dest = sdest;
			src = ssrc;
			n -= 2;
		}
		temp = n >> 2;
		if (temp) {
			long *ldest = dest;
			const long *lsrc = src;
			temp--;
			do
				*ldest++ = *lsrc++;
			while (temp--);
			dest = ldest;
			src = lsrc;
		}
		if (n & 2) {
			short *sdest = dest;
			const short *ssrc = src;
			*sdest++ = *ssrc++;
			dest = sdest;
			src = ssrc;
		}
		if (n & 1) {
			char *cdest = dest;
			const char *csrc = src;
			*cdest = *csrc;
		}
	} else {
		/* Destination above the source: copy backwards so an
		 * overlapping region is not clobbered before it is read. */
		dest = (char *) dest + n;
		src = (const char *) src + n;
		if ((long) dest & 1) {
			char *cdest = dest;
			const char *csrc = src;
			*--cdest = *--csrc;
			dest = cdest;
			src = csrc;
			n--;
		}
		if (n > 2 && (long) dest & 2) {
			short *sdest = dest;
			const short *ssrc = src;
			*--sdest = *--ssrc;
			dest = sdest;
			src = ssrc;
			n -= 2;
		}
		temp = n >> 2;
		if (temp) {
			long *ldest = dest;
			const long *lsrc = src;
			temp--;
			do
				*--ldest = *--lsrc;
			while (temp--);
			dest = ldest;
			src = lsrc;
		}
		if (n & 2) {
			short *sdest = dest;
			const short *ssrc = src;
			*--sdest = *--ssrc;
			dest = sdest;
			src = ssrc;
		}
		if (n & 1) {
			char *cdest = dest;
			const char *csrc = src;
			*--cdest = *--csrc;
		}
	}
	return xdest;
}

#define __HAVE_ARCH_MEMCMP
extern int memcmp(const void *, const void *, size_t);
#define memcmp(cs, ct, n) \
(__builtin_constant_p(n) ? \
 __builtin_memcmp((cs),(ct),(n)) : \
 memcmp((cs),(ct),(n)))

#define __HAVE_ARCH_MEMCHR
extern inline void *memchr(const void *cs, int c, size_t count)
{
	/* Someone else can optimize this, I don't care - tonym@mac.linux-m68k.org */
	unsigned char *ret = (unsigned char *) cs;

	for (; count > 0; count--, ret++)
		if (*ret == (unsigned char) c)
			return ret;

	return NULL;
}

#endif /* _M68K_STRING_H_ */