1#ifndef _M68K_STRING_H_
2#define _M68K_STRING_H_
3
4#include <asm/setup.h>
5#include <asm/page.h>
6
7#define __HAVE_ARCH_STRCPY
/*
 * strcpy() - copy the NUL-terminated string @src into @dest.
 *
 * One-instruction loop: "moveb %1@+,%0@+" copies a byte with
 * post-increment on both pointers and sets the condition codes from
 * the byte moved, so "jne 1b" keeps looping until the terminating
 * NUL itself has been copied.  Returns the original @dest.
 */
static inline char * strcpy(char * dest,const char *src)
{
  char *xdest = dest;	/* saved return value; asm advances dest */

  __asm__ __volatile__
       ("1:\tmoveb %1@+,%0@+\n\t"
        "jne 1b"
	: "=a" (dest), "=a" (src)
        : "0" (dest), "1" (src) : "memory");
  return xdest;
}
19
20#define __HAVE_ARCH_STRNCPY
21static inline char * strncpy(char *dest, const char *src, size_t n)
22{
23  char *xdest = dest;
24
25  if (n == 0)
26    return xdest;
27
28  __asm__ __volatile__
29       ("1:\tmoveb %1@+,%0@+\n\t"
30	"jeq 2f\n\t"
31        "subql #1,%2\n\t"
32        "jne 1b\n\t"
33        "2:"
34        : "=a" (dest), "=a" (src), "=d" (n)
35        : "0" (dest), "1" (src), "2" (n)
36        : "memory");
37  return xdest;
38}
39
40#define __HAVE_ARCH_STRCAT
/*
 * strcat() - append the NUL-terminated string @src to the end of
 * the NUL-terminated string @dest.  Returns the original @dest.
 */
static inline char * strcat(char * dest, const char * src)
{
	char *ret = dest;
	char *p = dest;

	/* locate the terminator of dest */
	while (*p != '\0')
		p++;
	/* copy src, including its terminating NUL */
	do {
		*p++ = *src;
	} while (*src++ != '\0');

	return ret;
}
52
53#define __HAVE_ARCH_STRNCAT
/*
 * strncat() - append at most @count characters of @src to the end
 * of the NUL-terminated string @dest, then terminate the result.
 * Returns the original @dest.  (The result may thus use up to
 * strlen(dest) + count + 1 bytes.)
 */
static inline char * strncat(char *dest, const char *src, size_t count)
{
	char *ret = dest;
	char *p = dest;

	if (count != 0) {
		/* locate the terminator of dest */
		while (*p != '\0')
			p++;
		for (;;) {
			char ch = *src++;

			*p++ = ch;
			if (ch == '\0')
				break;		/* src exhausted first */
			if (--count == 0) {
				*p = '\0';	/* hit the limit: terminate */
				break;
			}
		}
	}

	return ret;
}
71
72#define __HAVE_ARCH_STRCHR
/*
 * strchr() - find the first occurrence of character @c (truncated
 * to char) in the NUL-terminated string @s.  The terminating NUL
 * itself is searchable.  Returns a pointer to the match, or NULL.
 */
static inline char * strchr(const char * s, int c)
{
  const char target = c;

  while (*s != target) {
    if (!*s)
      return NULL;
    ++s;
  }
  return (char *) s;
}
82
83
84
85
86/* strstr !! */
87
88#define __HAVE_ARCH_STRLEN
/*
 * strlen() - count the characters in @s up to (not including) the
 * terminating NUL.
 */
static inline size_t strlen(const char * s)
{
  const char *p = s;

  while (*p != '\0')
    ++p;
  return (size_t)(p - s);
}
95
96/* strnlen !! */
97
98#define __HAVE_ARCH_STRCMP
/*
 * strcmp() - compare two NUL-terminated strings.
 *
 * Returns 0 when the strings are equal, otherwise the byte
 * difference *cs - *ct at the first mismatch ("%1@-" rewinds ct to
 * the mismatching byte before subtracting).
 *
 * NOTE(review): the difference is computed in a byte register and
 * widened through a plain 'char', so the sign of a non-zero result
 * depends on char signedness — traditional m68k behaviour rather
 * than ISO C's unsigned-char comparison; confirm callers only test
 * for zero / sign consistently.
 */
static inline int strcmp(const char * cs,const char * ct)
{
  char __res;

  __asm__
       ("1:\tmoveb %0@+,%2\n\t" /* get *cs */
        "cmpb %1@+,%2\n\t"      /* compare a byte */
        "jne  2f\n\t"           /* not equal, break out */
        "tstb %2\n\t"           /* at end of cs? */
        "jne  1b\n\t"           /* no, keep going */
        "jra  3f\n\t"		/* strings are equal */
        "2:\tsubb %1@-,%2\n\t"  /* *cs - *ct */
        "3:"
        : "=a" (cs), "=a" (ct), "=d" (__res)
        : "0" (cs), "1" (ct));
  return __res;
}
116
117#define __HAVE_ARCH_STRNCMP
/*
 * strncmp() - compare at most @count bytes of two strings.
 *
 * Returns 0 if the first @count bytes match (or both strings end
 * with a NUL before that), otherwise the byte difference *cs - *ct
 * at the first mismatch.  The count == 0 guard is needed because
 * the asm loop compares one byte before decrementing the count.
 *
 * NOTE(review): as with strcmp() above, the non-zero result is a
 * signed byte difference routed through 'char', not ISO C's
 * unsigned-char comparison.
 */
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
  char __res;

  if (!count)
    return 0;
  __asm__
       ("1:\tmovb %0@+,%3\n\t"          /* get *cs */
        "cmpb   %1@+,%3\n\t"            /* compare a byte */
        "jne    3f\n\t"                 /* not equal, break out */
        "tstb   %3\n\t"                 /* at end of cs? */
        "jeq    4f\n\t"                 /* yes, all done */
        "subql  #1,%2\n\t"              /* no, adjust count */
        "jne    1b\n\t"                 /* more to do, keep going */
        "2:\tmoveq #0,%3\n\t"           /* strings are equal */
        "jra    4f\n\t"
        "3:\tsubb %1@-,%3\n\t"          /* *cs - *ct */
        "4:"
        : "=a" (cs), "=a" (ct), "=d" (count), "=d" (__res)
        : "0" (cs), "1" (ct), "2" (count));
  return __res;
}
140
141#define __HAVE_ARCH_MEMSET
/*
 * This is really ugly, but it's highly optimizable by the
 * compiler and is meant as compensation for gcc's missing
 * __builtin_memset(). For the 680[23]0 it might be worth considering
 * the optimal number of misaligned writes compared to the number of
 * tests'n'branches needed to align the destination address. The
 * 680[46]0 doesn't really care due to their copy-back caches.
 *						10/09/96 - Jes Sorensen
 */
/*
 * __memset_g() - generic memset: fill @count bytes at @s with the
 * byte value @c.  Returns the original @s.
 *
 * The fill byte is replicated into all four bytes of @c so that the
 * bulk of the work can be done with longword stores.  Small areas
 * (< 36 bytes) are handled by a fall-through switch that emits
 * count/4 longword stores with no alignment fixup (see the tradeoff
 * note above); larger areas are aligned first.
 */
static inline void * __memset_g(void * s, int c, size_t count)
{
  void *xs = s;		/* saved return value */
  size_t temp;

  if (!count)
    return xs;

  /* replicate the fill byte into all 4 bytes of c */
  c &= 0xff;
  c |= c << 8;
  c |= c << 16;

  if (count < 36){
	  /* Small area: every case deliberately falls through, so
	   * count/4 longword stores are executed in total. */
	  long *ls = s;

	  switch(count){
	  case 32: case 33: case 34: case 35:
		  *ls++ = c;
	  case 28: case 29: case 30: case 31:
		  *ls++ = c;
	  case 24: case 25: case 26: case 27:
		  *ls++ = c;
	  case 20: case 21: case 22: case 23:
		  *ls++ = c;
	  case 16: case 17: case 18: case 19:
		  *ls++ = c;
	  case 12: case 13: case 14: case 15:
		  *ls++ = c;
	  case 8: case 9: case 10: case 11:
		  *ls++ = c;
	  case 4: case 5: case 6: case 7:
		  *ls++ = c;
		  break;
	  default:
		  break;
	  }
	  s = ls;
	  /* remaining 0-3 bytes: one short and/or one byte store */
	  if (count & 0x02){
		  short *ss = s;
		  *ss++ = c;
		  s = ss;
	  }
	  if (count & 0x01){
		  char *cs = s;
		  *cs++ = c;
		  s = cs;
	  }
	  return xs;
  }

  /* Large area (>= 36 bytes): align the destination to a longword
   * boundary first, adjusting count accordingly. */
  if ((long) s & 1)
    {
      char *cs = s;
      *cs++ = c;
      s = cs;
      count--;
    }
  if (count > 2 && (long) s & 2)
    {
      short *ss = s;
      *ss++ = c;
      s = ss;
      count -= 2;
    }
  /* main loop: count >> 2 longword stores */
  temp = count >> 2;
  if (temp)
    {
      long *ls = s;
      temp--;
      do
	*ls++ = c;
      while (temp--);
      s = ls;
    }
  /* trailing 0-3 bytes */
  if (count & 2)
    {
      short *ss = s;
      *ss++ = c;
      s = ss;
    }
  if (count & 1)
    {
      char *cs = s;
      *cs = c;
    }
  return xs;
}
238
239/*
240 * __memset_page assumes that data is longword aligned. Most, if not
241 * all, of these page sized memsets are performed on page aligned
242 * areas, thus we do not need to check if the destination is longword
243 * aligned. Of course we suffer a serious performance loss if this is
244 * not the case but I think the risk of this ever happening is
245 * extremely small. We spend a lot of time clearing pages in
246 * get_empty_page() so I think it is worth it anyway. Besides, the
247 * 680[46]0 do not really care about misaligned writes due to their
248 * copy-back cache.
249 *
250 * The optimized case for the 680[46]0 is implemented using the move16
251 * instruction. My tests showed that this implementation is 35-45%
252 * faster than the original implementation using movel, the only
253 * caveat is that the destination address must be 16-byte aligned.
254 *                                            01/09/96 - Jes Sorensen
255 */
/*
 * __memset_page() - page-sized memset; see the block comment above
 * for the alignment assumptions.  @count is expected to be
 * PAGE_SIZE (a multiple of 32); the dbra loops below iterate
 * count/32 - 1 (+1) times and do no tail handling.
 */
static inline void * __memset_page(void * s,int c,size_t count)
{
  unsigned long data, tmp;
  void *xs, *sp;

  xs = sp = s;		/* xs: return value, sp: start (move16 source) */

  /* replicate the fill byte into all 4 bytes of data */
  c = c & 255;
  data = c | (c << 8);
  data |= data << 16;

#ifdef CPU_M68040_OR_M68060_ONLY

  if (((unsigned long) s) & 0x0f)
	  /* move16 needs 16-byte alignment; fall back */
	  __memset_g(s, c, count);
  else{
	  /* Seed the first 16 bytes with longword stores, then use
	   * move16 to replicate that line across the page, rewinding
	   * the source pointer (%2) by 16 after each copy so it
	   * always reads the seeded line.
	   * NOTE(review): "*((unsigned long *)(s))++" relies on the
	   * old GCC cast-as-lvalue extension (removed in GCC 4.x). */
	  *((unsigned long *)(s))++ = data;
	  *((unsigned long *)(s))++ = data;
	  *((unsigned long *)(s))++ = data;
	  *((unsigned long *)(s))++ = data;

	  __asm__ __volatile__("1:\t"
			       ".chip 68040\n\t"
			       "move16 %2@+,%0@+\n\t"
			       ".chip 68k\n\t"
			       "subqw  #8,%2\n\t"
			       "subqw  #8,%2\n\t"
			       "dbra   %1,1b\n\t"
			       : "=a" (s), "=d" (tmp)
			       : "a" (sp), "0" (s), "1" ((count - 16) / 16 - 1)
			       );
  }

#else
  /* Generic path: unrolled loop of eight longword stores (32 bytes)
   * per dbra iteration; assumes longword-aligned s. */
  __asm__ __volatile__("1:\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "movel %2,%0@+\n\t"
		       "dbra  %1,1b\n\t"
		       : "=a" (s), "=d" (tmp)
		       : "d" (data), "0" (s), "1" (count / 32 - 1)
		       );
#endif

  return xs;
}
307
/* Out-of-line fallback implementation (arch/m68k/lib). */
extern void *memset(void *,int,__kernel_size_t);

/* Constant count: route page-sized fills to the optimized
 * __memset_page(), everything else to the generic __memset_g(). */
#define __memset_const(s,c,count) \
((count==PAGE_SIZE) ? \
  __memset_page((s),(c),(count)) : \
  __memset_g((s),(c),(count)))

/* memset(): choose the variant at compile time when the length is a
 * compile-time constant; otherwise always use __memset_g(). */
#define memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __memset_const((s),(c),(count)) : \
 __memset_g((s),(c),(count)))
319
320#define __HAVE_ARCH_MEMCPY
321extern void * memcpy(void *, const void *, size_t );
322/*
323 * __builtin_memcpy() does not handle page-sized memcpys very well,
324 * thus following the same assumptions as for page-sized memsets, this
325 * function copies page-sized areas using an unrolled loop, without
326 * considering alignment.
327 *
328 * For the 680[46]0 only kernels we use the move16 instruction instead
329 * as it writes through the data-cache, invalidating the cache-lines
330 * touched. In this way we do not use up the entire data-cache (well,
331 * half of it on the 68060) by copying a page. An unrolled loop of two
 * move16 instructions seems to be the fastest. The only caveat is that
333 * both source and destination must be 16-byte aligned, if not we fall
334 * back to the generic memcpy function.  - Jes
335 */
/*
 * __memcpy_page() - page-sized memcpy; see the block comment above.
 * @count is expected to be PAGE_SIZE (a multiple of 32); the dbra
 * loops copy 32 bytes per iteration with no tail handling.
 */
static inline void * __memcpy_page(void * to, const void * from, size_t count)
{
  unsigned long tmp;
  void *xto = to;	/* saved return value; asm advances to */

#ifdef CPU_M68040_OR_M68060_ONLY

  /* move16 requires both pointers 16-byte aligned; otherwise fall
   * back to the generic out-of-line memcpy(). */
  if (((unsigned long) to | (unsigned long) from) & 0x0f)
	  return memcpy(to, from, count);

  /* two move16s (32 bytes) per dbra iteration */
  __asm__ __volatile__("1:\t"
		       ".chip 68040\n\t"
		       "move16 %1@+,%0@+\n\t"
		       "move16 %1@+,%0@+\n\t"
		       ".chip 68k\n\t"
		       "dbra  %2,1b\n\t"
		       : "=a" (to), "=a" (from), "=d" (tmp)
		       : "0" (to), "1" (from) , "2" (count / 32 - 1)
		       );
#else
  /* eight longword moves (32 bytes) per dbra iteration; assumes
   * longword alignment as discussed above */
  __asm__ __volatile__("1:\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "movel %1@+,%0@+\n\t"
		       "dbra  %2,1b\n\t"
		       : "=a" (to), "=a" (from), "=d" (tmp)
		       : "0" (to), "1" (from) , "2" (count / 32 - 1)
		       );
#endif
  return xto;
}
372
/* Constant n: page-sized copies use __memcpy_page(), smaller
 * constant sizes use the compiler's __builtin_memcpy(). */
#define __memcpy_const(to, from, n) \
((n==PAGE_SIZE) ? \
  __memcpy_page((to),(from),(n)) : \
  __builtin_memcpy((to),(from),(n)))

/* memcpy(): pick at compile time; the inner memcpy reference is not
 * re-expanded by the preprocessor, so a non-constant n calls the
 * out-of-line function declared above. */
#define memcpy(to, from, n) \
(__builtin_constant_p(n) ? \
 __memcpy_const((to),(from),(n)) : \
 memcpy((to),(from),(n)))
382
383#define __HAVE_ARCH_MEMMOVE
/*
 * memmove() - copy @n bytes from @src to @dest, handling
 * overlapping regions correctly.  Returns the original @dest.
 *
 * When the destination lies below the source a forward pass is
 * safe; otherwise the copy runs backwards from the end of both
 * regions.  In either direction the destination is first aligned
 * (one byte, then one short), the bulk is moved longword-wise, and
 * the remaining 0-3 bytes are copied as a short and/or a byte.
 */
static inline void * memmove(void * dest,const void * src, size_t n)
{
  void *ret = dest;
  size_t nlongs, i;

  if (n == 0)
    return ret;

  if (dest < src) {
    /* forward copy */
    if ((long) dest & 1) {
      /* byte to reach even alignment */
      char *d = dest;
      const char *s = src;
      *d++ = *s++;
      dest = d;
      src = s;
      n--;
    }
    if (n > 2 && ((long) dest & 2)) {
      /* short to reach longword alignment */
      short *d = dest;
      const short *s = src;
      *d++ = *s++;
      dest = d;
      src = s;
      n -= 2;
    }
    nlongs = n >> 2;
    if (nlongs != 0) {
      long *d = dest;
      const long *s = src;
      for (i = 0; i < nlongs; i++)
	*d++ = *s++;
      dest = d;
      src = s;
    }
    if (n & 2) {
      short *d = dest;
      const short *s = src;
      *d++ = *s++;
      dest = d;
      src = s;
    }
    if (n & 1) {
      char *d = dest;
      const char *s = src;
      *d = *s;
    }
  } else {
    /* backward copy, starting one past the end of both regions */
    dest = (char *) dest + n;
    src = (const char *) src + n;
    if ((long) dest & 1) {
      char *d = dest;
      const char *s = src;
      *--d = *--s;
      dest = d;
      src = s;
      n--;
    }
    if (n > 2 && ((long) dest & 2)) {
      short *d = dest;
      const short *s = src;
      *--d = *--s;
      dest = d;
      src = s;
      n -= 2;
    }
    nlongs = n >> 2;
    if (nlongs != 0) {
      long *d = dest;
      const long *s = src;
      for (i = 0; i < nlongs; i++)
	*--d = *--s;
      dest = d;
      src = s;
    }
    if (n & 2) {
      short *d = dest;
      const short *s = src;
      *--d = *--s;
      dest = d;
      src = s;
    }
    if (n & 1) {
      char *d = dest;
      const char *s = src;
      *--d = *--s;
    }
  }
  return ret;
}
490
#define __HAVE_ARCH_MEMCMP
/* Out-of-line fallback implementation. */
extern int memcmp(const void * ,const void * ,size_t );
/* Constant n uses the compiler builtin; otherwise the inner,
 * non-re-expanded memcmp reference calls the extern function. */
#define memcmp(cs, ct, n) \
(__builtin_constant_p(n) ? \
 __builtin_memcmp((cs),(ct),(n)) : \
 memcmp((cs),(ct),(n)))
497
498#define __HAVE_ARCH_MEMCHR
/*
 * memchr() - scan the first @count bytes of @cs for character @c.
 *
 * Returns a pointer to the first matching byte, or NULL if @c does
 * not occur in the area.  Per ISO C, @c is converted to unsigned
 * char for the comparison (the previous code compared the full int,
 * so values of @c outside 0-255 could never match).
 *
 * Made 'static inline' for consistency with the other helpers in
 * this file ('extern inline' would emit an external definition
 * under C99 inline semantics).
 */
static inline void * memchr(const void * cs, int c, size_t count)
{
	/* Someone else can optimize this, I don't care - tonym@mac.linux-m68k.org */
	const unsigned char uc = (unsigned char) c;
	unsigned char *ret = (unsigned char *) cs;

	for (; count > 0; count--, ret++)
		if (*ret == uc)
			return ret;

	return NULL;
}
507
508#endif /* _M68K_STRING_H_ */
509