1/*
2 * arch/parisc/lib/io.c
3 *
4 * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
5 * Copyright (c) Randolph Chung 2001 <tausq@debian.org>
6 *
7 * IO accessing functions which shouldn't be inlined because they're too big
8 */
9
10#include <linux/kernel.h>
11#include <linux/module.h>
12#include <asm/io.h>
13
14/* Copies a block of memory to a device in an efficient manner.
15 * Assumes the device can cope with 32-bit transfers.  If it can't,
16 * don't use this function.
17 */
18void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
19{
20	if (((unsigned long)dst & 3) != ((unsigned long)src & 3))
21		goto bytecopy;
22	while ((unsigned long)dst & 3) {
23		writeb(*(char *)src, dst++);
24		src++;
25		count--;
26	}
27	while (count > 3) {
28		__raw_writel(*(u32 *)src, dst);
29		src += 4;
30		dst += 4;
31		count -= 4;
32	}
33 bytecopy:
34	while (count--) {
35		writeb(*(char *)src, dst++);
36		src++;
37	}
38}
39
40/*
41** Copies a block of memory from a device in an efficient manner.
42** Assumes the device can cope with 32-bit transfers.  If it can't,
43** don't use this function.
44**
45** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
46**	27341/64    = 427 cyc per int
47**	61311/128   = 478 cyc per short
48**	122637/256  = 479 cyc per byte
49** Ergo bus latencies dominant (not transfer size).
50**      Minimize total number of transfers at cost of CPU cycles.
51**	TODO: only look at src alignment and adjust the stores to dest.
52*/
53void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
54{
55	/* first compare alignment of src/dst */
56	if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) )
57		goto bytecopy;
58
59	if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) )
60		goto shortcopy;
61
62	/* Then check for misaligned start address */
63	if ((unsigned long)src & 1) {
64		*(u8 *)dst = readb(src);
65		src++;
66		dst++;
67		count--;
68		if (count < 2) goto bytecopy;
69	}
70
71	if ((unsigned long)src & 2) {
72		*(u16 *)dst = __raw_readw(src);
73		src += 2;
74		dst += 2;
75		count -= 2;
76	}
77
78	while (count > 3) {
79		*(u32 *)dst = __raw_readl(src);
80		dst += 4;
81		src += 4;
82		count -= 4;
83	}
84
85 shortcopy:
86	while (count > 1) {
87		*(u16 *)dst = __raw_readw(src);
88		src += 2;
89		dst += 2;
90		count -= 2;
91	}
92
93 bytecopy:
94	while (count--) {
95		*(char *)dst = readb(src);
96		src++;
97		dst++;
98	}
99}
100
101/* Sets a block of memory on a device to a given value.
102 * Assumes the device can cope with 32-bit transfers.  If it can't,
103 * don't use this function.
104 */
105void memset_io(volatile void __iomem *addr, unsigned char val, int count)
106{
107	u32 val32 = (val << 24) | (val << 16) | (val << 8) | val;
108	while ((unsigned long)addr & 3) {
109		writeb(val, addr++);
110		count--;
111	}
112	while (count > 3) {
113		__raw_writel(val32, addr);
114		addr += 4;
115		count -= 4;
116	}
117	while (count--) {
118		writeb(val, addr++);
119	}
120}
121
/*
 * Read COUNT 8-bit bytes from port PORT into memory starting at DST.
 *
 * Bytes are stored one at a time until the buffer pointer is 32-bit
 * aligned; after that, four port reads are packed into one 32-bit store
 * (first byte read in the most significant position), and whatever is
 * left over is stored byte by byte.
 */
void insb (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;
	unsigned int word;

	/* Leading bytes up to the next 32-bit boundary. */
	for (; ((unsigned long)out) & 0x3; out++) {
		if (count == 0)
			return;
		count--;
		*out = inb(port);
	}

	/* Aligned middle part: one word store per four port reads. */
	for (; count >= 4; count -= 4) {
		word = inb(port) << 24;
		word |= inb(port) << 16;
		word |= inb(port) << 8;
		word |= inb(port);
		*(unsigned int *) out = word;
		out += 4;
	}

	/* Trailing bytes. */
	for (; count; count--) {
		*out = inb(port);
		out++;
	}
}
157
158
/*
 * Read COUNT 16-bit words from port PORT into memory starting at
 * DST.  This is used by the IDE driver to read disk sectors.
 * Performance is important, but the interfaces seems to be slow:
 * just using the inlined version of the inw() breaks things.
 *
 * A 16-bit aligned buffer is filled with 32-bit stores built from two
 * port reads each, with a single 16-bit store at the front and/or back
 * where needed.  An odd-aligned buffer is filled with 16-bit stores that
 * straddle consecutive port words, bracketed by one byte store at each
 * end.
 */
void insw (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;
	unsigned int cur, next;

	if (count == 0)
		return;

	if (((unsigned long)out) & 0x1) {
		/* Buffer 8-bit aligned.
		 * I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */
		count--;

		cur = cpu_to_le16(inw(port));
		*out = cur >> 8;
		out++;
		for (; count; count--) {
			next = cpu_to_le16(inw(port));
			*(unsigned short *)out = (cur & 0xff) << 8 | (next >> 8);
			out += 2;
			cur = next;
		}
		*out = cur & 0xff;
		return;
	}

	/* Buffer 16-bit but not 32-bit aligned: one word brings it in line. */
	if (((unsigned long)out) & 0x2) {
		*(unsigned short *)out = cpu_to_le16(inw(port));
		out += 2;
		count--;
	}

	/* Buffer 32-bit aligned from here on: two port words per store. */
	for (; count >= 2; count -= 2) {
		cur = cpu_to_le16(inw(port)) << 16;
		cur |= cpu_to_le16(inw(port));
		*(unsigned int *)out = cur;
		out += 4;
	}

	/* Odd word left over. */
	if (count)
		*(unsigned short *)out = cpu_to_le16(inw(port));
}
229
230
231
/*
 * Read COUNT 32-bit words from port PORT into memory starting at
 * DST. Works with any alignment of DST. Performance is important,
 * but the interfaces seems to be slow: just using the inlined version
 * of the inl() breaks things.
 *
 * For a misaligned buffer the first port word is split so that the
 * buffer pointer becomes 32-bit aligned, every following store combines
 * the tail of the previous port word with the head of the next one, and
 * the remainder of the last port word is written out in pieces.
 */
void insl (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;
	unsigned int cur, next;

	if (count == 0)
		return;

	switch (((unsigned long) dst) & 0x3) {
	case 0x00:			/* Buffer 32-bit aligned */
		for (; count; count--) {
			*(unsigned int *)out = cpu_to_le32(inl(port));
			out += 4;
		}
		break;

	case 0x01:			/* Buffer 8-bit aligned */
		count--;

		/* 1 + 2 bytes of the first word reach the word boundary. */
		cur = cpu_to_le32(inl(port));
		*out = cur >> 24;
		out++;
		*(unsigned short *)out = (cur >> 8) & 0xffff;
		out += 2;

		for (; count; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xff) << 24 | (next >> 8);
			out += 4;
			cur = next;
		}
		*out = cur & 0xff;
		break;

	case 0x02:			/* Buffer 16-bit aligned */
		count--;

		/* Upper half of the first word reaches the word boundary. */
		cur = cpu_to_le32(inl(port));
		*(unsigned short *)out = cur >> 16;
		out += 2;

		for (; count; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xffff) << 16 | (next >> 16);
			out += 4;
			cur = next;
		}
		*(unsigned short *)out = cur & 0xffff;
		break;

	case 0x03:			/* Buffer 8-bit aligned */
		count--;

		/* Top byte of the first word reaches the word boundary. */
		cur = cpu_to_le32(inl(port));
		*out = cur >> 24;
		out++;

		for (; count; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xffffff) << 8 | next >> 24;
			out += 4;
			cur = next;
		}
		*(unsigned short *)out = (cur >> 8) & 0xffff;
		out += 2;
		*out = cur & 0xff;
		break;
	}
}
310
311
/*
 * Like insb but in the opposite direction: write COUNT 8-bit bytes from
 * memory starting at SRC to port PORT.
 * Don't worry as much about doing aligned memory transfers:
 * doing byte reads the "slow" way isn't nearly as slow as
 * doing byte writes the slow way (no r-m-w cycle).
 */
void outsb(unsigned long port, const void * src, unsigned long count)
{
	const unsigned char *cursor = src;
	const unsigned char *end = cursor + count;

	/* One port write per source byte, in buffer order. */
	for (; cursor != end; cursor++)
		outb(*cursor, port);
}
329
/*
 * Like insw but in the opposite direction.  This is used by the IDE
 * driver to write disk sectors.  Performance is important, but the
 * interfaces seems to be slow: just using the inlined version of the
 * outw() breaks things.
 *
 * Writes COUNT 16-bit words from memory starting at SRC to port PORT.
 * SRC may have any alignment: a 16-bit aligned buffer is read with 32-bit
 * loads (two port writes per load), an odd-aligned buffer with one byte
 * load at each end and 16-bit loads in between.
 */
void outsw (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count>=2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */

		/* One word first so the loop below runs on a 32-bit boundary. */
		outw(le16_to_cpu(*(const unsigned short *)p), port);
		p += 2;
		count--;

		while (count>=2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
	 case 0x03:			/* (offset 3 takes the same path as offset 1) */
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */

		/* l carries the upper byte of the next word to send. */
		l  = *p << 8;
		p++;
		count--;
		while (count)
		{
			count--;
			l2 = *(const unsigned short *)p;
			p += 2;
			outw(le16_to_cpu(l | l2 >> 8), port);
			/* keep only the byte that has not been sent yet */
			l = (l2 & 0xff) << 8;
		}
		/* The last word's lower byte is the single byte left at p. */
		l2 = *p;
		outw (le16_to_cpu(l | l2), port);
		break;

	}
}
400
401
/*
 * Like insl but in the opposite direction.  This is used by the IDE
 * driver to write disk sectors.  Works with any alignment in SRC.
 *  Performance is important, but the interfaces seems to be slow:
 * just using the inlined version of the outl() breaks things.
 *
 * Writes COUNT 32-bit words from memory starting at SRC to port PORT.
 * For a misaligned buffer the leading 1-3 bytes are loaded separately so
 * that the pointer becomes 32-bit aligned; each port word is then built
 * from the bytes carried over from the previous load plus the head of the
 * next one, and the last port word is completed from the trailing bytes.
 */
void outsl (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count--)
		{
			outl(le32_to_cpu(*(const unsigned int *)p), port);
			p += 4;
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		--count;

		/* l holds the two bytes not yet sent in its low half. */
		l = *(const unsigned short *)p;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l << 16 | l2 >> 16), port);
			l = l2;
		}
		l2 = *(const unsigned short *)p;
		outl (le32_to_cpu(l << 16 | l2), port);
		break;
	 case 0x01:			/* Buffer 8-bit aligned */
		--count;

		/* l holds the three bytes not yet sent in bits 31..8.
		 * The casts keep the shifts in unsigned arithmetic. */
		l = (unsigned int)*p << 24;
		p++;
		l |= (unsigned int)*(const unsigned short *)p << 8;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 24), port);
			l = l2 << 8;
		}
		l2 = *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	 case 0x03:			/* Buffer 8-bit aligned */
		--count;

		/* l holds the one byte not yet sent in bits 31..24. */
		l = (unsigned int)*p << 24;
		p++;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 8), port);
			l = l2 << 24;
		}
		/*
		 * Three bytes remain: the halfword fills bits 23..8 and the
		 * final byte bits 7..0, leaving bits 31..24 to the byte
		 * carried in l.
		 */
		l2 = (unsigned int)*(const unsigned short *)p << 8;
		p += 2;
		l2 |= *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	}
}
482
483EXPORT_SYMBOL(insb);
484EXPORT_SYMBOL(insw);
485EXPORT_SYMBOL(insl);
486EXPORT_SYMBOL(outsb);
487EXPORT_SYMBOL(outsw);
488EXPORT_SYMBOL(outsl);
489