1// SPDX-License-Identifier: GPL-2.0
2/*
3 * arch/parisc/lib/io.c
4 *
5 * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
6 * Copyright (c) Randolph Chung 2001 <tausq@debian.org>
7 *
8 * IO accessing functions which shouldn't be inlined because they're too big
9 */
10
11#include <linux/kernel.h>
12#include <linux/module.h>
13#include <asm/io.h>
14
15/* Copies a block of memory to a device in an efficient manner.
16 * Assumes the device can cope with 32-bit transfers.  If it can't,
17 * don't use this function.
18 */
19void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
20{
21	if (((unsigned long)dst & 3) != ((unsigned long)src & 3))
22		goto bytecopy;
23	while ((unsigned long)dst & 3) {
24		writeb(*(char *)src, dst++);
25		src++;
26		count--;
27	}
28	while (count > 3) {
29		__raw_writel(*(u32 *)src, dst);
30		src += 4;
31		dst += 4;
32		count -= 4;
33	}
34 bytecopy:
35	while (count--) {
36		writeb(*(char *)src, dst++);
37		src++;
38	}
39}
40
41/*
42** Copies a block of memory from a device in an efficient manner.
43** Assumes the device can cope with 32-bit transfers.  If it can't,
44** don't use this function.
45**
46** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
47**	27341/64    = 427 cyc per int
48**	61311/128   = 478 cyc per short
49**	122637/256  = 479 cyc per byte
50** Ergo bus latencies dominant (not transfer size).
51**      Minimize total number of transfers at cost of CPU cycles.
52**	TODO: only look at src alignment and adjust the stores to dest.
53*/
54void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
55{
56	/* first compare alignment of src/dst */
57	if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) )
58		goto bytecopy;
59
60	if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) )
61		goto shortcopy;
62
63	/* Then check for misaligned start address */
64	if ((unsigned long)src & 1) {
65		*(u8 *)dst = readb(src);
66		src++;
67		dst++;
68		count--;
69		if (count < 2) goto bytecopy;
70	}
71
72	if ((unsigned long)src & 2) {
73		*(u16 *)dst = __raw_readw(src);
74		src += 2;
75		dst += 2;
76		count -= 2;
77	}
78
79	while (count > 3) {
80		*(u32 *)dst = __raw_readl(src);
81		dst += 4;
82		src += 4;
83		count -= 4;
84	}
85
86 shortcopy:
87	while (count > 1) {
88		*(u16 *)dst = __raw_readw(src);
89		src += 2;
90		dst += 2;
91		count -= 2;
92	}
93
94 bytecopy:
95	while (count--) {
96		*(char *)dst = readb(src);
97		src++;
98		dst++;
99	}
100}
101
102/* Sets a block of memory on a device to a given value.
103 * Assumes the device can cope with 32-bit transfers.  If it can't,
104 * don't use this function.
105 */
106void memset_io(volatile void __iomem *addr, unsigned char val, int count)
107{
108	u32 val32 = (val << 24) | (val << 16) | (val << 8) | val;
109	while ((unsigned long)addr & 3) {
110		writeb(val, addr++);
111		count--;
112	}
113	while (count > 3) {
114		__raw_writel(val32, addr);
115		addr += 4;
116		count -= 4;
117	}
118	while (count--) {
119		writeb(val, addr++);
120	}
121}
122
/*
 * Read COUNT 8-bit bytes from port PORT into memory starting at
 * DST.
 *
 * Bytes are stored one at a time until DST is 32-bit aligned, then
 * four port reads are packed into each 32-bit store (first byte read
 * in the most significant position), and any remainder is stored
 * byte by byte.
 */
void insb (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;

	/* Leading bytes up to the next 32-bit boundary */
	for (; (unsigned long)out & 0x3; out++) {
		if (count == 0)
			return;
		count--;
		*out = inb(port);
	}

	/* Four port reads per 32-bit store */
	for (; count >= 4; count -= 4, out += 4) {
		unsigned int word;

		word = inb(port) << 24;
		word |= inb(port) << 16;
		word |= inb(port) << 8;
		word |= inb(port);
		*(unsigned int *)out = word;
	}

	/* Trailing bytes */
	for (; count != 0; count--, out++)
		*out = inb(port);
}
158
159
/*
 * Read COUNT 16-bit words from port PORT into memory starting at
 * DST.  Every alignment of DST is handled: 16-bit aligned buffers get
 * two port reads packed into each 32-bit store, odd buffers are
 * filled with 16-bit stores that straddle two consecutive port reads.
 * Each value read is passed through cpu_to_le16() before it is stored.
 *
 * This is used by the IDE driver to read disk sectors.  Performance is
 * important, but the interface seems to be slow: just using the
 * inlined version of inw() breaks things.
 */
void insw (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;
	unsigned int cur, next;

	if (count == 0)
		return;

	if ((unsigned long)out & 0x1) {
		/*
		 * Odd buffer address.
		 * I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE
		 */
		count--;
		cur = cpu_to_le16(inw(port));
		*out = cur >> 8;
		out++;
		for (; count != 0; count--) {
			next = cpu_to_le16(inw(port));
			/* low byte of the previous word, high byte of this one */
			*(unsigned short *)out = (cur & 0xff) << 8 | (next >> 8);
			out += 2;
			cur = next;
		}
		*out = cur & 0xff;
		return;
	}

	if ((unsigned long)out & 0x2) {
		/* 16-bit aligned only: one word brings us to 32-bit alignment */
		*(unsigned short *)out = cpu_to_le16(inw(port));
		out += 2;
		count--;
	}

	/* 32-bit aligned from here on: two port reads per store */
	while (count >= 2) {
		count -= 2;
		cur = cpu_to_le16(inw(port)) << 16;
		cur |= cpu_to_le16(inw(port));
		*(unsigned int *)out = cur;
		out += 4;
	}

	/* Odd word left over */
	if (count != 0)
		*(unsigned short *)out = cpu_to_le16(inw(port));
}
230
231
232
/*
 * Read COUNT 32-bit words from port PORT into memory starting at
 * DST.  Works with any alignment of DST: for an unaligned buffer the
 * first word is split so that the following stores are 32-bit aligned,
 * each of them combining the tail of one port read with the head of
 * the next.  Each value read is passed through cpu_to_le32() before it
 * is stored.
 *
 * Performance is important, but the interface seems to be slow: just
 * using the inlined version of inl() breaks things.
 */
void insl (unsigned long port, void *dst, unsigned long count)
{
	const unsigned long misalign = (unsigned long)dst & 0x3;
	unsigned char *out = dst;
	unsigned int prev, cur;

	if (count == 0)
		return;

	if (misalign == 0) {
		/* Buffer 32-bit aligned: straight word stores */
		for (; count != 0; count--, out += 4)
			*(unsigned int *)out = cpu_to_le32(inl(port));
		return;
	}

	/* Every unaligned layout starts by consuming one word */
	count--;
	prev = cpu_to_le32(inl(port));

	if (misalign == 2) {
		/* Buffer 16-bit aligned: 2 bytes, aligned words, 2 bytes */
		*(unsigned short *)out = prev >> 16;
		out += 2;

		for (; count != 0; count--, out += 4) {
			cur = cpu_to_le32(inl(port));
			*(unsigned int *)out = (prev & 0xffff) << 16 | (cur >> 16);
			prev = cur;
		}
		*(unsigned short *)out = prev & 0xffff;
	} else if (misalign == 1) {
		/* 1 byte + 2 bytes, aligned words, then 1 byte */
		*out = prev >> 24;
		out++;
		*(unsigned short *)out = (prev >> 8) & 0xffff;
		out += 2;

		for (; count != 0; count--, out += 4) {
			cur = cpu_to_le32(inl(port));
			*(unsigned int *)out = (prev & 0xff) << 24 | (cur >> 8);
			prev = cur;
		}
		*out = prev & 0xff;
	} else {
		/* misalign == 3: 1 byte, aligned words, then 2 bytes + 1 byte */
		*out = prev >> 24;
		out++;

		for (; count != 0; count--, out += 4) {
			cur = cpu_to_le32(inl(port));
			*(unsigned int *)out = (prev & 0xffffff) << 8 | cur >> 24;
			prev = cur;
		}
		*(unsigned short *)out = (prev >> 8) & 0xffff;
		out += 2;
		*out = prev & 0xff;
	}
}
311
312
/*
 * Write COUNT 8-bit bytes from memory starting at SRC to port PORT;
 * insb in the opposite direction.
 * Don't worry as much about doing aligned memory transfers:
 * doing byte reads the "slow" way isn't nearly as slow as
 * doing byte writes the slow way (no r-m-w cycle).
 */
void outsb(unsigned long port, const void * src, unsigned long count)
{
	const unsigned char *in = src;

	/* One port write per source byte, in buffer order */
	for (; count != 0; count--, in++)
		outb(*in, port);
}
330
/*
 * Like insw but in the opposite direction: write COUNT 16-bit words
 * from memory starting at SRC to port PORT.  Every alignment of SRC is
 * handled.  Each value is passed through le16_to_cpu() before it is
 * handed to outw().
 *
 * This is used by the IDE driver to write disk sectors.  Performance is
 * important, but the interface seems to be slow: just using the inlined
 * version of outw() breaks things.
 */
void outsw (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count>=2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */

		/* One word brings the pointer to 32-bit alignment */
		outw(le16_to_cpu(*(const unsigned short *)p), port);
		p += 2;
		count--;

		while (count>=2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
	 case 0x03:			/* (both odd offsets, as in insw) */
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */

		/* l carries the high byte of the word being assembled */
		l  = *p << 8;
		p++;
		count--;
		while (count)
		{
			count--;
			l2 = *(const unsigned short *)p;
			p += 2;
			outw(le16_to_cpu(l | l2 >> 8), port);
			/* keep only the byte that starts the next word */
			l = (l2 & 0xff) << 8;
		}
		/* The final byte is the low half of the last word */
		l2 = *p;
		outw (le16_to_cpu(l | l2), port);
		break;

	}
}
401
402
/*
 * Like insl but in the opposite direction: write COUNT 32-bit words
 * from memory starting at SRC to port PORT.  Works with any alignment
 * of SRC: for an unaligned buffer the leading bytes are loaded first so
 * that the following loads are 32-bit aligned, and every word written
 * combines the tail of one load with the head of the next.  Each value
 * is passed through le32_to_cpu() before it is handed to outl().
 *
 * This is used by the IDE driver to write disk sectors.  Performance is
 * important, but the interface seems to be slow: just using the inlined
 * version of outl() breaks things.
 */
void outsl (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count--)
		{
			outl(le32_to_cpu(*(const unsigned int *)p), port);
			p += 4;
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		--count;

		/* 2 leading bytes, aligned words, 2 trailing bytes */
		l = *(const unsigned short *)p;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l << 16 | l2 >> 16), port);
			l = l2;
		}
		l2 = *(const unsigned short *)p;
		outl (le32_to_cpu(l << 16 | l2), port);
		break;
	 case 0x01:			/* Buffer 8-bit aligned */
		--count;

		/* 1 + 2 leading bytes fill bits 8..31 of the first word */
		l = *p << 24;
		p++;
		l |= *(const unsigned short *)p << 8;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 24), port);
			l = l2 << 8;
		}
		l2 = *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	 case 0x03:			/* Buffer 8-bit aligned */
		--count;

		/* 1 leading byte fills bits 24..31 of the first word */
		l = *p << 24;
		p++;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 8), port);
			l = l2 << 24;
		}
		/*
		 * l already holds bits 24..31, so the trailing 16-bit piece
		 * goes into bits 8..23 and the last byte into bits 0..7
		 * (the same layout insl stores for this alignment).
		 */
		l2 = *(const unsigned short *)p << 8;
		p += 2;
		l2 |= *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	}
}
483
/* Export the string port I/O routines defined in this file. */
EXPORT_SYMBOL(insb);
EXPORT_SYMBOL(insw);
EXPORT_SYMBOL(insl);
EXPORT_SYMBOL(outsb);
EXPORT_SYMBOL(outsw);
EXPORT_SYMBOL(outsl);
490