1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * linux/drivers/misc/xillybus_core.c
4 *
5 * Copyright 2011 Xillybus Ltd, http://xillybus.com
6 *
7 * Driver for the Xillybus FPGA/host framework.
8 *
9 * This driver interfaces with a special IP core in an FPGA, setting up
10 * a pipe between a hardware FIFO in the programmable logic and a device
11 * file in the host. The number of such pipes and their attributes are
12 * set up on the logic. This driver detects these automatically and
13 * creates the device files accordingly.
14 */
15
16#include <linux/list.h>
17#include <linux/device.h>
18#include <linux/module.h>
19#include <linux/io.h>
20#include <linux/dma-mapping.h>
21#include <linux/interrupt.h>
22#include <linux/sched.h>
23#include <linux/fs.h>
24#include <linux/spinlock.h>
25#include <linux/mutex.h>
26#include <linux/crc32.h>
27#include <linux/poll.h>
28#include <linux/delay.h>
29#include <linux/slab.h>
30#include <linux/workqueue.h>
31#include "xillybus.h"
32#include "xillybus_class.h"
33
34MODULE_DESCRIPTION("Xillybus core functions");
35MODULE_AUTHOR("Eli Billauer, Xillybus Ltd.");
36MODULE_ALIAS("xillybus_core");
37MODULE_LICENSE("GPL v2");
38
39/* General timeout is 100 ms, rx timeout is 10 ms */
40#define XILLY_RX_TIMEOUT (10*HZ/1000)
41#define XILLY_TIMEOUT (100*HZ/1000)
42
43#define fpga_msg_ctrl_reg              0x0008
44#define fpga_dma_control_reg           0x0020
45#define fpga_dma_bufno_reg             0x0024
46#define fpga_dma_bufaddr_lowaddr_reg   0x0028
47#define fpga_dma_bufaddr_highaddr_reg  0x002c
48#define fpga_buf_ctrl_reg              0x0030
49#define fpga_buf_offset_reg            0x0034
50#define fpga_endian_reg                0x0040
51
52#define XILLYMSG_OPCODE_RELEASEBUF 1
53#define XILLYMSG_OPCODE_QUIESCEACK 2
54#define XILLYMSG_OPCODE_FIFOEOF 3
55#define XILLYMSG_OPCODE_FATAL_ERROR 4
56#define XILLYMSG_OPCODE_NONEMPTY 5
57
58static const char xillyname[] = "xillybus";
59
60static struct workqueue_struct *xillybus_wq;
61
/*
 * Locking scheme: Mutexes protect invocations of character device methods.
 * If both locks are taken, wr_mutex is taken first, rd_mutex second.
 *
 * wr_spinlock protects wr_*_buf_idx, wr_empty, wr_sleepy, wr_ready and the
 * buffers' end_offset fields against changes made by the IRQ handler (and,
 * in theory, by other file request handlers, but the mutex rules that out).
 * Nothing else.
 * The spinlocks are held only for short, direct memory manipulations.
 * Needless to say, no mutex may be taken while a spinlock is held.
 *
 * rd_spinlock does the same with rd_*_buf_idx, rd_full and end_offset.
 *
 * register_mutex is endpoint-specific, and is held when non-atomic
 * register operations are performed. wr_mutex and rd_mutex may be
 * held when register_mutex is taken, but none of the spinlocks. Note that
 * register_mutex doesn't protect against sporadic buf_ctrl_reg writes
 * which are unrelated to buf_offset_reg, since they are harmless.
 *
 * Blocking on the wait queues is allowed with mutexes held, but not with
 * spinlocks.
 *
 * Only interruptible blocking is allowed on mutexes and wait queues.
 *
 * All in all, the locking order goes (with skips allowed, of course):
 * wr_mutex -> rd_mutex -> register_mutex -> wr_spinlock -> rd_spinlock
 */
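
/*
 * The function below is an illustrative sketch only, and is never called:
 * it spells out the nesting order described above, using this driver's
 * actual lock fields. Real code paths rarely need more than two of these
 * locks at a time.
 */
static inline void __maybe_unused xilly_lock_order_sketch(
	struct xilly_channel *channel)
{
	unsigned long flags;

	mutex_lock(&channel->wr_mutex);			/* Outermost */
	mutex_lock(&channel->rd_mutex);
	mutex_lock(&channel->endpoint->register_mutex);

	/* No sleeping, hence no mutexes, below this point */
	spin_lock_irqsave(&channel->wr_spinlock, flags);
	spin_lock(&channel->rd_spinlock);		/* Innermost */

	spin_unlock(&channel->rd_spinlock);
	spin_unlock_irqrestore(&channel->wr_spinlock, flags);

	mutex_unlock(&channel->endpoint->register_mutex);
	mutex_unlock(&channel->rd_mutex);
	mutex_unlock(&channel->wr_mutex);
}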
89
90static void malformed_message(struct xilly_endpoint *endpoint, u32 *buf)
91{
92	int opcode;
93	int msg_channel, msg_bufno, msg_data, msg_dir;
94
95	opcode = (buf[0] >> 24) & 0xff;
96	msg_dir = buf[0] & 1;
97	msg_channel = (buf[0] >> 1) & 0x7ff;
98	msg_bufno = (buf[0] >> 12) & 0x3ff;
99	msg_data = buf[1] & 0xfffffff;
100
101	dev_warn(endpoint->dev,
102		 "Malformed message (skipping): opcode=%d, channel=%03x, dir=%d, bufno=%03x, data=%07x\n",
103		 opcode, msg_channel, msg_dir, msg_bufno, msg_data);
104}
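
/*
 * For reference, the interrupt message format decoded above and in
 * xillybus_isr(), as used throughout this file (field names are ours):
 *
 *   buf[0]: bits 31:24 opcode, bit 22 last-message flag,
 *           bits 21:12 bufno, bits 11:1 channel, bit 0 direction
 *   buf[1]: bits 31:28 rolling counter, bits 27:0 data
 *
 * The sketch below, which is never called, merely re-packs such a word
 * pair as an illustration of that layout; it is not part of the host-FPGA
 * protocol implementation.
 */
static inline void __maybe_unused xilly_pack_msg_sketch(u32 *word,
							 u32 opcode,
							 u32 channel,
							 u32 dir,
							 u32 bufno,
							 u32 counter,
							 u32 data)
{
	word[0] = ((opcode & 0xff) << 24) | ((bufno & 0x3ff) << 12) |
		  ((channel & 0x7ff) << 1) | (dir & 1);
	word[1] = ((counter & 0xf) << 28) | (data & 0xfffffff);
}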
105
/*
 * xillybus_isr assumes the interrupt is allocated exclusively to it,
 * which is the natural case with MSI and several other hardware-oriented
 * interrupts. Sharing is not allowed.
 */
111
112irqreturn_t xillybus_isr(int irq, void *data)
113{
114	struct xilly_endpoint *ep = data;
115	u32 *buf;
116	unsigned int buf_size;
117	int i;
118	int opcode;
119	unsigned int msg_channel, msg_bufno, msg_data, msg_dir;
120	struct xilly_channel *channel;
121
122	buf = ep->msgbuf_addr;
123	buf_size = ep->msg_buf_size/sizeof(u32);
124
125	dma_sync_single_for_cpu(ep->dev, ep->msgbuf_dma_addr,
126				ep->msg_buf_size, DMA_FROM_DEVICE);
127
128	for (i = 0; i < buf_size; i += 2) {
129		if (((buf[i+1] >> 28) & 0xf) != ep->msg_counter) {
130			malformed_message(ep, &buf[i]);
131			dev_warn(ep->dev,
132				 "Sending a NACK on counter %x (instead of %x) on entry %d\n",
133				 ((buf[i+1] >> 28) & 0xf),
134				 ep->msg_counter,
135				 i/2);
136
137			if (++ep->failed_messages > 10) {
138				dev_err(ep->dev,
139					"Lost sync with interrupt messages. Stopping.\n");
140			} else {
141				dma_sync_single_for_device(ep->dev,
142							   ep->msgbuf_dma_addr,
143							   ep->msg_buf_size,
144							   DMA_FROM_DEVICE);
145
146				iowrite32(0x01,  /* Message NACK */
147					  ep->registers + fpga_msg_ctrl_reg);
148			}
149			return IRQ_HANDLED;
150		} else if (buf[i] & (1 << 22)) /* Last message */
151			break;
152	}
153
154	if (i >= buf_size) {
155		dev_err(ep->dev, "Bad interrupt message. Stopping.\n");
156		return IRQ_HANDLED;
157	}
158
159	buf_size = i + 2;
160
161	for (i = 0; i < buf_size; i += 2) { /* Scan through messages */
162		opcode = (buf[i] >> 24) & 0xff;
163
164		msg_dir = buf[i] & 1;
165		msg_channel = (buf[i] >> 1) & 0x7ff;
166		msg_bufno = (buf[i] >> 12) & 0x3ff;
167		msg_data = buf[i+1] & 0xfffffff;
168
169		switch (opcode) {
170		case XILLYMSG_OPCODE_RELEASEBUF:
171			if ((msg_channel > ep->num_channels) ||
172			    (msg_channel == 0)) {
173				malformed_message(ep, &buf[i]);
174				break;
175			}
176
177			channel = ep->channels[msg_channel];
178
179			if (msg_dir) { /* Write channel */
180				if (msg_bufno >= channel->num_wr_buffers) {
181					malformed_message(ep, &buf[i]);
182					break;
183				}
184				spin_lock(&channel->wr_spinlock);
185				channel->wr_buffers[msg_bufno]->end_offset =
186					msg_data;
187				channel->wr_fpga_buf_idx = msg_bufno;
188				channel->wr_empty = 0;
189				channel->wr_sleepy = 0;
190				spin_unlock(&channel->wr_spinlock);
191
192				wake_up_interruptible(&channel->wr_wait);
193
194			} else {
195				/* Read channel */
196
197				if (msg_bufno >= channel->num_rd_buffers) {
198					malformed_message(ep, &buf[i]);
199					break;
200				}
201
202				spin_lock(&channel->rd_spinlock);
203				channel->rd_fpga_buf_idx = msg_bufno;
204				channel->rd_full = 0;
205				spin_unlock(&channel->rd_spinlock);
206
207				wake_up_interruptible(&channel->rd_wait);
208				if (!channel->rd_synchronous)
209					queue_delayed_work(
210						xillybus_wq,
211						&channel->rd_workitem,
212						XILLY_RX_TIMEOUT);
213			}
214
215			break;
216		case XILLYMSG_OPCODE_NONEMPTY:
217			if ((msg_channel > ep->num_channels) ||
218			    (msg_channel == 0) || (!msg_dir) ||
219			    !ep->channels[msg_channel]->wr_supports_nonempty) {
220				malformed_message(ep, &buf[i]);
221				break;
222			}
223
224			channel = ep->channels[msg_channel];
225
226			if (msg_bufno >= channel->num_wr_buffers) {
227				malformed_message(ep, &buf[i]);
228				break;
229			}
230			spin_lock(&channel->wr_spinlock);
231			if (msg_bufno == channel->wr_host_buf_idx)
232				channel->wr_ready = 1;
233			spin_unlock(&channel->wr_spinlock);
234
235			wake_up_interruptible(&channel->wr_ready_wait);
236
237			break;
238		case XILLYMSG_OPCODE_QUIESCEACK:
239			ep->idtlen = msg_data;
240			wake_up_interruptible(&ep->ep_wait);
241
242			break;
243		case XILLYMSG_OPCODE_FIFOEOF:
244			if ((msg_channel > ep->num_channels) ||
245			    (msg_channel == 0) || (!msg_dir) ||
246			    !ep->channels[msg_channel]->num_wr_buffers) {
247				malformed_message(ep, &buf[i]);
248				break;
249			}
250			channel = ep->channels[msg_channel];
251			spin_lock(&channel->wr_spinlock);
252			channel->wr_eof = msg_bufno;
253			channel->wr_sleepy = 0;
254
255			channel->wr_hangup = channel->wr_empty &&
256				(channel->wr_host_buf_idx == msg_bufno);
257
258			spin_unlock(&channel->wr_spinlock);
259
260			wake_up_interruptible(&channel->wr_wait);
261
262			break;
263		case XILLYMSG_OPCODE_FATAL_ERROR:
264			ep->fatal_error = 1;
265			wake_up_interruptible(&ep->ep_wait); /* For select() */
266			dev_err(ep->dev,
267				"FPGA reported a fatal error. This means that the low-level communication with the device has failed. This hardware problem is most likely unrelated to Xillybus (neither kernel module nor FPGA core), but reports are still welcome. All I/O is aborted.\n");
268			break;
269		default:
270			malformed_message(ep, &buf[i]);
271			break;
272		}
273	}
274
275	dma_sync_single_for_device(ep->dev, ep->msgbuf_dma_addr,
276				   ep->msg_buf_size, DMA_FROM_DEVICE);
277
278	ep->msg_counter = (ep->msg_counter + 1) & 0xf;
279	ep->failed_messages = 0;
280	iowrite32(0x03, ep->registers + fpga_msg_ctrl_reg); /* Message ACK */
281
282	return IRQ_HANDLED;
283}
284EXPORT_SYMBOL(xillybus_isr);
285
286/*
287 * A few trivial memory management functions.
288 * NOTE: These functions are used only on probe and remove, and therefore
289 * no locks are applied!
290 */
291
292static void xillybus_autoflush(struct work_struct *work);
293
294struct xilly_alloc_state {
295	void *salami;
296	int left_of_salami;
297	int nbuffer;
298	enum dma_data_direction direction;
299	u32 regdirection;
300};
301
302static void xilly_unmap(void *ptr)
303{
304	struct xilly_mapping *data = ptr;
305
306	dma_unmap_single(data->device, data->dma_addr,
307			 data->size, data->direction);
308
309	kfree(ptr);
310}
311
312static int xilly_map_single(struct xilly_endpoint *ep,
313			    void *ptr,
314			    size_t size,
315			    int direction,
316			    dma_addr_t *ret_dma_handle
317	)
318{
319	dma_addr_t addr;
320	struct xilly_mapping *this;
321
322	this = kzalloc(sizeof(*this), GFP_KERNEL);
323	if (!this)
324		return -ENOMEM;
325
326	addr = dma_map_single(ep->dev, ptr, size, direction);
327
328	if (dma_mapping_error(ep->dev, addr)) {
329		kfree(this);
330		return -ENODEV;
331	}
332
333	this->device = ep->dev;
334	this->dma_addr = addr;
335	this->size = size;
336	this->direction = direction;
337
338	*ret_dma_handle = addr;
339
340	return devm_add_action_or_reset(ep->dev, xilly_unmap, this);
341}
342
343static int xilly_get_dma_buffers(struct xilly_endpoint *ep,
344				 struct xilly_alloc_state *s,
345				 struct xilly_buffer **buffers,
346				 int bufnum, int bytebufsize)
347{
348	int i, rc;
349	dma_addr_t dma_addr;
350	struct device *dev = ep->dev;
351	struct xilly_buffer *this_buffer = NULL; /* Init to silence warning */
352
353	if (buffers) { /* Not the message buffer */
354		this_buffer = devm_kcalloc(dev, bufnum,
355					   sizeof(struct xilly_buffer),
356					   GFP_KERNEL);
357		if (!this_buffer)
358			return -ENOMEM;
359	}
360
361	for (i = 0; i < bufnum; i++) {
362		/*
363		 * Buffers are expected in descending size order, so there
364		 * is either enough space for this buffer or none at all.
365		 */
366
367		if ((s->left_of_salami < bytebufsize) &&
368		    (s->left_of_salami > 0)) {
369			dev_err(ep->dev,
370				"Corrupt buffer allocation in IDT. Aborting.\n");
371			return -ENODEV;
372		}
373
374		if (s->left_of_salami == 0) {
375			int allocorder, allocsize;
376
377			allocsize = PAGE_SIZE;
378			allocorder = 0;
379			while (bytebufsize > allocsize) {
380				allocsize *= 2;
381				allocorder++;
382			}
383
384			s->salami = (void *) devm_get_free_pages(
385				dev,
386				GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO,
387				allocorder);
388			if (!s->salami)
389				return -ENOMEM;
390
391			s->left_of_salami = allocsize;
392		}
393
394		rc = xilly_map_single(ep, s->salami,
395				      bytebufsize, s->direction,
396				      &dma_addr);
397		if (rc)
398			return rc;
399
400		iowrite32((u32) (dma_addr & 0xffffffff),
401			  ep->registers + fpga_dma_bufaddr_lowaddr_reg);
402		iowrite32(((u32) ((((u64) dma_addr) >> 32) & 0xffffffff)),
403			  ep->registers + fpga_dma_bufaddr_highaddr_reg);
404
405		if (buffers) { /* Not the message buffer */
406			this_buffer->addr = s->salami;
407			this_buffer->dma_addr = dma_addr;
408			buffers[i] = this_buffer++;
409
410			iowrite32(s->regdirection | s->nbuffer++,
411				  ep->registers + fpga_dma_bufno_reg);
412		} else {
413			ep->msgbuf_addr = s->salami;
414			ep->msgbuf_dma_addr = dma_addr;
415			ep->msg_buf_size = bytebufsize;
416
417			iowrite32(s->regdirection,
418				  ep->registers + fpga_dma_bufno_reg);
419		}
420
421		s->left_of_salami -= bytebufsize;
422		s->salami += bytebufsize;
423	}
424	return 0;
425}
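
/*
 * A worked example of the "salami" allocation above, assuming
 * PAGE_SIZE = 4096: for a channel with 8 buffers of 1024 bytes each, the
 * first iteration allocates one zeroed page (left_of_salami = 4096), and
 * buffers 0-3 are carved out of it; left_of_salami then reaches zero, so
 * a second page is allocated for buffers 4-7. Since buffer sizes are
 * always powers of two here, a buffer either fits in what's left of the
 * current allocation or requires a fresh one, which is exactly what the
 * corruption check above relies on.
 */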
426
427static int xilly_setupchannels(struct xilly_endpoint *ep,
428			       unsigned char *chandesc,
429			       int entries)
430{
431	struct device *dev = ep->dev;
432	int i, entry, rc;
433	struct xilly_channel *channel;
434	int channelnum, bufnum, bufsize, format, is_writebuf;
435	int bytebufsize;
436	int synchronous, allowpartial, exclusive_open, seekable;
437	int supports_nonempty;
438	int msg_buf_done = 0;
439
440	struct xilly_alloc_state rd_alloc = {
441		.salami = NULL,
442		.left_of_salami = 0,
443		.nbuffer = 1,
444		.direction = DMA_TO_DEVICE,
445		.regdirection = 0,
446	};
447
448	struct xilly_alloc_state wr_alloc = {
449		.salami = NULL,
450		.left_of_salami = 0,
451		.nbuffer = 1,
452		.direction = DMA_FROM_DEVICE,
453		.regdirection = 0x80000000,
454	};
455
456	channel = devm_kcalloc(dev, ep->num_channels,
457			       sizeof(struct xilly_channel), GFP_KERNEL);
458	if (!channel)
459		return -ENOMEM;
460
461	ep->channels = devm_kcalloc(dev, ep->num_channels + 1,
462				    sizeof(struct xilly_channel *),
463				    GFP_KERNEL);
464	if (!ep->channels)
465		return -ENOMEM;
466
467	ep->channels[0] = NULL; /* Channel 0 is message buf. */
468
469	/* Initialize all channels with defaults */
470
471	for (i = 1; i <= ep->num_channels; i++) {
472		channel->wr_buffers = NULL;
473		channel->rd_buffers = NULL;
474		channel->num_wr_buffers = 0;
475		channel->num_rd_buffers = 0;
476		channel->wr_fpga_buf_idx = -1;
477		channel->wr_host_buf_idx = 0;
478		channel->wr_host_buf_pos = 0;
479		channel->wr_empty = 1;
480		channel->wr_ready = 0;
481		channel->wr_sleepy = 1;
482		channel->rd_fpga_buf_idx = 0;
483		channel->rd_host_buf_idx = 0;
484		channel->rd_host_buf_pos = 0;
485		channel->rd_full = 0;
486		channel->wr_ref_count = 0;
487		channel->rd_ref_count = 0;
488
489		spin_lock_init(&channel->wr_spinlock);
490		spin_lock_init(&channel->rd_spinlock);
491		mutex_init(&channel->wr_mutex);
492		mutex_init(&channel->rd_mutex);
493		init_waitqueue_head(&channel->rd_wait);
494		init_waitqueue_head(&channel->wr_wait);
495		init_waitqueue_head(&channel->wr_ready_wait);
496
497		INIT_DELAYED_WORK(&channel->rd_workitem, xillybus_autoflush);
498
499		channel->endpoint = ep;
500		channel->chan_num = i;
501
502		channel->log2_element_size = 0;
503
504		ep->channels[i] = channel++;
505	}
506
507	for (entry = 0; entry < entries; entry++, chandesc += 4) {
508		struct xilly_buffer **buffers = NULL;
509
510		is_writebuf = chandesc[0] & 0x01;
511		channelnum = (chandesc[0] >> 1) | ((chandesc[1] & 0x0f) << 7);
512		format = (chandesc[1] >> 4) & 0x03;
513		allowpartial = (chandesc[1] >> 6) & 0x01;
514		synchronous = (chandesc[1] >> 7) & 0x01;
515		bufsize = 1 << (chandesc[2] & 0x1f);
516		bufnum = 1 << (chandesc[3] & 0x0f);
517		exclusive_open = (chandesc[2] >> 7) & 0x01;
518		seekable = (chandesc[2] >> 6) & 0x01;
519		supports_nonempty = (chandesc[2] >> 5) & 0x01;
520
521		if ((channelnum > ep->num_channels) ||
522		    ((channelnum == 0) && !is_writebuf)) {
523			dev_err(ep->dev,
524				"IDT requests channel out of range. Aborting.\n");
525			return -ENODEV;
526		}
527
528		channel = ep->channels[channelnum]; /* NULL for msg channel */
529
530		if (!is_writebuf || channelnum > 0) {
531			channel->log2_element_size = ((format > 2) ?
532						      2 : format);
533
534			bytebufsize = bufsize *
535				(1 << channel->log2_element_size);
536
537			buffers = devm_kcalloc(dev, bufnum,
538					       sizeof(struct xilly_buffer *),
539					       GFP_KERNEL);
540			if (!buffers)
541				return -ENOMEM;
542		} else {
543			bytebufsize = bufsize << 2;
544		}
545
546		if (!is_writebuf) {
547			channel->num_rd_buffers = bufnum;
548			channel->rd_buf_size = bytebufsize;
549			channel->rd_allow_partial = allowpartial;
550			channel->rd_synchronous = synchronous;
551			channel->rd_exclusive_open = exclusive_open;
552			channel->seekable = seekable;
553
554			channel->rd_buffers = buffers;
555			rc = xilly_get_dma_buffers(ep, &rd_alloc, buffers,
556						   bufnum, bytebufsize);
557		} else if (channelnum > 0) {
558			channel->num_wr_buffers = bufnum;
559			channel->wr_buf_size = bytebufsize;
560
561			channel->seekable = seekable;
562			channel->wr_supports_nonempty = supports_nonempty;
563
564			channel->wr_allow_partial = allowpartial;
565			channel->wr_synchronous = synchronous;
566			channel->wr_exclusive_open = exclusive_open;
567
568			channel->wr_buffers = buffers;
569			rc = xilly_get_dma_buffers(ep, &wr_alloc, buffers,
570						   bufnum, bytebufsize);
571		} else {
572			rc = xilly_get_dma_buffers(ep, &wr_alloc, NULL,
573						   bufnum, bytebufsize);
574			msg_buf_done++;
575		}
576
577		if (rc)
578			return -ENOMEM;
579	}
580
581	if (!msg_buf_done) {
582		dev_err(ep->dev,
583			"Corrupt IDT: No message buffer. Aborting.\n");
584		return -ENODEV;
585	}
586	return 0;
587}
588
589static int xilly_scan_idt(struct xilly_endpoint *endpoint,
590			  struct xilly_idt_handle *idt_handle)
591{
592	int count = 0;
593	unsigned char *idt = endpoint->channels[1]->wr_buffers[0]->addr;
594	unsigned char *end_of_idt = idt + endpoint->idtlen - 4;
595	unsigned char *scan;
596	int len;
597
598	scan = idt + 1;
599	idt_handle->names = scan;
600
601	while ((scan <= end_of_idt) && *scan) {
602		while ((scan <= end_of_idt) && *scan++)
			/* Do nothing, just scan through the string */;
604		count++;
605	}
606
607	idt_handle->names_len = scan - idt_handle->names;
608
609	scan++;
610
611	if (scan > end_of_idt) {
612		dev_err(endpoint->dev,
613			"IDT device name list overflow. Aborting.\n");
614		return -ENODEV;
615	}
616	idt_handle->chandesc = scan;
617
618	len = endpoint->idtlen - (3 + ((int) (scan - idt)));
619
620	if (len & 0x03) {
621		dev_err(endpoint->dev,
622			"Corrupt IDT device name list. Aborting.\n");
623		return -ENODEV;
624	}
625
626	idt_handle->entries = len >> 2;
627	endpoint->num_channels = count;
628
629	return 0;
630}
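
/*
 * For reference, the IDT layout as inferred from the scan above and from
 * xilly_obtain_idt() / xilly_setupchannels():
 *
 *   idt[0]          Format revision (anything above 0x82 is rejected)
 *   idt[1..]        NUL-terminated device file names, one per channel
 *   extra NUL       An empty string terminates the name list
 *   4-byte entries  Channel descriptors, parsed by xilly_setupchannels()
 *   trailing bytes  CRC32; xilly_obtain_idt() verifies that the CRC over
 *                   the whole IDT comes out zero
 */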
631
632static int xilly_obtain_idt(struct xilly_endpoint *endpoint)
633{
634	struct xilly_channel *channel;
635	unsigned char *version;
636	long t;
637
638	channel = endpoint->channels[1]; /* This should be generated ad-hoc */
639
640	channel->wr_sleepy = 1;
641
642	iowrite32(1 |
643		  (3 << 24), /* Opcode 3 for channel 0 = Send IDT */
644		  endpoint->registers + fpga_buf_ctrl_reg);
645
646	t = wait_event_interruptible_timeout(channel->wr_wait,
647					     (!channel->wr_sleepy),
648					     XILLY_TIMEOUT);
649
650	if (t <= 0) {
651		dev_err(endpoint->dev, "Failed to obtain IDT. Aborting.\n");
652
653		if (endpoint->fatal_error)
654			return -EIO;
655
656		return -ENODEV;
657	}
658
659	dma_sync_single_for_cpu(channel->endpoint->dev,
660				channel->wr_buffers[0]->dma_addr,
661				channel->wr_buf_size,
662				DMA_FROM_DEVICE);
663
664	if (channel->wr_buffers[0]->end_offset != endpoint->idtlen) {
665		dev_err(endpoint->dev,
666			"IDT length mismatch (%d != %d). Aborting.\n",
667			channel->wr_buffers[0]->end_offset, endpoint->idtlen);
668		return -ENODEV;
669	}
670
671	if (crc32_le(~0, channel->wr_buffers[0]->addr,
672		     endpoint->idtlen+1) != 0) {
673		dev_err(endpoint->dev, "IDT failed CRC check. Aborting.\n");
674		return -ENODEV;
675	}
676
677	version = channel->wr_buffers[0]->addr;
678
679	/* Check version number. Reject anything above 0x82. */
680	if (*version > 0x82) {
681		dev_err(endpoint->dev,
682			"No support for IDT version 0x%02x. Maybe the xillybus driver needs an upgrade. Aborting.\n",
683			*version);
684		return -ENODEV;
685	}
686
687	return 0;
688}
689
690static ssize_t xillybus_read(struct file *filp, char __user *userbuf,
691			     size_t count, loff_t *f_pos)
692{
693	ssize_t rc;
694	unsigned long flags;
695	int bytes_done = 0;
696	int no_time_left = 0;
697	long deadline, left_to_sleep;
698	struct xilly_channel *channel = filp->private_data;
699
700	int empty, reached_eof, exhausted, ready;
701	/* Initializations are there only to silence warnings */
702
703	int howmany = 0, bufpos = 0, bufidx = 0, bufferdone = 0;
704	int waiting_bufidx;
705
706	if (channel->endpoint->fatal_error)
707		return -EIO;
708
709	deadline = jiffies + 1 + XILLY_RX_TIMEOUT;
710
711	rc = mutex_lock_interruptible(&channel->wr_mutex);
712	if (rc)
713		return rc;
714
715	while (1) { /* Note that we may drop mutex within this loop */
716		int bytes_to_do = count - bytes_done;
717
718		spin_lock_irqsave(&channel->wr_spinlock, flags);
719
720		empty = channel->wr_empty;
721		ready = !empty || channel->wr_ready;
722
723		if (!empty) {
724			bufidx = channel->wr_host_buf_idx;
725			bufpos = channel->wr_host_buf_pos;
726			howmany = ((channel->wr_buffers[bufidx]->end_offset
727				    + 1) << channel->log2_element_size)
728				- bufpos;
729
730			/* Update wr_host_* to its post-operation state */
731			if (howmany > bytes_to_do) {
732				bufferdone = 0;
733
734				howmany = bytes_to_do;
735				channel->wr_host_buf_pos += howmany;
736			} else {
737				bufferdone = 1;
738
739				channel->wr_host_buf_pos = 0;
740
741				if (bufidx == channel->wr_fpga_buf_idx) {
742					channel->wr_empty = 1;
743					channel->wr_sleepy = 1;
744					channel->wr_ready = 0;
745				}
746
747				if (bufidx >= (channel->num_wr_buffers - 1))
748					channel->wr_host_buf_idx = 0;
749				else
750					channel->wr_host_buf_idx++;
751			}
752		}
753
		/*
		 * Marking our situation after the possible changes above,
		 * for use after releasing the spinlock.
		 *
		 * empty = empty before change
		 * exhausted = empty after possible change
		 */
761
762		reached_eof = channel->wr_empty &&
763			(channel->wr_host_buf_idx == channel->wr_eof);
764		channel->wr_hangup = reached_eof;
765		exhausted = channel->wr_empty;
766		waiting_bufidx = channel->wr_host_buf_idx;
767
768		spin_unlock_irqrestore(&channel->wr_spinlock, flags);
769
770		if (!empty) { /* Go on, now without the spinlock */
771
772			if (bufpos == 0) /* Position zero means it's virgin */
773				dma_sync_single_for_cpu(channel->endpoint->dev,
774							channel->wr_buffers[bufidx]->dma_addr,
775							channel->wr_buf_size,
776							DMA_FROM_DEVICE);
777
778			if (copy_to_user(
779				    userbuf,
780				    channel->wr_buffers[bufidx]->addr
781				    + bufpos, howmany))
782				rc = -EFAULT;
783
784			userbuf += howmany;
785			bytes_done += howmany;
786
787			if (bufferdone) {
788				dma_sync_single_for_device(channel->endpoint->dev,
789							   channel->wr_buffers[bufidx]->dma_addr,
790							   channel->wr_buf_size,
791							   DMA_FROM_DEVICE);
792
				/*
				 * Tell the FPGA we're done with this buffer.
				 * It's an atomic operation as far as the FPGA
				 * is concerned, so what happens with other
				 * channels doesn't matter, and this channel
				 * is protected by its channel-specific mutex.
				 */
800
801				iowrite32(1 | (channel->chan_num << 1) |
802					  (bufidx << 12),
803					  channel->endpoint->registers +
804					  fpga_buf_ctrl_reg);
805			}
806
807			if (rc) {
808				mutex_unlock(&channel->wr_mutex);
809				return rc;
810			}
811		}
812
813		/* This includes a zero-count return = EOF */
814		if ((bytes_done >= count) || reached_eof)
815			break;
816
817		if (!exhausted)
818			continue; /* More in RAM buffer(s)? Just go on. */
819
820		if ((bytes_done > 0) &&
821		    (no_time_left ||
822		     (channel->wr_synchronous && channel->wr_allow_partial)))
823			break;
824
		/*
		 * Nonblocking read: The "ready" flag tells us that the FPGA
		 * has data to send. In non-blocking mode, if it isn't set,
		 * just return. But if it is, we jump directly to the point
		 * where we ask the FPGA to send all it has, and wait until
		 * that data arrives. So in a sense, we *do* block in
		 * nonblocking mode, but only for a very short time.
		 */
833
834		if (!no_time_left && (filp->f_flags & O_NONBLOCK)) {
835			if (bytes_done > 0)
836				break;
837
838			if (ready)
839				goto desperate;
840
841			rc = -EAGAIN;
842			break;
843		}
844
845		if (!no_time_left || (bytes_done > 0)) {
846			/*
847			 * Note that in case of an element-misaligned read
848			 * request, offsetlimit will include the last element,
849			 * which will be partially read from.
850			 */
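			/*
			 * Worked example: with 32-bit elements
			 * (log2_element_size = 2) and 10 bytes left to read,
			 * offsetlimit = (10 - 1) >> 2 = 2, i.e. the FPGA may
			 * stop after the third element, of which only the
			 * first two bytes will be consumed.
			 */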
851			int offsetlimit = ((count - bytes_done) - 1) >>
852				channel->log2_element_size;
853			int buf_elements = channel->wr_buf_size >>
854				channel->log2_element_size;
855
856			/*
857			 * In synchronous mode, always send an offset limit.
858			 * Just don't send a value too big.
859			 */
860
861			if (channel->wr_synchronous) {
862				/* Don't request more than one buffer */
863				if (channel->wr_allow_partial &&
864				    (offsetlimit >= buf_elements))
865					offsetlimit = buf_elements - 1;
866
867				/* Don't request more than all buffers */
868				if (!channel->wr_allow_partial &&
869				    (offsetlimit >=
870				     (buf_elements * channel->num_wr_buffers)))
871					offsetlimit = buf_elements *
872						channel->num_wr_buffers - 1;
873			}
874
			/*
			 * In asynchronous mode, force early flush of a buffer
			 * only if that will allow returning a full count. The
			 * "offsetlimit < ( ... )" rather than "<=" excludes
			 * requesting a full buffer, which would obviously
			 * cause a buffer transmission anyhow.
			 */
882
883			if (channel->wr_synchronous ||
884			    (offsetlimit < (buf_elements - 1))) {
885				mutex_lock(&channel->endpoint->register_mutex);
886
887				iowrite32(offsetlimit,
888					  channel->endpoint->registers +
889					  fpga_buf_offset_reg);
890
891				iowrite32(1 | (channel->chan_num << 1) |
892					  (2 << 24) |  /* 2 = offset limit */
893					  (waiting_bufidx << 12),
894					  channel->endpoint->registers +
895					  fpga_buf_ctrl_reg);
896
897				mutex_unlock(&channel->endpoint->
898					     register_mutex);
899			}
900		}
901
		/*
		 * If partial completion is disallowed, there is no point in
		 * timeout sleeping. Nor is there if no_time_left is set and
		 * there's no data.
		 */
907
908		if (!channel->wr_allow_partial ||
909		    (no_time_left && (bytes_done == 0))) {
910			/*
911			 * This do-loop will run more than once if another
912			 * thread reasserted wr_sleepy before we got the mutex
913			 * back, so we try again.
914			 */
915
916			do {
917				mutex_unlock(&channel->wr_mutex);
918
919				if (wait_event_interruptible(
920					    channel->wr_wait,
921					    (!channel->wr_sleepy)))
922					goto interrupted;
923
924				if (mutex_lock_interruptible(
925					    &channel->wr_mutex))
926					goto interrupted;
927			} while (channel->wr_sleepy);
928
929			continue;
930
interrupted: /* Mutex is not held if we got here */
932			if (channel->endpoint->fatal_error)
933				return -EIO;
934			if (bytes_done)
935				return bytes_done;
936			if (filp->f_flags & O_NONBLOCK)
937				return -EAGAIN; /* Don't admit snoozing */
938			return -EINTR;
939		}
940
941		left_to_sleep = deadline - ((long) jiffies);
942
943		/*
944		 * If our time is out, skip the waiting. We may miss wr_sleepy
945		 * being deasserted but hey, almost missing the train is like
946		 * missing it.
947		 */
948
949		if (left_to_sleep > 0) {
950			left_to_sleep =
951				wait_event_interruptible_timeout(
952					channel->wr_wait,
953					(!channel->wr_sleepy),
954					left_to_sleep);
955
956			if (left_to_sleep > 0) /* wr_sleepy deasserted */
957				continue;
958
959			if (left_to_sleep < 0) { /* Interrupt */
960				mutex_unlock(&channel->wr_mutex);
961				if (channel->endpoint->fatal_error)
962					return -EIO;
963				if (bytes_done)
964					return bytes_done;
965				return -EINTR;
966			}
967		}
968
969desperate:
970		no_time_left = 1; /* We're out of sleeping time. Desperate! */
971
972		if (bytes_done == 0) {
973			/*
974			 * Reaching here means that we allow partial return,
975			 * that we've run out of time, and that we have
976			 * nothing to return.
977			 * So tell the FPGA to send anything it has or gets.
978			 */
979
980			iowrite32(1 | (channel->chan_num << 1) |
981				  (3 << 24) |  /* Opcode 3, flush it all! */
982				  (waiting_bufidx << 12),
983				  channel->endpoint->registers +
984				  fpga_buf_ctrl_reg);
985		}
986
987		/*
988		 * Reaching here means that we *do* have data in the buffer,
989		 * but the "partial" flag disallows returning less than
990		 * required. And we don't have as much. So loop again,
991		 * which is likely to end up blocking indefinitely until
992		 * enough data has arrived.
993		 */
994	}
995
996	mutex_unlock(&channel->wr_mutex);
997
998	if (channel->endpoint->fatal_error)
999		return -EIO;
1000
1001	if (rc)
1002		return rc;
1003
1004	return bytes_done;
1005}
1006
/*
 * The timeout argument takes values as follows:
 *  >0 : Flush with timeout
 * ==0 : Flush, and wait indefinitely for the flush to complete
 *  <0 : Autoflush: Flush only if there's a single buffer occupied
 */
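
/*
 * For example (all three callers appear further down in this file):
 * xillybus_flush() passes HZ for a one-second bounded flush on close(),
 * the synchronous path of xillybus_write() passes 0 to wait until the
 * FPGA has drained everything, and the autoflush work item passes -1, so
 * a partially filled buffer is pushed out only if it's the sole one
 * occupied.
 */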
1013
1014static int xillybus_myflush(struct xilly_channel *channel, long timeout)
1015{
1016	int rc;
1017	unsigned long flags;
1018
1019	int end_offset_plus1;
1020	int bufidx, bufidx_minus1;
1021	int i;
1022	int empty;
1023	int new_rd_host_buf_pos;
1024
1025	if (channel->endpoint->fatal_error)
1026		return -EIO;
1027	rc = mutex_lock_interruptible(&channel->rd_mutex);
1028	if (rc)
1029		return rc;
1030
	/*
	 * Don't flush a closed channel. This can happen when the queued
	 * autoflush work item fires after the file has been closed. This is
	 * not an error, just something to dismiss.
	 */
1036
1037	if (!channel->rd_ref_count)
1038		goto done;
1039
1040	bufidx = channel->rd_host_buf_idx;
1041
1042	bufidx_minus1 = (bufidx == 0) ?
1043		channel->num_rd_buffers - 1 :
1044		bufidx - 1;
1045
1046	end_offset_plus1 = channel->rd_host_buf_pos >>
1047		channel->log2_element_size;
1048
1049	new_rd_host_buf_pos = channel->rd_host_buf_pos -
1050		(end_offset_plus1 << channel->log2_element_size);
1051
1052	/* Submit the current buffer if it's nonempty */
1053	if (end_offset_plus1) {
1054		unsigned char *tail = channel->rd_buffers[bufidx]->addr +
1055			(end_offset_plus1 << channel->log2_element_size);
1056
		/* Copy unflushed data, so we can put it in the next buffer */
1058		for (i = 0; i < new_rd_host_buf_pos; i++)
1059			channel->rd_leftovers[i] = *tail++;
1060
1061		spin_lock_irqsave(&channel->rd_spinlock, flags);
1062
1063		/* Autoflush only if a single buffer is occupied */
1064
1065		if ((timeout < 0) &&
1066		    (channel->rd_full ||
1067		     (bufidx_minus1 != channel->rd_fpga_buf_idx))) {
1068			spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1069			/*
1070			 * A new work item may be queued by the ISR exactly
1071			 * now, since the execution of a work item allows the
1072			 * queuing of a new one while it's running.
1073			 */
1074			goto done;
1075		}
1076
1077		/* The 4th element is never needed for data, so it's a flag */
1078		channel->rd_leftovers[3] = (new_rd_host_buf_pos != 0);
1079
1080		/* Set up rd_full to reflect a certain moment's state */
1081
1082		if (bufidx == channel->rd_fpga_buf_idx)
1083			channel->rd_full = 1;
1084		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1085
1086		if (bufidx >= (channel->num_rd_buffers - 1))
1087			channel->rd_host_buf_idx = 0;
1088		else
1089			channel->rd_host_buf_idx++;
1090
1091		dma_sync_single_for_device(channel->endpoint->dev,
1092					   channel->rd_buffers[bufidx]->dma_addr,
1093					   channel->rd_buf_size,
1094					   DMA_TO_DEVICE);
1095
1096		mutex_lock(&channel->endpoint->register_mutex);
1097
1098		iowrite32(end_offset_plus1 - 1,
1099			  channel->endpoint->registers + fpga_buf_offset_reg);
1100
1101		iowrite32((channel->chan_num << 1) | /* Channel ID */
1102			  (2 << 24) |  /* Opcode 2, submit buffer */
1103			  (bufidx << 12),
1104			  channel->endpoint->registers + fpga_buf_ctrl_reg);
1105
1106		mutex_unlock(&channel->endpoint->register_mutex);
1107	} else if (bufidx == 0) {
1108		bufidx = channel->num_rd_buffers - 1;
1109	} else {
1110		bufidx--;
1111	}
1112
1113	channel->rd_host_buf_pos = new_rd_host_buf_pos;
1114
1115	if (timeout < 0)
1116		goto done; /* Autoflush */
1117
	/*
	 * bufidx is now the last buffer written to (or equal to
	 * rd_fpga_buf_idx if no buffer was ever written to), and
	 * channel->rd_host_buf_idx is the one after it.
	 *
	 * If bufidx == channel->rd_fpga_buf_idx, we're either empty or full.
	 */
1125
1126	while (1) { /* Loop waiting for draining of buffers */
1127		spin_lock_irqsave(&channel->rd_spinlock, flags);
1128
1129		if (bufidx != channel->rd_fpga_buf_idx)
1130			channel->rd_full = 1; /*
1131					       * Not really full,
1132					       * but needs waiting.
1133					       */
1134
1135		empty = !channel->rd_full;
1136
1137		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1138
1139		if (empty)
1140			break;
1141
		/*
		 * Indefinite sleep with mutex taken. With data waiting for
		 * flushing, the user should not be surprised if open() for
		 * write sleeps.
		 */
1147		if (timeout == 0)
1148			wait_event_interruptible(channel->rd_wait,
1149						 (!channel->rd_full));
1150
1151		else if (wait_event_interruptible_timeout(
1152				 channel->rd_wait,
1153				 (!channel->rd_full),
1154				 timeout) == 0) {
1155			dev_warn(channel->endpoint->dev,
1156				 "Timed out while flushing. Output data may be lost.\n");
1157
1158			rc = -ETIMEDOUT;
1159			break;
1160		}
1161
1162		if (channel->rd_full) {
1163			rc = -EINTR;
1164			break;
1165		}
1166	}
1167
1168done:
1169	mutex_unlock(&channel->rd_mutex);
1170
1171	if (channel->endpoint->fatal_error)
1172		return -EIO;
1173
1174	return rc;
1175}
1176
1177static int xillybus_flush(struct file *filp, fl_owner_t id)
1178{
1179	if (!(filp->f_mode & FMODE_WRITE))
1180		return 0;
1181
1182	return xillybus_myflush(filp->private_data, HZ); /* 1 second timeout */
1183}
1184
1185static void xillybus_autoflush(struct work_struct *work)
1186{
1187	struct delayed_work *workitem = container_of(
1188		work, struct delayed_work, work);
1189	struct xilly_channel *channel = container_of(
1190		workitem, struct xilly_channel, rd_workitem);
1191	int rc;
1192
1193	rc = xillybus_myflush(channel, -1);
1194	if (rc == -EINTR)
1195		dev_warn(channel->endpoint->dev,
1196			 "Autoflush failed because work queue thread got a signal.\n");
1197	else if (rc)
1198		dev_err(channel->endpoint->dev,
1199			"Autoflush failed under weird circumstances.\n");
1200}
1201
1202static ssize_t xillybus_write(struct file *filp, const char __user *userbuf,
1203			      size_t count, loff_t *f_pos)
1204{
1205	ssize_t rc;
1206	unsigned long flags;
1207	int bytes_done = 0;
1208	struct xilly_channel *channel = filp->private_data;
1209
1210	int full, exhausted;
1211	/* Initializations are there only to silence warnings */
1212
1213	int howmany = 0, bufpos = 0, bufidx = 0, bufferdone = 0;
1214	int end_offset_plus1 = 0;
1215
1216	if (channel->endpoint->fatal_error)
1217		return -EIO;
1218
1219	rc = mutex_lock_interruptible(&channel->rd_mutex);
1220	if (rc)
1221		return rc;
1222
1223	while (1) {
1224		int bytes_to_do = count - bytes_done;
1225
1226		spin_lock_irqsave(&channel->rd_spinlock, flags);
1227
1228		full = channel->rd_full;
1229
1230		if (!full) {
1231			bufidx = channel->rd_host_buf_idx;
1232			bufpos = channel->rd_host_buf_pos;
1233			howmany = channel->rd_buf_size - bufpos;
1234
			/*
			 * Update rd_host_* to its state after this operation.
			 * count=0 means committing the buffer immediately,
			 * which is like flushing, but doesn't necessarily
			 * block.
			 */
1240
1241			if ((howmany > bytes_to_do) &&
1242			    (count ||
1243			     ((bufpos >> channel->log2_element_size) == 0))) {
1244				bufferdone = 0;
1245
1246				howmany = bytes_to_do;
1247				channel->rd_host_buf_pos += howmany;
1248			} else {
1249				bufferdone = 1;
1250
1251				if (count) {
1252					end_offset_plus1 =
1253						channel->rd_buf_size >>
1254						channel->log2_element_size;
1255					channel->rd_host_buf_pos = 0;
1256				} else {
1257					unsigned char *tail;
1258					int i;
1259
1260					howmany = 0;
1261
1262					end_offset_plus1 = bufpos >>
1263						channel->log2_element_size;
1264
1265					channel->rd_host_buf_pos -=
1266						end_offset_plus1 <<
1267						channel->log2_element_size;
1268
1269					tail = channel->
1270						rd_buffers[bufidx]->addr +
1271						(end_offset_plus1 <<
1272						 channel->log2_element_size);
1273
1274					for (i = 0;
1275					     i < channel->rd_host_buf_pos;
1276					     i++)
1277						channel->rd_leftovers[i] =
1278							*tail++;
1279				}
1280
1281				if (bufidx == channel->rd_fpga_buf_idx)
1282					channel->rd_full = 1;
1283
1284				if (bufidx >= (channel->num_rd_buffers - 1))
1285					channel->rd_host_buf_idx = 0;
1286				else
1287					channel->rd_host_buf_idx++;
1288			}
1289		}
1290
		/*
		 * Marking our situation after the possible changes above,
		 * for use after releasing the spinlock.
		 *
		 * full = full before change
		 * exhausted = full after possible change
		 */
1298
1299		exhausted = channel->rd_full;
1300
1301		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1302
1303		if (!full) { /* Go on, now without the spinlock */
1304			unsigned char *head =
1305				channel->rd_buffers[bufidx]->addr;
1306			int i;
1307
1308			if ((bufpos == 0) || /* Zero means it's virgin */
1309			    (channel->rd_leftovers[3] != 0)) {
1310				dma_sync_single_for_cpu(channel->endpoint->dev,
1311							channel->rd_buffers[bufidx]->dma_addr,
1312							channel->rd_buf_size,
1313							DMA_TO_DEVICE);
1314
1315				/* Virgin, but leftovers are due */
1316				for (i = 0; i < bufpos; i++)
1317					*head++ = channel->rd_leftovers[i];
1318
1319				channel->rd_leftovers[3] = 0; /* Clear flag */
1320			}
1321
1322			if (copy_from_user(
1323				    channel->rd_buffers[bufidx]->addr + bufpos,
1324				    userbuf, howmany))
1325				rc = -EFAULT;
1326
1327			userbuf += howmany;
1328			bytes_done += howmany;
1329
1330			if (bufferdone) {
1331				dma_sync_single_for_device(channel->endpoint->dev,
1332							   channel->rd_buffers[bufidx]->dma_addr,
1333							   channel->rd_buf_size,
1334							   DMA_TO_DEVICE);
1335
1336				mutex_lock(&channel->endpoint->register_mutex);
1337
1338				iowrite32(end_offset_plus1 - 1,
1339					  channel->endpoint->registers +
1340					  fpga_buf_offset_reg);
1341
1342				iowrite32((channel->chan_num << 1) |
1343					  (2 << 24) |  /* 2 = submit buffer */
1344					  (bufidx << 12),
1345					  channel->endpoint->registers +
1346					  fpga_buf_ctrl_reg);
1347
1348				mutex_unlock(&channel->endpoint->
1349					     register_mutex);
1350
1351				channel->rd_leftovers[3] =
1352					(channel->rd_host_buf_pos != 0);
1353			}
1354
1355			if (rc) {
1356				mutex_unlock(&channel->rd_mutex);
1357
1358				if (channel->endpoint->fatal_error)
1359					return -EIO;
1360
1361				if (!channel->rd_synchronous)
1362					queue_delayed_work(
1363						xillybus_wq,
1364						&channel->rd_workitem,
1365						XILLY_RX_TIMEOUT);
1366
1367				return rc;
1368			}
1369		}
1370
1371		if (bytes_done >= count)
1372			break;
1373
1374		if (!exhausted)
1375			continue; /* If there's more space, just go on */
1376
1377		if ((bytes_done > 0) && channel->rd_allow_partial)
1378			break;
1379
		/*
		 * Indefinite sleep with mutex taken. With data waiting for
		 * flushing, the user should not be surprised if open() for
		 * write sleeps.
		 */
1385
1386		if (filp->f_flags & O_NONBLOCK) {
1387			rc = -EAGAIN;
1388			break;
1389		}
1390
1391		if (wait_event_interruptible(channel->rd_wait,
1392					     (!channel->rd_full))) {
1393			mutex_unlock(&channel->rd_mutex);
1394
1395			if (channel->endpoint->fatal_error)
1396				return -EIO;
1397
1398			if (bytes_done)
1399				return bytes_done;
1400			return -EINTR;
1401		}
1402	}
1403
1404	mutex_unlock(&channel->rd_mutex);
1405
1406	if (!channel->rd_synchronous)
1407		queue_delayed_work(xillybus_wq,
1408				   &channel->rd_workitem,
1409				   XILLY_RX_TIMEOUT);
1410
1411	if (channel->endpoint->fatal_error)
1412		return -EIO;
1413
1414	if (rc)
1415		return rc;
1416
1417	if ((channel->rd_synchronous) && (bytes_done > 0)) {
1418		rc = xillybus_myflush(filp->private_data, 0); /* No timeout */
1419
1420		if (rc && (rc != -EINTR))
1421			return rc;
1422	}
1423
1424	return bytes_done;
1425}
1426
1427static int xillybus_open(struct inode *inode, struct file *filp)
1428{
1429	int rc;
1430	unsigned long flags;
1431	struct xilly_endpoint *endpoint;
1432	struct xilly_channel *channel;
1433	int index;
1434
1435	rc = xillybus_find_inode(inode, (void **)&endpoint, &index);
1436	if (rc)
1437		return rc;
1438
1439	if (endpoint->fatal_error)
1440		return -EIO;
1441
1442	channel = endpoint->channels[1 + index];
1443	filp->private_data = channel;
1444
1445	/*
1446	 * It gets complicated because:
1447	 * 1. We don't want to take a mutex we don't have to
1448	 * 2. We don't want to open one direction if the other will fail.
1449	 */
1450
1451	if ((filp->f_mode & FMODE_READ) && (!channel->num_wr_buffers))
1452		return -ENODEV;
1453
1454	if ((filp->f_mode & FMODE_WRITE) && (!channel->num_rd_buffers))
1455		return -ENODEV;
1456
1457	if ((filp->f_mode & FMODE_READ) && (filp->f_flags & O_NONBLOCK) &&
1458	    (channel->wr_synchronous || !channel->wr_allow_partial ||
1459	     !channel->wr_supports_nonempty)) {
1460		dev_err(endpoint->dev,
1461			"open() failed: O_NONBLOCK not allowed for read on this device\n");
1462		return -ENODEV;
1463	}
1464
1465	if ((filp->f_mode & FMODE_WRITE) && (filp->f_flags & O_NONBLOCK) &&
1466	    (channel->rd_synchronous || !channel->rd_allow_partial)) {
1467		dev_err(endpoint->dev,
1468			"open() failed: O_NONBLOCK not allowed for write on this device\n");
1469		return -ENODEV;
1470	}
1471
	/*
	 * Note: open() may block on getting mutexes despite O_NONBLOCK.
	 * This shouldn't occur normally, since multiple opens of the same
	 * device file are almost always prohibited anyhow
	 * (*_exclusive_open is normally set in real-life systems).
	 */
1478
1479	if (filp->f_mode & FMODE_READ) {
1480		rc = mutex_lock_interruptible(&channel->wr_mutex);
1481		if (rc)
1482			return rc;
1483	}
1484
1485	if (filp->f_mode & FMODE_WRITE) {
1486		rc = mutex_lock_interruptible(&channel->rd_mutex);
1487		if (rc)
1488			goto unlock_wr;
1489	}
1490
1491	if ((filp->f_mode & FMODE_READ) &&
1492	    (channel->wr_ref_count != 0) &&
1493	    (channel->wr_exclusive_open)) {
1494		rc = -EBUSY;
1495		goto unlock;
1496	}
1497
1498	if ((filp->f_mode & FMODE_WRITE) &&
1499	    (channel->rd_ref_count != 0) &&
1500	    (channel->rd_exclusive_open)) {
1501		rc = -EBUSY;
1502		goto unlock;
1503	}
1504
1505	if (filp->f_mode & FMODE_READ) {
1506		if (channel->wr_ref_count == 0) { /* First open of file */
1507			/* Move the host to first buffer */
1508			spin_lock_irqsave(&channel->wr_spinlock, flags);
1509			channel->wr_host_buf_idx = 0;
1510			channel->wr_host_buf_pos = 0;
1511			channel->wr_fpga_buf_idx = -1;
1512			channel->wr_empty = 1;
1513			channel->wr_ready = 0;
1514			channel->wr_sleepy = 1;
1515			channel->wr_eof = -1;
1516			channel->wr_hangup = 0;
1517
1518			spin_unlock_irqrestore(&channel->wr_spinlock, flags);
1519
1520			iowrite32(1 | (channel->chan_num << 1) |
1521				  (4 << 24) |  /* Opcode 4, open channel */
1522				  ((channel->wr_synchronous & 1) << 23),
1523				  channel->endpoint->registers +
1524				  fpga_buf_ctrl_reg);
1525		}
1526
1527		channel->wr_ref_count++;
1528	}
1529
1530	if (filp->f_mode & FMODE_WRITE) {
1531		if (channel->rd_ref_count == 0) { /* First open of file */
1532			/* Move the host to first buffer */
1533			spin_lock_irqsave(&channel->rd_spinlock, flags);
1534			channel->rd_host_buf_idx = 0;
1535			channel->rd_host_buf_pos = 0;
1536			channel->rd_leftovers[3] = 0; /* No leftovers. */
1537			channel->rd_fpga_buf_idx = channel->num_rd_buffers - 1;
1538			channel->rd_full = 0;
1539
1540			spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1541
1542			iowrite32((channel->chan_num << 1) |
1543				  (4 << 24),   /* Opcode 4, open channel */
1544				  channel->endpoint->registers +
1545				  fpga_buf_ctrl_reg);
1546		}
1547
1548		channel->rd_ref_count++;
1549	}
1550
1551unlock:
1552	if (filp->f_mode & FMODE_WRITE)
1553		mutex_unlock(&channel->rd_mutex);
1554unlock_wr:
1555	if (filp->f_mode & FMODE_READ)
1556		mutex_unlock(&channel->wr_mutex);
1557
1558	if (!rc && (!channel->seekable))
1559		return nonseekable_open(inode, filp);
1560
1561	return rc;
1562}
1563
1564static int xillybus_release(struct inode *inode, struct file *filp)
1565{
1566	unsigned long flags;
1567	struct xilly_channel *channel = filp->private_data;
1568
1569	int buf_idx;
1570	int eof;
1571
1572	if (channel->endpoint->fatal_error)
1573		return -EIO;
1574
1575	if (filp->f_mode & FMODE_WRITE) {
1576		mutex_lock(&channel->rd_mutex);
1577
1578		channel->rd_ref_count--;
1579
1580		if (channel->rd_ref_count == 0) {
1581			/*
1582			 * We rely on the kernel calling flush()
1583			 * before we get here.
1584			 */
1585
1586			iowrite32((channel->chan_num << 1) | /* Channel ID */
1587				  (5 << 24),  /* Opcode 5, close channel */
1588				  channel->endpoint->registers +
1589				  fpga_buf_ctrl_reg);
1590		}
1591		mutex_unlock(&channel->rd_mutex);
1592	}
1593
1594	if (filp->f_mode & FMODE_READ) {
1595		mutex_lock(&channel->wr_mutex);
1596
1597		channel->wr_ref_count--;
1598
1599		if (channel->wr_ref_count == 0) {
1600			iowrite32(1 | (channel->chan_num << 1) |
1601				  (5 << 24),  /* Opcode 5, close channel */
1602				  channel->endpoint->registers +
1603				  fpga_buf_ctrl_reg);
1604
			/*
			 * This is crazily cautious: We make sure not only
			 * that we got an EOF (be it because we closed the
			 * channel or because of a user's EOF), but also that
			 * it points one beyond the last buffer that arrived,
			 * so we have no leftover buffers pending before
			 * wrapping up (which can only happen with
			 * asynchronous channels, BTW).
			 */
1614
1615			while (1) {
1616				spin_lock_irqsave(&channel->wr_spinlock,
1617						  flags);
1618				buf_idx = channel->wr_fpga_buf_idx;
1619				eof = channel->wr_eof;
1620				channel->wr_sleepy = 1;
1621				spin_unlock_irqrestore(&channel->wr_spinlock,
1622						       flags);
1623
1624				/*
1625				 * Check if eof points at the buffer after
1626				 * the last one the FPGA submitted. Note that
1627				 * no EOF is marked by negative eof.
1628				 */
1629
1630				buf_idx++;
1631				if (buf_idx == channel->num_wr_buffers)
1632					buf_idx = 0;
1633
1634				if (buf_idx == eof)
1635					break;
1636
				/*
				 * Steal an extra 100 ms if the wait was
				 * interrupted by a signal. This is a simple
				 * workaround for a signal pending when
				 * entering, which would otherwise result in
				 * declaring the hardware non-responsive.
				 */
1644
1645				if (wait_event_interruptible(
1646					    channel->wr_wait,
1647					    (!channel->wr_sleepy)))
1648					msleep(100);
1649
1650				if (channel->wr_sleepy) {
1651					mutex_unlock(&channel->wr_mutex);
1652					dev_warn(channel->endpoint->dev,
1653						 "Hardware failed to respond to close command, therefore left in messy state.\n");
1654					return -EINTR;
1655				}
1656			}
1657		}
1658
1659		mutex_unlock(&channel->wr_mutex);
1660	}
1661
1662	return 0;
1663}
1664
1665static loff_t xillybus_llseek(struct file *filp, loff_t offset, int whence)
1666{
1667	struct xilly_channel *channel = filp->private_data;
1668	loff_t pos = filp->f_pos;
1669	int rc = 0;
1670
	/*
	 * Take both mutexes uninterruptibly, since it seems like common
	 * applications don't expect an -EINTR here. Besides, multiple
	 * access to a single file descriptor on seekable devices is a mess
	 * anyhow.
	 */
1677
1678	if (channel->endpoint->fatal_error)
1679		return -EIO;
1680
1681	mutex_lock(&channel->wr_mutex);
1682	mutex_lock(&channel->rd_mutex);
1683
1684	switch (whence) {
1685	case SEEK_SET:
1686		pos = offset;
1687		break;
1688	case SEEK_CUR:
1689		pos += offset;
1690		break;
1691	case SEEK_END:
1692		pos = offset; /* Going to the end => to the beginning */
1693		break;
1694	default:
1695		rc = -EINVAL;
1696		goto end;
1697	}
1698
1699	/* In any case, we must finish on an element boundary */
1700	if (pos & ((1 << channel->log2_element_size) - 1)) {
1701		rc = -EINVAL;
1702		goto end;
1703	}
1704
1705	mutex_lock(&channel->endpoint->register_mutex);
1706
1707	iowrite32(pos >> channel->log2_element_size,
1708		  channel->endpoint->registers + fpga_buf_offset_reg);
1709
1710	iowrite32((channel->chan_num << 1) |
1711		  (6 << 24),  /* Opcode 6, set address */
1712		  channel->endpoint->registers + fpga_buf_ctrl_reg);
1713
1714	mutex_unlock(&channel->endpoint->register_mutex);
1715
1716end:
1717	mutex_unlock(&channel->rd_mutex);
1718	mutex_unlock(&channel->wr_mutex);
1719
1720	if (rc) /* Return error after releasing mutexes */
1721		return rc;
1722
1723	filp->f_pos = pos;
1724
1725	/*
1726	 * Since seekable devices are allowed only when the channel is
1727	 * synchronous, we assume that there is no data pending in either
1728	 * direction (which holds true as long as no concurrent access on the
1729	 * file descriptor takes place).
1730	 * The only thing we may need to throw away is leftovers from partial
1731	 * write() flush.
1732	 */
1733
1734	channel->rd_leftovers[3] = 0;
1735
1736	return pos;
1737}
1738
1739static __poll_t xillybus_poll(struct file *filp, poll_table *wait)
1740{
1741	struct xilly_channel *channel = filp->private_data;
1742	__poll_t mask = 0;
1743	unsigned long flags;
1744
1745	poll_wait(filp, &channel->endpoint->ep_wait, wait);
1746
	/*
	 * poll() won't play ball regarding read() channels unless they are
	 * asynchronous and support the nonempty message. Allowing others
	 * would create situations where data has been delivered at the
	 * FPGA, with users expecting select() to wake up, which it may
	 * not.
	 */
1754
1755	if (!channel->wr_synchronous && channel->wr_supports_nonempty) {
1756		poll_wait(filp, &channel->wr_wait, wait);
1757		poll_wait(filp, &channel->wr_ready_wait, wait);
1758
1759		spin_lock_irqsave(&channel->wr_spinlock, flags);
1760		if (!channel->wr_empty || channel->wr_ready)
1761			mask |= EPOLLIN | EPOLLRDNORM;
1762
1763		if (channel->wr_hangup)
			/*
			 * Not EPOLLHUP, because its semantics are murky,
			 * and EPOLLIN does what we want: Wake up the
			 * reading file descriptor so it sees EOF.
			 */
			mask |= EPOLLIN | EPOLLRDNORM;
1770		spin_unlock_irqrestore(&channel->wr_spinlock, flags);
1771	}
1772
	/*
	 * If partial data write is disallowed on a write() channel,
	 * it's pointless to ever signal OK to write, because it could
	 * block despite some space being available.
	 */
1778
1779	if (channel->rd_allow_partial) {
1780		poll_wait(filp, &channel->rd_wait, wait);
1781
1782		spin_lock_irqsave(&channel->rd_spinlock, flags);
1783		if (!channel->rd_full)
1784			mask |= EPOLLOUT | EPOLLWRNORM;
1785		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1786	}
1787
1788	if (channel->endpoint->fatal_error)
1789		mask |= EPOLLERR;
1790
1791	return mask;
1792}
1793
1794static const struct file_operations xillybus_fops = {
1795	.owner      = THIS_MODULE,
1796	.read       = xillybus_read,
1797	.write      = xillybus_write,
1798	.open       = xillybus_open,
1799	.flush      = xillybus_flush,
1800	.release    = xillybus_release,
1801	.llseek     = xillybus_llseek,
1802	.poll       = xillybus_poll,
1803};
1804
1805struct xilly_endpoint *xillybus_init_endpoint(struct device *dev)
1806{
1807	struct xilly_endpoint *endpoint;
1808
1809	endpoint = devm_kzalloc(dev, sizeof(*endpoint), GFP_KERNEL);
1810	if (!endpoint)
1811		return NULL;
1812
1813	endpoint->dev = dev;
1814	endpoint->msg_counter = 0x0b;
1815	endpoint->failed_messages = 0;
1816	endpoint->fatal_error = 0;
1817
1818	init_waitqueue_head(&endpoint->ep_wait);
1819	mutex_init(&endpoint->register_mutex);
1820
1821	return endpoint;
1822}
1823EXPORT_SYMBOL(xillybus_init_endpoint);
1824
1825static int xilly_quiesce(struct xilly_endpoint *endpoint)
1826{
1827	long t;
1828
1829	endpoint->idtlen = -1;
1830
1831	iowrite32((u32) (endpoint->dma_using_dac & 0x0001),
1832		  endpoint->registers + fpga_dma_control_reg);
1833
1834	t = wait_event_interruptible_timeout(endpoint->ep_wait,
1835					     (endpoint->idtlen >= 0),
1836					     XILLY_TIMEOUT);
1837	if (t <= 0) {
1838		dev_err(endpoint->dev,
1839			"Failed to quiesce the device on exit.\n");
1840		return -ENODEV;
1841	}
1842	return 0;
1843}
1844
1845int xillybus_endpoint_discovery(struct xilly_endpoint *endpoint)
1846{
1847	int rc;
1848	long t;
1849
1850	void *bootstrap_resources;
1851	int idtbuffersize = (1 << PAGE_SHIFT);
1852	struct device *dev = endpoint->dev;
1853
1854	/*
1855	 * The bogus IDT is used during bootstrap for allocating the initial
1856	 * message buffer, and then the message buffer and space for the IDT
1857	 * itself. The initial message buffer is of a single page's size, but
1858	 * it's soon replaced with a more modest one (and memory is freed).
1859	 */
1860
1861	unsigned char bogus_idt[8] = { 1, 224, (PAGE_SHIFT)-2, 0,
1862				       3, 192, PAGE_SHIFT, 0 };
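	/*
	 * Decoded with the descriptor format parsed in xilly_setupchannels(),
	 * the first bogus entry describes channel 0 (the message buffer):
	 * one FPGA-to-host buffer of (1 << (PAGE_SHIFT - 2)) 32-bit words,
	 * i.e. one page. The second entry gives channel 1 a single
	 * page-sized FPGA-to-host buffer, which receives the IDT; bumping
	 * bogus_idt[6] below doubles that buffer until the IDT fits.
	 */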
1863	struct xilly_idt_handle idt_handle;
1864
	/*
	 * Writing the value 0x00000001 to the Endianness register signals
	 * which endianness this processor is using, so the FPGA can swap
	 * words as necessary.
	 */
1870
1871	iowrite32(1, endpoint->registers + fpga_endian_reg);
1872
1873	/* Bootstrap phase I: Allocate temporary message buffer */
1874
1875	bootstrap_resources = devres_open_group(dev, NULL, GFP_KERNEL);
1876	if (!bootstrap_resources)
1877		return -ENOMEM;
1878
1879	endpoint->num_channels = 0;
1880
1881	rc = xilly_setupchannels(endpoint, bogus_idt, 1);
1882	if (rc)
1883		return rc;
1884
1885	/* Clear the message subsystem (and counter in particular) */
1886	iowrite32(0x04, endpoint->registers + fpga_msg_ctrl_reg);
1887
1888	endpoint->idtlen = -1;
1889
1890	/*
1891	 * Set DMA 32/64 bit mode, quiesce the device (?!) and get IDT
1892	 * buffer size.
1893	 */
1894	iowrite32((u32) (endpoint->dma_using_dac & 0x0001),
1895		  endpoint->registers + fpga_dma_control_reg);
1896
1897	t = wait_event_interruptible_timeout(endpoint->ep_wait,
1898					     (endpoint->idtlen >= 0),
1899					     XILLY_TIMEOUT);
1900	if (t <= 0) {
1901		dev_err(endpoint->dev, "No response from FPGA. Aborting.\n");
1902		return -ENODEV;
1903	}
1904
1905	/* Enable DMA */
1906	iowrite32((u32) (0x0002 | (endpoint->dma_using_dac & 0x0001)),
1907		  endpoint->registers + fpga_dma_control_reg);
1908
1909	/* Bootstrap phase II: Allocate buffer for IDT and obtain it */
1910	while (endpoint->idtlen >= idtbuffersize) {
1911		idtbuffersize *= 2;
1912		bogus_idt[6]++;
1913	}
1914
1915	endpoint->num_channels = 1;
1916
1917	rc = xilly_setupchannels(endpoint, bogus_idt, 2);
1918	if (rc)
1919		goto failed_idt;
1920
1921	rc = xilly_obtain_idt(endpoint);
1922	if (rc)
1923		goto failed_idt;
1924
1925	rc = xilly_scan_idt(endpoint, &idt_handle);
1926	if (rc)
1927		goto failed_idt;
1928
1929	devres_close_group(dev, bootstrap_resources);
1930
1931	/* Bootstrap phase III: Allocate buffers according to IDT */
1932
1933	rc = xilly_setupchannels(endpoint,
1934				 idt_handle.chandesc,
1935				 idt_handle.entries);
1936	if (rc)
1937		goto failed_idt;
1938
1939	rc = xillybus_init_chrdev(dev, &xillybus_fops,
1940				  endpoint->owner, endpoint,
1941				  idt_handle.names,
1942				  idt_handle.names_len,
1943				  endpoint->num_channels,
1944				  xillyname, false);
1945
1946	if (rc)
1947		goto failed_idt;
1948
1949	devres_release_group(dev, bootstrap_resources);
1950
1951	return 0;
1952
1953failed_idt:
1954	xilly_quiesce(endpoint);
1955	flush_workqueue(xillybus_wq);
1956
1957	return rc;
1958}
1959EXPORT_SYMBOL(xillybus_endpoint_discovery);
1960
1961void xillybus_endpoint_remove(struct xilly_endpoint *endpoint)
1962{
1963	xillybus_cleanup_chrdev(endpoint, endpoint->dev);
1964
1965	xilly_quiesce(endpoint);
1966
1967	/*
1968	 * Flushing is done upon endpoint release to prevent access to memory
1969	 * just about to be released. This makes the quiesce complete.
1970	 */
1971	flush_workqueue(xillybus_wq);
1972}
1973EXPORT_SYMBOL(xillybus_endpoint_remove);
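
/*
 * A hedged sketch (not the actual xillybus_pcie/xillybus_of glue) of how
 * a bus-specific driver is expected to use the API exported above. The
 * probe/remove names and the way "regs" and "irq" are obtained are
 * hypothetical; only the call sequence mirrors this file's exports.
 */
static int __maybe_unused xilly_glue_probe_sketch(struct device *dev,
						  void __iomem *regs,
						  int irq)
{
	struct xilly_endpoint *endpoint;
	int rc;

	endpoint = xillybus_init_endpoint(dev);
	if (!endpoint)
		return -ENOMEM;

	endpoint->owner = THIS_MODULE;
	endpoint->registers = regs;

	/* An exclusive (non-shared) interrupt, as xillybus_isr() requires */
	rc = devm_request_irq(dev, irq, xillybus_isr, 0, xillyname, endpoint);
	if (rc)
		return rc;

	return xillybus_endpoint_discovery(endpoint);
}

static void __maybe_unused xilly_glue_remove_sketch(
	struct xilly_endpoint *endpoint)
{
	xillybus_endpoint_remove(endpoint);
}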
1974
1975static int __init xillybus_init(void)
1976{
1977	xillybus_wq = alloc_workqueue(xillyname, 0, 0);
1978	if (!xillybus_wq)
1979		return -ENOMEM;
1980
1981	return 0;
1982}
1983
1984static void __exit xillybus_exit(void)
1985{
1986	/* flush_workqueue() was called for each endpoint released */
1987	destroy_workqueue(xillybus_wq);
1988}
1989
1990module_init(xillybus_init);
1991module_exit(xillybus_exit);
1992