1/* -*- c-basic-offset: 8 -*-
2 *
3 * amdtp.c - Audio and Music Data Transmission Protocol Driver
 * Copyright (C) 2001 Kristian Høgsberg
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software Foundation,
18 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 */
20
21/* OVERVIEW
22 * --------
23 *
24 * The AMDTP driver is designed to expose the IEEE1394 bus as a
25 * regular OSS soundcard, i.e. you can link /dev/dsp to /dev/amdtp and
26 * then your favourite MP3 player, game or whatever sound program will
27 * output to an IEEE1394 isochronous channel.  The signal destination
28 * could be a set of IEEE1394 loudspeakers (if and when such things
29 * become available) or an amplifier with IEEE1394 input (like the
30 * Sony STR-LSA1).  The driver only handles the actual streaming, some
31 * connection management is also required for this to actually work.
32 * That is outside the scope of this driver, and furthermore it is not
33 * really standardized yet.
34 *
 * The Audio and Music Data Transmission Protocol is available at
36 *
37 *     http://www.1394ta.org/Download/Technology/Specifications/2001/AM20Final-jf2.pdf
38 *
39 *
40 * TODO
41 * ----
42 *
43 * - We should be able to change input sample format between LE/BE, as
44 *   we already shift the bytes around when we construct the iso
45 *   packets.
46 *
47 * - Fix DMA stop after bus reset!
48 *
49 * - Clean up iso context handling in ohci1394.
50 *
51 *
52 * MAYBE TODO
53 * ----------
54 *
55 * - Receive data for local playback or recording.  Playback requires
56 *   soft syncing with the sound card.
57 *
58 * - Signal processing, i.e. receive packets, do some processing, and
59 *   transmit them again using the same packet structure and timestamps
60 *   offset by processing time.
61 *
62 * - Maybe make an ALSA interface, that is, create a file_ops
63 *   implementation that recognizes ALSA ioctls and uses defaults for
64 *   things that can't be controlled through ALSA (iso channel).
65 */
66
67#include <linux/module.h>
68#include <linux/list.h>
69#include <linux/sched.h>
70#include <linux/types.h>
71#include <linux/fs.h>
72#include <linux/ioctl.h>
73#include <linux/wait.h>
74#include <linux/pci.h>
75#include <linux/interrupt.h>
76#include <linux/poll.h>
77#include <asm/uaccess.h>
78#include <asm/atomic.h>
79
80#include "hosts.h"
81#include "highlevel.h"
82#include "ieee1394.h"
83#include "ieee1394_core.h"
84#include "ohci1394.h"
85
86#include "amdtp.h"
87#include "cmp.h"
88
89#define FMT_AMDTP 0x10
90#define FDF_AM824 0x00
91#define FDF_SFC_32KHZ   0x00
92#define FDF_SFC_44K1HZ  0x01
93#define FDF_SFC_48KHZ   0x02
94#define FDF_SFC_88K2HZ  0x03
95#define FDF_SFC_96KHZ   0x04
96#define FDF_SFC_176K4HZ 0x05
97#define FDF_SFC_192KHZ  0x06
98
/* DMA program for one transmitted iso packet: an OUTPUT_MORE_IMMEDIATE
 * descriptor carrying the IEEE1394 packet header as immediate data,
 * followed by an OUTPUT_LAST descriptor pointing at the payload.
 */
struct descriptor_block {
	struct output_more_immediate {
		u32 control;		/* descriptor command word */
		u32 pad0;
		u32 skip;		/* skip address (next block | Z) */
		u32 pad1;
		u32 header[4];		/* immediate iso packet header */
	} header_desc;

	struct output_last {
		u32 control;		/* command word incl. payload size */
		u32 data_address;	/* bus address of the payload */
		u32 branch;		/* next descriptor block | Z, or 0 */
		u32 status;		/* written back by the controller */
	} payload_desc;
};
115
/* One iso packet: its DMA descriptor block and its payload buffer,
 * each with both a kernel virtual address and a bus address. */
struct packet {
	struct descriptor_block *db;
	dma_addr_t db_bus;
	struct iso_packet *payload;
	dma_addr_t payload_bus;
};
122
123#include <asm/byteorder.h>
124
#if defined __BIG_ENDIAN_BITFIELD

/* CIP (common isochronous packet) header preceding the sample data.
 * The bitfield layout must match the on-the-wire quadlet layout, so
 * the struct is defined once per host bitfield order.  Field meanings:
 * sid = source node id, dbs = data block size, dbc = data block
 * counter, fmt/fdf = format and format-dependent field, syt = 
 * synchronization timestamp. */
struct iso_packet {
	/* First quadlet */
	unsigned int dbs      : 8;
	unsigned int eoh0     : 2;
	unsigned int sid      : 6;

	unsigned int dbc      : 8;
	unsigned int fn       : 2;
	unsigned int qpc      : 3;
	unsigned int sph      : 1;
	unsigned int reserved : 2;

	/* Second quadlet */
	unsigned int fdf      : 8;
	unsigned int eoh1     : 2;
	unsigned int fmt      : 6;

	unsigned int syt      : 16;

        quadlet_t data[0];	/* payload follows the two header quadlets */
};

#elif defined __LITTLE_ENDIAN_BITFIELD

/* Same CIP header with the bitfields reversed for little-endian
 * bitfield allocation order. */
struct iso_packet {
	/* First quadlet */
	unsigned int sid      : 6;
	unsigned int eoh0     : 2;
	unsigned int dbs      : 8;

	unsigned int reserved : 2;
	unsigned int sph      : 1;
	unsigned int qpc      : 3;
	unsigned int fn       : 2;
	unsigned int dbc      : 8;

	/* Second quadlet */
	unsigned int fmt      : 6;
	unsigned int eoh1     : 2;
	unsigned int fdf      : 8;

	unsigned int syt      : 16;

	quadlet_t data[0];	/* payload follows the two header quadlets */
};

#else

#error Unknown bitfield type

#endif
178
/* Mixed number: integer + numerator/denominator.  Used to accumulate
 * fractional sample counts exactly (see the "Integer fractional math"
 * comment further down). */
struct fraction {
	int integer;
	int numerator;
	int denominator;
};
184
185#define PACKET_LIST_SIZE 256
186#define MAX_PACKET_LISTS 4
187
/* A batch of PACKET_LIST_SIZE packets queued to the DMA engine as one
 * chained program; the last packet raises an interrupt. */
struct packet_list {
	struct list_head link;
	int last_cycle_count;	/* cycle_count2 used for the last packet */
	struct packet packets[PACKET_LIST_SIZE];
};
193
194#define BUFFER_SIZE 128
195
196/* This implements a circular buffer for incoming samples. */
197
struct buffer {
	/* head: next read position, tail: next write position,
	 * length: bytes currently stored, size: total capacity. */
	size_t head, tail, length, size;
	unsigned char data[0];	/* ring storage follows the struct */
};
202
/* Per-open-file transmit stream state: configuration, the input
 * sample ring, iso packer state, timestamp bookkeeping and the DMA
 * packet list queues. */
struct stream {
	int iso_channel;
	int format;		/* AMDTP_FORMAT_* */
	int rate;		/* sample rate in Hz */
	int dimension;		/* audio channels per event */
	int fdf;		/* FDF_SFC_* code for the CIP header */
	int mode;		/* AMDTP_MODE_BLOCKING or non-blocking */
	int sample_format;	/* input encoding, e.g. AMDTP_INPUT_LE16 */
	struct cmp_pcr *opcr;	/* output plug, when plug-managed */

	/* Input samples are copied here. */
	struct buffer *input;

	/* ISO Packer state */
	unsigned char dbc;	/* running data block counter */
	struct packet_list *current_packet_list;
	int current_packet;
	struct fraction ready_samples, samples_per_cycle;

	/* We use these to generate control bits when we are packing
	 * iec958 data.
	 */
	int iec958_frame_count;
	int iec958_rate_code;

	/* The cycle_count and cycle_offset fields are used for the
	 * synchronization timestamps (syt) in the cip header.  They
	 * are incremented by at least a cycle every time we put a
	 * time stamp in a packet.  As we dont time stamp all
	 * packages, cycle_count isn't updated in every cycle, and
	 * sometimes it's incremented by 2.  Thus, we have
	 * cycle_count2, which is simply incremented by one with each
	 * packet, so we can compare it to the transmission time
	 * written back in the dma programs.
	 */
	atomic_t cycle_count, cycle_count2;
	struct fraction cycle_offset, ticks_per_syt_offset;
	int syt_interval;
	int stale_count;	/* packet lists left with stale timestamps */

	/* Theses fields control the sample output to the DMA engine.
	 * The dma_packet_lists list holds packet lists currently
	 * queued for dma; the head of the list is currently being
	 * processed.  The last program in a packet list generates an
	 * interrupt, which removes the head from dma_packet_lists and
	 * puts it back on the free list.
	 */
	struct list_head dma_packet_lists;
	struct list_head free_packet_lists;
        wait_queue_head_t packet_list_wait;
	spinlock_t packet_list_lock;
	struct ohci1394_iso_tasklet iso_tasklet;
	struct pci_pool *descriptor_pool, *packet_pool;

	/* Streams at a host controller are chained through this field. */
	struct list_head link;
	struct amdtp_host *host;
};
261
/* Per-controller state: one entry per OHCI host, holding the list of
 * active streams on that controller. */
struct amdtp_host {
	struct hpsb_host *host;
	struct ti_ohci *ohci;
	struct list_head stream_list;		/* protected by stream_list_lock */
	spinlock_t stream_list_lock;
	struct list_head link;			/* entry in global host_list */
};
269
270static struct hpsb_highlevel *amdtp_highlevel;
271static LIST_HEAD(host_list);
272static spinlock_t host_list_lock = SPIN_LOCK_UNLOCKED;
273
274
275#define OHCI1394_CONTEXT_CYCLE_MATCH 0x80000000
276#define OHCI1394_CONTEXT_RUN         0x00008000
277#define OHCI1394_CONTEXT_WAKE        0x00001000
278#define OHCI1394_CONTEXT_DEAD        0x00000800
279#define OHCI1394_CONTEXT_ACTIVE      0x00000400
280
/* Arm isochronous transmit context @ctx: enable its interrupt, load
 * the first DMA program (bus address | Z block count), clear stale
 * control bits, and start it with a cycle match so transmission
 * begins at @cycle_match.  The wmb() orders the setup writes before
 * the RUN bit is set. */
void ohci1394_start_it_ctx(struct ti_ohci *ohci, int ctx,
			   dma_addr_t first_cmd, int z, int cycle_match)
{
	reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, 1 << ctx);
	reg_write(ohci, OHCI1394_IsoXmitCommandPtr + ctx * 16, first_cmd | z);
	reg_write(ohci, OHCI1394_IsoXmitContextControlClear + ctx * 16, ~0);
	wmb();
	reg_write(ohci, OHCI1394_IsoXmitContextControlSet + ctx * 16,
		  OHCI1394_CONTEXT_CYCLE_MATCH | (cycle_match << 16) |
		  OHCI1394_CONTEXT_RUN);
}
292
/* Tell a running transmit context that new descriptors were appended,
 * so it re-reads the branch address it previously found to be 0. */
void ohci1394_wake_it_ctx(struct ti_ohci *ohci, int ctx)
{
	reg_write(ohci, OHCI1394_IsoXmitContextControlSet + ctx * 16,
		  OHCI1394_CONTEXT_WAKE);
}
298
/* Stop transmit context @ctx: mask its interrupt and clear RUN.  When
 * @synchronous, poll the ACTIVE bit (up to 5 ticks, sleeping between
 * polls) until the controller has actually gone idle. */
void ohci1394_stop_it_ctx(struct ti_ohci *ohci, int ctx, int synchronous)
{
	u32 control;
	int wait;

	reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, 1 << ctx);
	reg_write(ohci, OHCI1394_IsoXmitContextControlClear + ctx * 16,
		  OHCI1394_CONTEXT_RUN);
	wmb();

	if (synchronous) {
		for (wait = 0; wait < 5; wait++) {
			control = reg_read(ohci, OHCI1394_IsoXmitContextControlSet + ctx * 16);
			if ((control & OHCI1394_CONTEXT_ACTIVE) == 0)
				break;

			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(1);
		}
	}
}
320
/* Note: we can test if free_packet_lists is empty without acquiring
322 * the packet_list_lock.  The interrupt handler only adds to the free
323 * list, there is no race condition between testing the list non-empty
324 * and acquiring the lock.
325 */
326
327static struct packet_list *stream_get_free_packet_list(struct stream *s)
328{
329	struct packet_list *pl;
330	unsigned long flags;
331
332	if (list_empty(&s->free_packet_lists))
333		return NULL;
334
335	spin_lock_irqsave(&s->packet_list_lock, flags);
336	pl = list_entry(s->free_packet_lists.next, struct packet_list, link);
337	list_del(&pl->link);
338	spin_unlock_irqrestore(&s->packet_list_lock, flags);
339
340	return pl;
341}
342
/* Kick off transmission of packet list @pl: read the current cycle
 * timer, derive the cycle (mod 16) the first packet was stamped for,
 * and program the context to start at a matching cycle at least 17
 * cycles in the future. */
static void stream_start_dma(struct stream *s, struct packet_list *pl)
{
	u32 syt_cycle, cycle_count, start_cycle;

	/* Upper bits of the cycle timer register are the cycle count. */
	cycle_count = reg_read(s->host->host->hostdata,
			       OHCI1394_IsochronousCycleTimer) >> 12;
	syt_cycle = (pl->last_cycle_count - PACKET_LIST_SIZE + 1) & 0x0f;

	/* We program the DMA controller to start transmission at
	 * least 17 cycles from now - this happens when the lower four
	 * bits of cycle_count is 0x0f and syt_cycle is 0, in this
	 * case the start cycle is cycle_count - 15 + 32. */
	start_cycle = (cycle_count & ~0x0f) + 32 + syt_cycle;
	/* Wrap at 8000 cycles (one second) while keeping the second
	 * count in the upper bits. */
	if ((start_cycle & 0x1fff) >= 8000)
		start_cycle = start_cycle - 8000 + 0x2000;

	ohci1394_start_it_ctx(s->host->ohci, s->iso_tasklet.context,
			      pl->packets[0].db_bus, 3,
			      start_cycle & 0x7fff);
}
363
/* Queue a filled packet list for transmission.  If DMA is already
 * running, patch the previous list's tail descriptors to branch into
 * this one and wake the context; otherwise start the context fresh. */
static void stream_put_dma_packet_list(struct stream *s,
				       struct packet_list *pl)
{
	unsigned long flags;
	struct packet_list *prev;

	/* Remember the cycle_count used for timestamping the last packet. */
	pl->last_cycle_count = atomic_read(&s->cycle_count2) - 1;
	/* Terminate this list's program so the context stalls here
	 * until the next list is chained on. */
	pl->packets[PACKET_LIST_SIZE - 1].db->payload_desc.branch = 0;

	spin_lock_irqsave(&s->packet_list_lock, flags);
	list_add_tail(&pl->link, &s->dma_packet_lists);
	spin_unlock_irqrestore(&s->packet_list_lock, flags);

	prev = list_entry(pl->link.prev, struct packet_list, link);
	if (pl->link.prev != &s->dma_packet_lists) {
		/* Chain the previous list's last packet (branch and
		 * error-skip address) to our first packet, Z = 3. */
		struct packet *last = &prev->packets[PACKET_LIST_SIZE - 1];
		last->db->payload_desc.branch = pl->packets[0].db_bus | 3;
		last->db->header_desc.skip = pl->packets[0].db_bus | 3;
		ohci1394_wake_it_ctx(s->host->ohci, s->iso_tasklet.context);
	}
	else
		stream_start_dma(s, pl);
}
388
/* Iso transmit tasklet, run after the last packet of the head packet
 * list was sent.  Verifies the interrupt is genuine, resynchronizes
 * the packer's cycle counters against the hardware timestamp, moves
 * the completed list to the free list and wakes any writers.  @l is
 * the struct stream pointer cast to unsigned long (tasklet calling
 * convention). */
static void stream_shift_packet_lists(unsigned long l)
{
	struct stream *s = (struct stream *) l;
	struct packet_list *pl;
	struct packet *last;
	int diff;

	if (list_empty(&s->dma_packet_lists)) {
		HPSB_ERR("empty dma_packet_lists in %s", __FUNCTION__);
		return;
	}

	/* Now that we know the list is non-empty, we can get the head
	 * of the list without locking, because the process context
	 * only adds to the tail.
	 */
	pl = list_entry(s->dma_packet_lists.next, struct packet_list, link);
	last = &pl->packets[PACKET_LIST_SIZE - 1];

	/* This is weird... if we stop dma processing in the middle of
	 * a packet list, the dma context immediately generates an
	 * interrupt if we enable it again later.  This only happens
	 * when amdtp_release is interrupted while waiting for dma to
	 * complete, though.  Anyway, we detect this by seeing that
	 * the status of the dma descriptor that we expected an
	 * interrupt from is still 0.
	 */
	if (last->db->payload_desc.status == 0) {
		HPSB_INFO("weird interrupt...");
		return;
	}

	/* If the last descriptor block does not specify a branch
	 * address, we have a sample underflow.
	 */
	if (last->db->payload_desc.branch == 0)
		HPSB_INFO("FIXME: sample underflow...");

	/* Here we check when (which cycle) the last packet was sent
	 * and compare it to what the iso packer was using at the
	 * time.  If there is a mismatch, we adjust the cycle count in
	 * the iso packer.  However, there are still up to
	 * MAX_PACKET_LISTS packet lists queued with bad time stamps,
	 * so we disable time stamp monitoring for the next
	 * MAX_PACKET_LISTS packet lists.
	 */
	diff = (last->db->payload_desc.status - pl->last_cycle_count) & 0xf;
	if (diff > 0 && s->stale_count == 0) {
		atomic_add(diff, &s->cycle_count);
		atomic_add(diff, &s->cycle_count2);
		s->stale_count = MAX_PACKET_LISTS;
	}

	if (s->stale_count > 0)
		s->stale_count--;

	/* Finally, we move the packet list that was just processed
	 * back to the free list, and notify any waiters.
	 */
	spin_lock(&s->packet_list_lock);
	list_del(&pl->link);
	list_add_tail(&pl->link, &s->free_packet_lists);
	spin_unlock(&s->packet_list_lock);

	wake_up_interruptible(&s->packet_list_wait);
}
455
456static struct packet *stream_current_packet(struct stream *s)
457{
458	if (s->current_packet_list == NULL &&
459	    (s->current_packet_list = stream_get_free_packet_list(s)) == NULL)
460		return NULL;
461
462	return &s->current_packet_list->packets[s->current_packet];
463}
464
465static void stream_queue_packet(struct stream *s)
466{
467	s->current_packet++;
468	if (s->current_packet == PACKET_LIST_SIZE) {
469		stream_put_dma_packet_list(s, s->current_packet_list);
470		s->current_packet_list = NULL;
471		s->current_packet = 0;
472	}
473}
474
475/* Integer fractional math.  When we transmit a 44k1Hz signal we must
476 * send 5 41/80 samples per isochronous cycle, as these occur 8000
477 * times a second.  Of course, we must send an integral number of
478 * samples in a packet, so we use the integer math to alternate
479 * between sending 5 and 6 samples per packet.
480 */
481
482static void fraction_init(struct fraction *f, int numerator, int denominator)
483{
484	f->integer = numerator / denominator;
485	f->numerator = numerator % denominator;
486	f->denominator = denominator;
487}
488
489static __inline__ void fraction_add(struct fraction *dst,
490				    struct fraction *src1,
491				    struct fraction *src2)
492{
493	/* assert: src1->denominator == src2->denominator */
494
495	int sum, denom;
496
497	/* We use these two local variables to allow gcc to optimize
498	 * the division and the modulo into only one division. */
499
500	sum = src1->numerator + src2->numerator;
501	denom = src1->denominator;
502	dst->integer = src1->integer + src2->integer + sum / denom;
503	dst->numerator = sum % denom;
504	dst->denominator = denom;
505}
506
507static __inline__ void fraction_sub_int(struct fraction *dst,
508					struct fraction *src, int integer)
509{
510	dst->integer = src->integer - integer;
511	dst->numerator = src->numerator;
512	dst->denominator = src->denominator;
513}
514
/* Floor of the fraction, i.e. its whole part (the fractional part is
 * kept as a non-negative proper remainder). */
static __inline__ int fraction_floor(struct fraction *frac)
{
	return frac->integer;
}
519
520static __inline__ int fraction_ceil(struct fraction *frac)
521{
522	return frac->integer + (frac->numerator > 0 ? 1 : 0);
523}
524
void packet_initialize(struct packet *p, struct packet *next)
{
	/* Here we initialize the dma descriptor block for
	 * transferring one iso packet.  We use two descriptors per
	 * packet: an OUTPUT_MORE_IMMEDIATE descriptor for the
	 * IEEE1394 iso packet header and an OUTPUT_LAST descriptor
	 * for the payload.
	 */

	/* 8 bytes of immediate data: the iso packet header. */
	p->db->header_desc.control =
		DMA_CTL_OUTPUT_MORE | DMA_CTL_IMMEDIATE | 8;

	if (next) {
		/* Chain to the next packet's descriptor block; the
		 * low bits (3) give the Z (block size) value. */
		p->db->payload_desc.control =
			DMA_CTL_OUTPUT_LAST | DMA_CTL_BRANCH;
		p->db->payload_desc.branch = next->db_bus | 3;
		p->db->header_desc.skip = next->db_bus | 3;
	}
	else {
		/* Last packet of the list: request status writeback
		 * and an interrupt, and leave the branch open; it is
		 * patched in when the following list is queued. */
		p->db->payload_desc.control =
			DMA_CTL_OUTPUT_LAST | DMA_CTL_BRANCH |
			DMA_CTL_UPDATE | DMA_CTL_IRQ;
		p->db->payload_desc.branch = 0;
		p->db->header_desc.skip = 0;
	}
	p->db->payload_desc.data_address = p->payload_bus;
	p->db->payload_desc.status = 0;
}
553
554struct packet_list *packet_list_alloc(struct stream *s)
555{
556	int i;
557	struct packet_list *pl;
558	struct packet *next;
559
560	pl = kmalloc(sizeof *pl, SLAB_KERNEL);
561	if (pl == NULL)
562		return NULL;
563
564	for (i = 0; i < PACKET_LIST_SIZE; i++) {
565		struct packet *p = &pl->packets[i];
566		p->db = pci_pool_alloc(s->descriptor_pool, SLAB_KERNEL,
567				       &p->db_bus);
568		p->payload = pci_pool_alloc(s->packet_pool, SLAB_KERNEL,
569					    &p->payload_bus);
570	}
571
572	for (i = 0; i < PACKET_LIST_SIZE; i++) {
573		if (i < PACKET_LIST_SIZE - 1)
574			next = &pl->packets[i + 1];
575		else
576			next = NULL;
577		packet_initialize(&pl->packets[i], next);
578	}
579
580	return pl;
581}
582
583void packet_list_free(struct packet_list *pl, struct stream *s)
584{
585	int i;
586
587	for (i = 0; i < PACKET_LIST_SIZE; i++) {
588		struct packet *p = &pl->packets[i];
589		pci_pool_free(s->descriptor_pool, p->db, p->db_bus);
590		pci_pool_free(s->packet_pool, p->payload, p->payload_bus);
591	}
592	kfree(pl);
593}
594
595static struct buffer *buffer_alloc(int size)
596{
597	struct buffer *b;
598
599	b = kmalloc(sizeof *b + size, SLAB_KERNEL);
600	b->head = 0;
601	b->tail = 0;
602	b->length = 0;
603	b->size = size;
604
605	return b;
606}
607
608static unsigned char *buffer_get_bytes(struct buffer *buffer, int size)
609{
610	unsigned char *p;
611
612	if (buffer->head + size > buffer->size)
613		BUG();
614
615	p = &buffer->data[buffer->head];
616	buffer->head += size;
617	if (buffer->head == buffer->size)
618		buffer->head = 0;
619	buffer->length -= size;
620
621	return p;
622}
623
/* Reserve space for writing into the ring: returns a pointer at the
 * tail and sets *actual to how many contiguous bytes may be written
 * there (at most @max, bounded by free space and by the distance to
 * the end of the ring).  The caller copies the data in afterwards. */
static unsigned char *buffer_put_bytes(struct buffer *buffer,
				       size_t max, size_t *actual)
{
	size_t length;
	unsigned char *p;

	p = &buffer->data[buffer->tail];
	/* Free space in the ring, capped at the caller's request. */
	length = min(buffer->size - buffer->length, max);
	if (buffer->tail + length < buffer->size) {
		*actual = length;
		buffer->tail += length;
	}
	else {
		/* Would cross the end of the ring: hand out only the
		 * tail segment and wrap for the next call. */
		*actual = buffer->size - buffer->tail;
		 buffer->tail = 0;
	}

	buffer->length += *actual;
	return p;
}
644
/* Compute the four IEC958 control bits (block start, subframe,
 * parity, channel status) for one subframe.  The channel status info
 * (csi) bit emitted depends on the position within the 192-frame
 * block: frame 1 carries the non-PCM flag, frames 2 and 9 are set,
 * frames 24-27 carry the rate code, everything else is zero. */
static u32 get_iec958_header_bits(struct stream *s, int sub_frame, u32 sample)
{
	int csi, parity, shift;
	int block_start;
	u32 bits;

	switch (s->iec958_frame_count) {
	case 1:
		csi = s->format == AMDTP_FORMAT_IEC958_AC3;
		break;
	case 2:
	case 9:
		csi = 1;
		break;
	case 24 ... 27:
		csi = (s->iec958_rate_code >> (27 - s->iec958_frame_count)) & 0x01;
		break;
	default:
		csi = 0;
		break;
	}

	block_start = (s->iec958_frame_count == 0 && sub_frame == 0);

	/* The parity bit is the xor of the sample bits and the
	 * channel status info bit. */
	for (shift = 16, parity = sample ^ csi; shift > 0; shift >>= 1)
		parity ^= (parity >> shift);

	bits =  (block_start << 5) |		/* Block start bit */
		((sub_frame == 0) << 4) |	/* Subframe bit */
		((parity & 1) << 3) |		/* Parity bit */
		(csi << 2);			/* Channel status info bit */

	return bits;
}
681
682static u32 get_header_bits(struct stream *s, int sub_frame, u32 sample)
683{
684	switch (s->format) {
685	case AMDTP_FORMAT_IEC958_PCM:
686	case AMDTP_FORMAT_IEC958_AC3:
687		return get_iec958_header_bits(s, sub_frame, sample);
688
689	case AMDTP_FORMAT_RAW:
690		return 0x40000000;
691
692	default:
693		return 0;
694	}
695}
696
/* Pack @nevents events of little-endian 16-bit input samples into
 * AM824 quadlets: each 16-bit sample is shifted into the top of the
 * 24-bit sample field, a label byte is prepended, and the quadlet is
 * stored big-endian (wire order). */
static void fill_payload_le16(struct stream *s, quadlet_t *data, int nevents)
{
	quadlet_t *event, sample, bits;
	unsigned char *p;
	int i, j;

	for (i = 0, event = data; i < nevents; i++) {

		for (j = 0; j < s->dimension; j++) {
			/* Consume one LE16 sample; p[1] is the high
			 * byte, placed at bits 23-16 of the field. */
			p = buffer_get_bytes(s->input, 2);
			sample = (p[1] << 16) | (p[0] << 8);
			bits = get_header_bits(s, j, sample);
			event[j] = cpu_to_be32((bits << 24) | sample);
		}

		event += s->dimension;
		/* IEC958 channel status repeats every 192 frames. */
		if (++s->iec958_frame_count == 192)
			s->iec958_frame_count = 0;
	}
}
717
/* Fill one iso packet with @nevents events: update its DMA
 * descriptors with the payload size, build the IEEE1394 and CIP
 * headers, compute the synchronization timestamp and pack the sample
 * data. */
static void fill_packet(struct stream *s, struct packet *packet, int nevents)
{
	int syt_index, syt, size;
	u32 control;

	/* Payload = events plus the two CIP header quadlets. */
	size = (nevents * s->dimension + 2) * sizeof(quadlet_t);

	/* Update DMA descriptors */
	packet->db->payload_desc.status = 0;
	control = packet->db->payload_desc.control & 0xffff0000;
	packet->db->payload_desc.control = control | size;

	/* Fill IEEE1394 headers */
	packet->db->header_desc.header[0] =
		(SPEED_100 << 16) | (0x01 << 14) |
		(s->iso_channel << 8) | (TCODE_ISO_DATA << 4);
	packet->db->header_desc.header[1] = size << 16;

	/* Calculate synchronization timestamp (syt). First we
	 * determine syt_index, that is, the index in the packet of
	 * the sample for which the timestamp is valid. */
	syt_index = (s->syt_interval - s->dbc) & (s->syt_interval - 1);
	if (syt_index < nevents) {
		/* Timestamp = cycle count (4 bits) and cycle offset
		 * (12 bits) for the sample at syt_index. */
		syt = ((atomic_read(&s->cycle_count) << 12) |
		       s->cycle_offset.integer) & 0xffff;
		fraction_add(&s->cycle_offset,
			     &s->cycle_offset, &s->ticks_per_syt_offset);

		/* This next addition should be modulo 8000 (0x1f40),
		 * but we only use the lower 4 bits of cycle_count, so
		 * we dont need the modulo. */
		atomic_add(s->cycle_offset.integer / 3072, &s->cycle_count);
		s->cycle_offset.integer %= 3072;
	}
	else
		syt = 0xffff;	/* no valid timestamp in this packet */

	atomic_inc(&s->cycle_count2);

	/* Fill cip header */
	packet->payload->eoh0 = 0;
	packet->payload->sid = s->host->host->node_id & 0x3f;
	packet->payload->dbs = s->dimension;
	packet->payload->fn = 0;
	packet->payload->qpc = 0;
	packet->payload->sph = 0;
	packet->payload->reserved = 0;
	packet->payload->dbc = s->dbc;
	packet->payload->eoh1 = 2;
	packet->payload->fmt = FMT_AMDTP;
	packet->payload->fdf = s->fdf;
	packet->payload->syt = cpu_to_be16(syt);

	switch (s->sample_format) {
	case AMDTP_INPUT_LE16:
		fill_payload_le16(s, packet->payload->data, nevents);
		break;
	}

	/* Advance the data block counter by the events just packed. */
	s->dbc += nevents;
}
779
/* Drain the input ring into iso packets until either there aren't
 * enough buffered samples for the next packet or no packet list is
 * free. */
static void stream_flush(struct stream *s)
{
	struct packet *p;
	int nevents;
	struct fraction next;

	/* The AMDTP specifies two transmission modes: blocking and
	 * non-blocking.  In blocking mode you always transfer
	 * syt_interval or zero samples, whereas in non-blocking mode
	 * you send as many samples as you have available at transfer
	 * time.
	 *
	 * The fraction samples_per_cycle specifies the number of
	 * samples that become available per cycle.  We add this to
	 * the fraction ready_samples, which specifies the number of
	 * leftover samples from the previous transmission.  The sum,
	 * stored in the fraction next, specifies the number of
	 * samples available for transmission, and from this we
	 * determine the number of samples to actually transmit.
	 */

	while (1) {
		fraction_add(&next, &s->ready_samples, &s->samples_per_cycle);
		if (s->mode == AMDTP_MODE_BLOCKING) {
			if (fraction_floor(&next) >= s->syt_interval)
				nevents = s->syt_interval;
			else
				nevents = 0;
		}
		else
			nevents = fraction_floor(&next);

		/* Stop when the ring can't supply a full packet (2
		 * bytes per sample per channel) or no list is free. */
		p = stream_current_packet(s);
		if (s->input->length < nevents * s->dimension * 2 || p == NULL)
			break;

		fill_packet(s, p, nevents);
		stream_queue_packet(s);

		/* Now that we have successfully queued the packet for
		 * transmission, we update the fraction ready_samples. */
		fraction_sub_int(&s->ready_samples, &next, nevents);
	}
}
824
/* Create the payload DMA pool, sized for the largest possible packet
 * of the current configuration, and fill the free list with
 * MAX_PACKET_LISTS packet lists.  Returns 0 on success, -1 on
 * failure (the caller then calls stream_free_packet_lists to clean
 * up any lists that were allocated). */
static int stream_alloc_packet_lists(struct stream *s)
{
	int max_nevents, max_packet_size, i;

	if (s->mode == AMDTP_MODE_BLOCKING)
		max_nevents = s->syt_interval;
	else
		max_nevents = fraction_ceil(&s->samples_per_cycle);

	/* 4 bytes per sample plus the 8-byte CIP header. */
	max_packet_size = max_nevents * s->dimension * 4 + 8;
	s->packet_pool = pci_pool_create("packet pool", s->host->ohci->dev,
					 max_packet_size, 0, 0, SLAB_KERNEL);
	if (s->packet_pool == NULL)
		return -1;

	INIT_LIST_HEAD(&s->free_packet_lists);
	INIT_LIST_HEAD(&s->dma_packet_lists);
	for (i = 0; i < MAX_PACKET_LISTS; i++) {
		struct packet_list *pl = packet_list_alloc(s);
		if (pl == NULL)
			break;
		list_add_tail(&pl->link, &s->free_packet_lists);
	}

	return i < MAX_PACKET_LISTS ? -1 : 0;
}
851
852static void stream_free_packet_lists(struct stream *s)
853{
854	struct list_head *lh, *next;
855
856	if (s->current_packet_list != NULL)
857		packet_list_free(s->current_packet_list, s);
858	list_for_each_safe(lh, next, &s->dma_packet_lists)
859		packet_list_free(list_entry(lh, struct packet_list, link), s);
860	list_for_each_safe(lh, next, &s->free_packet_lists)
861		packet_list_free(list_entry(lh, struct packet_list, link), s);
862	if (s->packet_pool != NULL)
863		pci_pool_destroy(s->packet_pool);
864
865	s->current_packet_list = NULL;
866	INIT_LIST_HEAD(&s->free_packet_lists);
867	INIT_LIST_HEAD(&s->dma_packet_lists);
868	s->packet_pool = NULL;
869}
870
871static void plug_update(struct cmp_pcr *plug, void *data)
872{
873	struct stream *s = data;
874
875	HPSB_INFO("plug update: p2p_count=%d, channel=%d",
876		  plug->p2p_count, plug->channel);
877	s->iso_channel = plug->channel;
878	if (plug->p2p_count > 0) {
879		struct packet_list *pl;
880
881		pl = list_entry(s->dma_packet_lists.next, struct packet_list, link);
882		stream_start_dma(s, pl);
883	}
884	else {
885		ohci1394_stop_it_ctx(s->host->ohci, s->iso_tasklet.context, 0);
886	}
887}
888
/* Apply an AMDTP_IOC_PLUG or AMDTP_IOC_CHANNEL ioctl: validate and
 * store format, rate, mode and dimension, initialize the fractional
 * sample/timestamp state, set up the iso channel (via a CMP plug or
 * directly), and (re)allocate the packet lists to fit the new packet
 * size.  Returns 0 or a negative errno. */
static int stream_configure(struct stream *s, int cmd, struct amdtp_ioctl *cfg)
{
	const int transfer_delay = 9000;	/* in ticks (3072 per cycle) */

	if (cfg->format <= AMDTP_FORMAT_IEC958_AC3)
		s->format = cfg->format;
	else
		return -EINVAL;

	/* Per-rate SYT interval, CIP fdf code and IEC958 rate code. */
	switch (cfg->rate) {
	case 32000:
		s->syt_interval = 8;
		s->fdf = FDF_SFC_32KHZ;
		s->iec958_rate_code = 0x0c;
		break;
	case 44100:
		s->syt_interval = 8;
		s->fdf = FDF_SFC_44K1HZ;
		s->iec958_rate_code = 0x00;
		break;
	case 48000:
		s->syt_interval = 8;
		s->fdf = FDF_SFC_48KHZ;
		s->iec958_rate_code = 0x04;
		break;
	case 88200:
		s->syt_interval = 16;
		s->fdf = FDF_SFC_88K2HZ;
		s->iec958_rate_code = 0x00;
		break;
	case 96000:
		s->syt_interval = 16;
		s->fdf = FDF_SFC_96KHZ;
		s->iec958_rate_code = 0x00;
		break;
	case 176400:
		s->syt_interval = 32;
		s->fdf = FDF_SFC_176K4HZ;
		s->iec958_rate_code = 0x00;
		break;
	case 192000:
		s->syt_interval = 32;
		s->fdf = FDF_SFC_192KHZ;
		s->iec958_rate_code = 0x00;
		break;

	default:
		return -EINVAL;
	}

	s->rate = cfg->rate;
	fraction_init(&s->samples_per_cycle, s->rate, 8000);
	fraction_init(&s->ready_samples, 0, 8000);

	/* The ticks_per_syt_offset is initialized to the number of
	 * ticks between syt_interval events.  The number of ticks per
	 * second is 24.576e6, so the number of ticks between
	 * syt_interval events is 24.576e6 * syt_interval / rate.
	 */
	fraction_init(&s->ticks_per_syt_offset,
		      24576000 * s->syt_interval, s->rate);
	/* Seed the timestamp state with the transfer delay, split into
	 * whole cycles and a remaining cycle offset. */
	fraction_init(&s->cycle_offset, (transfer_delay % 3072) * s->rate, s->rate);
	atomic_set(&s->cycle_count, transfer_delay / 3072);
	atomic_set(&s->cycle_count2, 0);

	s->mode = cfg->mode;
	s->sample_format = AMDTP_INPUT_LE16;

	/* When using the AM824 raw subformat we can stream signals of
	 * any dimension.  The IEC958 subformat, however, only
	 * supports 2 channels.
	 */
	if (s->format == AMDTP_FORMAT_RAW || cfg->dimension == 2)
		s->dimension = cfg->dimension;
	else
		return -EINVAL;

	/* Drop any previously registered plug before switching modes. */
	if (s->opcr != NULL) {
		cmp_unregister_opcr(s->host->host, s->opcr);
		s->opcr = NULL;
	}

	switch(cmd) {
	case AMDTP_IOC_PLUG:
		s->opcr = cmp_register_opcr(s->host->host, cfg->u.plug,
					   /*payload*/ 12, plug_update, s);
		if (s->opcr == NULL)
			return -EINVAL;
		s->iso_channel = s->opcr->channel;
		break;

	case AMDTP_IOC_CHANNEL:
		if (cfg->u.channel >= 0 && cfg->u.channel < 64)
			s->iso_channel = cfg->u.channel;
		else
			return -EINVAL;
		break;
	}

	/* The ioctl settings were all valid, so we realloc the packet
	 * lists to make sure the packet size is big enough.
	 */
	if (s->packet_pool != NULL)
		stream_free_packet_lists(s);

	if (stream_alloc_packet_lists(s) < 0) {
		stream_free_packet_lists(s);
		return -ENOMEM;
	}

	return 0;
}
1001
/* Allocate and initialize a stream on @host: input ring, descriptor
 * DMA pool, list heads, wait queue, lock and iso tasklet, then link
 * it into the host's stream list.  Returns NULL on any allocation or
 * tasklet registration failure, undoing earlier steps. */
struct stream *stream_alloc(struct amdtp_host *host)
{
	struct stream *s;
	unsigned long flags;

        s = kmalloc(sizeof(struct stream), SLAB_KERNEL);
        if (s == NULL)
                return NULL;

        memset(s, 0, sizeof(struct stream));
	s->host = host;

	s->input = buffer_alloc(BUFFER_SIZE);
	if (s->input == NULL) {
		kfree(s);
		return NULL;
	}

	/* Descriptor blocks require 16-byte alignment. */
	s->descriptor_pool = pci_pool_create("descriptor pool", host->ohci->dev,
					     sizeof(struct descriptor_block),
					     16, 0, SLAB_KERNEL);
	if (s->descriptor_pool == NULL) {
		kfree(s->input);
		kfree(s);
		return NULL;
	}

	INIT_LIST_HEAD(&s->free_packet_lists);
	INIT_LIST_HEAD(&s->dma_packet_lists);

        init_waitqueue_head(&s->packet_list_wait);
        spin_lock_init(&s->packet_list_lock);

	/* The tasklet runs stream_shift_packet_lists on transmit
	 * completion interrupts; registering it claims a context. */
	ohci1394_init_iso_tasklet(&s->iso_tasklet, OHCI_ISO_TRANSMIT,
				  stream_shift_packet_lists,
				  (unsigned long) s);

	if (ohci1394_register_iso_tasklet(host->ohci, &s->iso_tasklet) < 0) {
		pci_pool_destroy(s->descriptor_pool);
		kfree(s->input);
		kfree(s);
		return NULL;
	}

	spin_lock_irqsave(&host->stream_list_lock, flags);
	list_add_tail(&s->link, &host->stream_list);
	spin_unlock_irqrestore(&host->stream_list_lock, flags);

	return s;
}
1052
/* Tear down a stream: wait for queued DMA to drain, stop and release
 * the transmit context, unregister the plug, unlink from the host's
 * stream list and free all resources. */
void stream_free(struct stream *s)
{
	unsigned long flags;

	/* Stop the DMA.  We wait for the dma packet list to become
	 * empty and let the dma controller run out of programs.  This
	 * seems to be more reliable than stopping it directly, since
	 * that sometimes generates an it transmit interrupt if we
	 * later re-enable the context.
	 */
	wait_event_interruptible(s->packet_list_wait,
				 list_empty(&s->dma_packet_lists));

	ohci1394_stop_it_ctx(s->host->ohci, s->iso_tasklet.context, 1);
	ohci1394_unregister_iso_tasklet(s->host->ohci, &s->iso_tasklet);

	if (s->opcr != NULL)
		cmp_unregister_opcr(s->host->host, s->opcr);

	spin_lock_irqsave(&s->host->stream_list_lock, flags);
	list_del(&s->link);
	spin_unlock_irqrestore(&s->host->stream_list_lock, flags);

	kfree(s->input);

	stream_free_packet_lists(s);
	pci_pool_destroy(s->descriptor_pool);

	kfree(s);
}
1083
1084/* File operations */
1085
1086static ssize_t amdtp_write(struct file *file, const char *buffer, size_t count,
1087			   loff_t *offset_is_ignored)
1088{
1089	struct stream *s = file->private_data;
1090	unsigned char *p;
1091	int i;
1092	size_t length;
1093
1094	if (s->packet_pool == NULL)
1095		return -EBADFD;
1096
1097	/* Fill the circular buffer from the input buffer and call the
1098	 * iso packer when the buffer is full.  The iso packer may
1099	 * leave bytes in the buffer for two reasons: either the
1100	 * remaining bytes wasn't enough to build a new packet, or
1101	 * there were no free packet lists.  In the first case we
1102	 * re-fill the buffer and call the iso packer again or return
1103	 * if we used all the data from userspace.  In the second
1104	 * case, the wait_event_interruptible will block until the irq
1105	 * handler frees a packet list.
1106	 */
1107
1108	for (i = 0; i < count; i += length) {
1109		p = buffer_put_bytes(s->input, count, &length);
1110		copy_from_user(p, buffer + i, length);
1111		if (s->input->length < s->input->size)
1112			continue;
1113
1114		stream_flush(s);
1115
1116		if (s->current_packet_list != NULL)
1117			continue;
1118
1119		if (file->f_flags & O_NONBLOCK)
1120			return i + length > 0 ? i + length : -EAGAIN;
1121
1122		if (wait_event_interruptible(s->packet_list_wait,
1123					     !list_empty(&s->free_packet_lists)))
1124			return -EINTR;
1125	}
1126
1127	return count;
1128}
1129
1130static int amdtp_ioctl(struct inode *inode, struct file *file,
1131			   unsigned int cmd, unsigned long arg)
1132{
1133	struct stream *s = file->private_data;
1134	struct amdtp_ioctl cfg;
1135
1136	switch(cmd)
1137	{
1138	case AMDTP_IOC_PLUG:
1139	case AMDTP_IOC_CHANNEL:
1140		if (copy_from_user(&cfg, (struct amdtp_ioctl *) arg, sizeof cfg))
1141			return -EFAULT;
1142		else
1143			return stream_configure(s, cmd, &cfg);
1144
1145	default:
1146		return -EINVAL;
1147	}
1148}
1149
1150static unsigned int amdtp_poll(struct file *file, poll_table *pt)
1151{
1152	struct stream *s = file->private_data;
1153
1154	poll_wait(file, &s->packet_list_wait, pt);
1155
1156	if (!list_empty(&s->free_packet_lists))
1157		return POLLOUT | POLLWRNORM;
1158	else
1159		return 0;
1160}
1161
1162static int amdtp_open(struct inode *inode, struct file *file)
1163{
1164	struct amdtp_host *host;
1165
1166	spin_lock(&host_list_lock);
1167	if (!list_empty(&host_list))
1168		host = list_entry(host_list.next, struct amdtp_host, link);
1169	else
1170		host = NULL;
1171	spin_unlock(&host_list_lock);
1172
1173	if (host == NULL)
1174		return -ENODEV;
1175
1176	file->private_data = stream_alloc(host);
1177	if (file->private_data == NULL)
1178		return -ENOMEM;
1179
1180	return 0;
1181}
1182
1183static int amdtp_release(struct inode *inode, struct file *file)
1184{
1185	struct stream *s = file->private_data;
1186
1187	stream_free(s);
1188
1189	return 0;
1190}
1191
/* Character-device entry points.  There is no .read handler — the
 * device is write-only (audio data flows out to the 1394 bus). */
static struct file_operations amdtp_fops =
{
	.owner =	THIS_MODULE,
	.write =	amdtp_write,
	.poll =		amdtp_poll,
	.ioctl =	amdtp_ioctl,
	.open =		amdtp_open,
	.release =	amdtp_release
};
1201
1202/* IEEE1394 Subsystem functions */
1203
1204static void amdtp_add_host(struct hpsb_host *host)
1205{
1206	struct amdtp_host *ah;
1207
1208	if (strcmp(host->driver->name, OHCI1394_DRIVER_NAME) != 0)
1209		return;
1210
1211	ah = kmalloc(sizeof *ah, SLAB_KERNEL);
1212	ah->host = host;
1213	ah->ohci = host->hostdata;
1214	INIT_LIST_HEAD(&ah->stream_list);
1215	spin_lock_init(&ah->stream_list_lock);
1216
1217	spin_lock_irq(&host_list_lock);
1218	list_add_tail(&ah->link, &host_list);
1219	spin_unlock_irq(&host_list_lock);
1220}
1221
1222static void amdtp_remove_host(struct hpsb_host *host)
1223{
1224	struct list_head *lh;
1225	struct amdtp_host *ah;
1226
1227	spin_lock_irq(&host_list_lock);
1228	list_for_each(lh, &host_list) {
1229		if (list_entry(lh, struct amdtp_host, link)->host == host) {
1230			list_del(lh);
1231			break;
1232		}
1233	}
1234	spin_unlock_irq(&host_list_lock);
1235
1236	if (lh != &host_list) {
1237		ah = list_entry(lh, struct amdtp_host, link);
1238		kfree(ah);
1239	}
1240	else
1241		HPSB_ERR("remove_host: bogus ohci host: %p", host);
1242}
1243
/* Callbacks the ieee1394 core invokes as host adapters come and go. */
static struct hpsb_highlevel_ops amdtp_highlevel_ops = {
	.add_host =	amdtp_add_host,
	.remove_host =	amdtp_remove_host,
};
1248
/* Module interface */

/* Module metadata reported via modinfo. */
MODULE_AUTHOR("Kristian Hogsberg <hogsberg@users.sf.net>");
MODULE_DESCRIPTION("Driver for Audio & Music Data Transmission Protocol "
		   "on OHCI boards.");
MODULE_SUPPORTED_DEVICE("amdtp");
MODULE_LICENSE("GPL");
1256
1257static int __init amdtp_init_module (void)
1258{
1259	if (ieee1394_register_chardev(IEEE1394_MINOR_BLOCK_AMDTP,
1260				      THIS_MODULE, &amdtp_fops)) {
1261		HPSB_ERR("amdtp: unable to get minor device block");
1262 		return -EIO;
1263 	}
1264
1265	amdtp_highlevel = hpsb_register_highlevel ("amdtp",
1266						   &amdtp_highlevel_ops);
1267	if (amdtp_highlevel == NULL) {
1268		HPSB_ERR("amdtp: unable to register highlevel ops");
1269		ieee1394_unregister_chardev(IEEE1394_MINOR_BLOCK_AMDTP);
1270		return -EIO;
1271	}
1272
1273	HPSB_INFO("Loaded AMDTP driver");
1274
1275	return 0;
1276}
1277
1278static void __exit amdtp_exit_module (void)
1279{
1280        hpsb_unregister_highlevel(amdtp_highlevel);
1281        ieee1394_unregister_chardev(IEEE1394_MINOR_BLOCK_AMDTP);
1282
1283	HPSB_INFO("Unloaded AMDTP driver");
1284}
1285
/* Register the module entry and exit points with the kernel. */
module_init(amdtp_init_module);
module_exit(amdtp_exit_module);