vchiq_2835_arm.c revision 290245
1/**
2 * Copyright (c) 2010-2012 Broadcom. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions, and the following disclaimer,
9 *    without modification.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. The names of the above-listed copyright holders may not be used
14 *    to endorse or promote products derived from this software without
15 *    specific prior written permission.
16 *
17 * ALTERNATIVELY, this software may be distributed under the terms of the
18 * GNU General Public License ("GPL") version 2, as published by the Free
19 * Software Foundation.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
25 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <interface/compat/vchi_bsd.h>
35
36#include <sys/malloc.h>
37#include <sys/rwlock.h>
38
39#include <vm/vm.h>
40#include <vm/pmap.h>
41#include <vm/vm_extern.h>
42#include <vm/vm_kern.h>
43#include <vm/vm_map.h>
44#include <vm/vm_object.h>
45#include <vm/vm_page.h>
46#include <vm/vm_pager.h>
47#include <vm/vm_param.h>
48#include <vm/vm_phys.h>
49
50#include <machine/bus.h>
51#include <arm/broadcom/bcm2835/bcm2835_mbox.h>
52#include <arm/broadcom/bcm2835/bcm2835_vcbus.h>
53
54MALLOC_DEFINE(M_VCPAGELIST, "vcpagelist", "VideoCore pagelist memory");
55
56#define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
57
58#define VCHIQ_DOORBELL_IRQ IRQ_ARM_DOORBELL_0
59#define VCHIQ_ARM_ADDRESS(x) ((void *)PHYS_TO_VCBUS(pmap_kextract((vm_offset_t)(x))))
60
61#include "vchiq_arm.h"
62#include "vchiq_2835.h"
63#include "vchiq_connected.h"
64#include "vchiq_killable.h"
65
66#define MAX_FRAGMENTS (VCHIQ_NUM_CURRENT_BULKS * 2)
67
/* Per-VCHIQ-state platform data for the BCM2835. */
typedef struct vchiq_2835_state_struct {
   int inited;                  /* non-zero once arm_state has been initialized */
   VCHIQ_ARM_STATE_T arm_state;
} VCHIQ_2835_ARM_STATE_T;
72
73static char *g_slot_mem;
74static int g_slot_mem_size;
75vm_paddr_t g_slot_phys;
76/* BSD DMA */
77bus_dma_tag_t bcm_slots_dma_tag;
78bus_dmamap_t bcm_slots_dma_map;
79
80static FRAGMENTS_T *g_fragments_base;
81static FRAGMENTS_T *g_free_fragments;
82struct semaphore g_free_fragments_sema;
83
84static DEFINE_SEMAPHORE(g_free_fragments_mutex);
85
/*
 * Book-keeping for one bulk transfer: the pagelist handed to the
 * VideoCore plus the DMA resources that back it and the user buffer
 * it describes.  Stored in bulk->remote_data until completion.
 */
typedef struct bulkinfo_struct {
	PAGELIST_T	*pagelist;	/* DMA-coherent pagelist memory */
	bus_dma_tag_t	pagelist_dma_tag;
	bus_dmamap_t	pagelist_dma_map;
	void		*buf;		/* original (user) buffer address */
	size_t		size;		/* transfer length in bytes */
} BULKINFO_T;
93
94static int
95create_pagelist(char __user *buf, size_t count, unsigned short type,
96                struct proc *p, BULKINFO_T *bi);
97
98static void
99free_pagelist(BULKINFO_T *bi, int actual);
100
101static void
102vchiq_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
103{
104	bus_addr_t *addr;
105
106	if (err)
107		return;
108
109	addr = (bus_addr_t*)arg;
110	*addr = PHYS_TO_VCBUS(segs[0].ds_addr);
111}
112
113static int
114copyout_page(vm_page_t p, size_t offset, void *kaddr, size_t size)
115{
116        uint8_t *dst;
117
118        dst = pmap_mapdev(VM_PAGE_TO_PHYS(p), PAGE_SIZE);
119        if (!dst)
120                return ENOMEM;
121
122        memcpy(dst + offset, kaddr, size);
123
124        pmap_unmapdev((vm_offset_t)dst, PAGE_SIZE);
125
126        return 0;
127}
128
129int __init
130vchiq_platform_init(VCHIQ_STATE_T *state)
131{
132	VCHIQ_SLOT_ZERO_T *vchiq_slot_zero;
133	int frag_mem_size;
134	int err;
135	int i;
136
137	/* Allocate space for the channels in coherent memory */
138	g_slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
139	frag_mem_size = PAGE_ALIGN(sizeof(FRAGMENTS_T) * MAX_FRAGMENTS);
140
141	err = bus_dma_tag_create(
142	    NULL,
143	    PAGE_SIZE, 0,	       /* alignment, boundary */
144	    BUS_SPACE_MAXADDR_32BIT,    /* lowaddr */
145	    BUS_SPACE_MAXADDR,	  /* highaddr */
146	    NULL, NULL,		 /* filter, filterarg */
147	    g_slot_mem_size + frag_mem_size, 1,		/* maxsize, nsegments */
148	    g_slot_mem_size + frag_mem_size, 0,		/* maxsegsize, flags */
149	    NULL, NULL,		 /* lockfunc, lockarg */
150	    &bcm_slots_dma_tag);
151
152	err = bus_dmamem_alloc(bcm_slots_dma_tag, (void **)&g_slot_mem,
153	    BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bcm_slots_dma_map);
154	if (err) {
155		vchiq_log_error(vchiq_core_log_level, "Unable to allocate channel memory");
156		err = -ENOMEM;
157		goto failed_alloc;
158	}
159
160	err = bus_dmamap_load(bcm_slots_dma_tag, bcm_slots_dma_map, g_slot_mem,
161	    g_slot_mem_size + frag_mem_size, vchiq_dmamap_cb,
162	    &g_slot_phys, 0);
163
164	if (err) {
165		vchiq_log_error(vchiq_core_log_level, "cannot load DMA map");
166		err = -ENOMEM;
167		goto failed_load;
168	}
169
170	WARN_ON(((int)g_slot_mem & (PAGE_SIZE - 1)) != 0);
171
172	vchiq_slot_zero = vchiq_init_slots(g_slot_mem, g_slot_mem_size);
173	if (!vchiq_slot_zero) {
174		err = -EINVAL;
175		goto failed_init_slots;
176	}
177
178	vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] =
179		(int)g_slot_phys + g_slot_mem_size;
180	vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
181		MAX_FRAGMENTS;
182
183	g_fragments_base = (FRAGMENTS_T *)(g_slot_mem + g_slot_mem_size);
184	g_slot_mem_size += frag_mem_size;
185
186	g_free_fragments = g_fragments_base;
187	for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
188		*(FRAGMENTS_T **)&g_fragments_base[i] =
189			&g_fragments_base[i + 1];
190	}
191	*(FRAGMENTS_T **)&g_fragments_base[i] = NULL;
192	_sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);
193
194	if (vchiq_init_state(state, vchiq_slot_zero, 0/*slave*/) !=
195		VCHIQ_SUCCESS) {
196		err = -EINVAL;
197		goto failed_vchiq_init;
198	}
199
200	bcm_mbox_write(BCM2835_MBOX_CHAN_VCHIQ, (unsigned int)g_slot_phys);
201
202	vchiq_log_info(vchiq_arm_log_level,
203		"vchiq_init - done (slots %x, phys %x)",
204		(unsigned int)vchiq_slot_zero, g_slot_phys);
205
206   vchiq_call_connected_callbacks();
207
208   return 0;
209
210failed_vchiq_init:
211failed_init_slots:
212failed_load:
213	bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map);
214failed_alloc:
215	bus_dmamap_destroy(bcm_slots_dma_tag, bcm_slots_dma_map);
216	bus_dma_tag_destroy(bcm_slots_dma_tag);
217
218   return err;
219}
220
221void __exit
222vchiq_platform_exit(VCHIQ_STATE_T *state)
223{
224
225	bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map);
226	bus_dmamap_destroy(bcm_slots_dma_tag, bcm_slots_dma_map);
227	bus_dma_tag_destroy(bcm_slots_dma_tag);
228}
229
230VCHIQ_STATUS_T
231vchiq_platform_init_state(VCHIQ_STATE_T *state)
232{
233   VCHIQ_STATUS_T status = VCHIQ_SUCCESS;
234   state->platform_state = kzalloc(sizeof(VCHIQ_2835_ARM_STATE_T), GFP_KERNEL);
235   ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 1;
236   status = vchiq_arm_init_state(state, &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state);
237   if(status != VCHIQ_SUCCESS)
238   {
239      ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 0;
240   }
241   return status;
242}
243
244VCHIQ_ARM_STATE_T*
245vchiq_platform_get_arm_state(VCHIQ_STATE_T *state)
246{
247   if(!((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited)
248   {
249      BUG();
250   }
251   return &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state;
252}
253
254int
255vchiq_copy_from_user(void *dst, const void *src, int size)
256{
257
258	if (((vm_offset_t)(src)) < VM_MIN_KERNEL_ADDRESS) {
259		int error = copyin(src, dst, size);
260		return error ? VCHIQ_ERROR : VCHIQ_SUCCESS;
261	}
262	else
263		bcopy(src, dst, size);
264
265	return 0;
266}
267
268VCHIQ_STATUS_T
269vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle,
270	void *offset, int size, int dir)
271{
272	BULKINFO_T *bi;
273	int ret;
274
275	WARN_ON(memhandle != VCHI_MEM_HANDLE_INVALID);
276	bi = malloc(sizeof(*bi), M_VCPAGELIST, M_WAITOK | M_ZERO);
277	if (bi == NULL)
278		return VCHIQ_ERROR;
279
280	ret = create_pagelist((char __user *)offset, size,
281			(dir == VCHIQ_BULK_RECEIVE)
282			? PAGELIST_READ
283			: PAGELIST_WRITE,
284			current,
285			bi);
286	if (ret != 0)
287		return VCHIQ_ERROR;
288
289	bulk->handle = memhandle;
290	bulk->data = VCHIQ_ARM_ADDRESS(bi->pagelist);
291
292	/* Store the pagelist address in remote_data, which isn't used by the
293	   slave. */
294	bulk->remote_data = bi;
295
296	return VCHIQ_SUCCESS;
297}
298
299void
300vchiq_complete_bulk(VCHIQ_BULK_T *bulk)
301{
302	if (bulk && bulk->remote_data && bulk->actual)
303		free_pagelist((BULKINFO_T *)bulk->remote_data, bulk->actual);
304}
305
306void
307vchiq_transfer_bulk(VCHIQ_BULK_T *bulk)
308{
309	/*
310	 * This should only be called on the master (VideoCore) side, but
311	 * provide an implementation to avoid the need for ifdefery.
312	 */
313	BUG();
314}
315
/* Emit a one-line platform description into the state dump. */
void
vchiq_dump_platform_state(void *dump_context)
{
	char buf[80];
	int len;

	len = snprintf(buf, sizeof(buf), "  Platform: 2835 (VC master)");
	/* +1 so the NUL terminator is included in the dumped length. */
	vchiq_dump(dump_context, buf, len + 1);
}
325
326VCHIQ_STATUS_T
327vchiq_platform_suspend(VCHIQ_STATE_T *state)
328{
329   return VCHIQ_ERROR;
330}
331
332VCHIQ_STATUS_T
333vchiq_platform_resume(VCHIQ_STATE_T *state)
334{
335   return VCHIQ_SUCCESS;
336}
337
/* No platform-specific work is needed when the connection pauses. */
void
vchiq_platform_paused(VCHIQ_STATE_T *state)
{
}
342
/* No platform-specific work is needed when the connection resumes. */
void
vchiq_platform_resumed(VCHIQ_STATE_T *state)
{
}
347
348int
349vchiq_platform_videocore_wanted(VCHIQ_STATE_T* state)
350{
351   return 1; // autosuspend not supported - videocore always wanted
352}
353
/* The suspend timer is never used on this platform. */
int
vchiq_platform_use_suspend_timer(void)
{

	return 0;
}
359void
360vchiq_dump_platform_use_state(VCHIQ_STATE_T *state)
361{
362	vchiq_log_info(vchiq_arm_log_level, "Suspend timer not in use");
363}
364void
365vchiq_platform_handle_timeout(VCHIQ_STATE_T *state)
366{
367	(void)state;
368}
369/*
370 * Local functions
371 */
372
/*
 * Release one page that create_pagelist() wired: drop the wiring and,
 * if the page is now orphaned (no remaining wirings and no backing VM
 * object), free it outright.
 */
static void
pagelist_page_free(vm_page_t pp)
{
	vm_page_lock(pp);
	vm_page_unwire(pp, PQ_INACTIVE);
	/* Orphan check must happen under the page lock, after unwiring. */
	if (pp->wire_count == 0 && pp->object == NULL)
		vm_page_free(pp);
	vm_page_unlock(pp);
}
382
383/* There is a potential problem with partial cache lines (pages?)
384** at the ends of the block when reading. If the CPU accessed anything in
385** the same line (page?) then it may have pulled old data into the cache,
386** obscuring the new data underneath. We can solve this by transferring the
387** partial cache lines separately, and allowing the ARM to copy into the
388** cached area.
389
390** N.B. This implementation plays slightly fast and loose with the Linux
391** driver programming rules, e.g. its use of __virt_to_bus instead of
392** dma_map_single, but it isn't a multi-platform driver and it benefits
393** from increased speed as a result.
394*/
395
396static int
397create_pagelist(char __user *buf, size_t count, unsigned short type,
398	struct proc *p, BULKINFO_T *bi)
399{
400	PAGELIST_T *pagelist;
401	vm_page_t* pages;
402	unsigned long *addrs;
403	unsigned int num_pages, i;
404	vm_offset_t offset;
405	int pagelist_size;
406	char *addr, *base_addr, *next_addr;
407	int run, addridx, actual_pages;
408	int err;
409	vm_paddr_t pagelist_phys;
410
411	offset = (vm_offset_t)buf & (PAGE_SIZE - 1);
412	num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE;
413
414	bi->pagelist = NULL;
415	bi->buf = buf;
416	bi->size = count;
417
418	/* Allocate enough storage to hold the page pointers and the page
419	** list
420	*/
421	pagelist_size = sizeof(PAGELIST_T) +
422		(num_pages * sizeof(unsigned long)) +
423		(num_pages * sizeof(pages[0]));
424
425	err = bus_dma_tag_create(
426	    NULL,
427	    PAGE_SIZE, 0,	       /* alignment, boundary */
428	    BUS_SPACE_MAXADDR_32BIT,    /* lowaddr */
429	    BUS_SPACE_MAXADDR,	  /* highaddr */
430	    NULL, NULL,		 /* filter, filterarg */
431	    pagelist_size, 1,		/* maxsize, nsegments */
432	    pagelist_size, 0,		/* maxsegsize, flags */
433	    NULL, NULL,		 /* lockfunc, lockarg */
434	    &bi->pagelist_dma_tag);
435
436	err = bus_dmamem_alloc(bi->pagelist_dma_tag, (void **)&pagelist,
437	    BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bi->pagelist_dma_map);
438	if (err) {
439		vchiq_log_error(vchiq_core_log_level, "Unable to allocate pagelist memory");
440		err = -ENOMEM;
441		goto failed_alloc;
442	}
443
444	err = bus_dmamap_load(bi->pagelist_dma_tag, bi->pagelist_dma_map, pagelist,
445	    pagelist_size, vchiq_dmamap_cb,
446	    &pagelist_phys, 0);
447
448	if (err) {
449		vchiq_log_error(vchiq_core_log_level, "cannot load DMA map for pagelist memory");
450		err = -ENOMEM;
451		goto failed_load;
452	}
453
454	vchiq_log_trace(vchiq_arm_log_level,
455		"create_pagelist - %x", (unsigned int)pagelist);
456	if (!pagelist)
457		return -ENOMEM;
458
459	addrs = pagelist->addrs;
460	pages = (vm_page_t*)(addrs + num_pages);
461
462	actual_pages = vm_fault_quick_hold_pages(&p->p_vmspace->vm_map,
463	    (vm_offset_t)buf, count,
464	    (type == PAGELIST_READ ? VM_PROT_WRITE : 0 ) | VM_PROT_READ, pages, num_pages);
465
466	if (actual_pages != num_pages) {
467		vm_page_unhold_pages(pages, actual_pages);
468		free(pagelist, M_VCPAGELIST);
469		return (-ENOMEM);
470	}
471
472	for (i = 0; i < actual_pages; i++) {
473		vm_page_lock(pages[i]);
474		vm_page_wire(pages[i]);
475		vm_page_unhold(pages[i]);
476		vm_page_unlock(pages[i]);
477	}
478
479	pagelist->length = count;
480	pagelist->type = type;
481	pagelist->offset = offset;
482
483	/* Group the pages into runs of contiguous pages */
484
485	base_addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[0]));
486	next_addr = base_addr + PAGE_SIZE;
487	addridx = 0;
488	run = 0;
489
490	for (i = 1; i < num_pages; i++) {
491		addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[i]));
492		if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) {
493			next_addr += PAGE_SIZE;
494			run++;
495		} else {
496			addrs[addridx] = (unsigned long)base_addr + run;
497			addridx++;
498			base_addr = addr;
499			next_addr = addr + PAGE_SIZE;
500			run = 0;
501		}
502	}
503
504	addrs[addridx] = (unsigned long)base_addr + run;
505	addridx++;
506
507	/* Partial cache lines (fragments) require special measures */
508	if ((type == PAGELIST_READ) &&
509		((pagelist->offset & (CACHE_LINE_SIZE - 1)) ||
510		((pagelist->offset + pagelist->length) &
511		(CACHE_LINE_SIZE - 1)))) {
512		FRAGMENTS_T *fragments;
513
514		if (down_interruptible(&g_free_fragments_sema) != 0) {
515      			free(pagelist, M_VCPAGELIST);
516			return -EINTR;
517		}
518
519		WARN_ON(g_free_fragments == NULL);
520
521		down(&g_free_fragments_mutex);
522		fragments = (FRAGMENTS_T *) g_free_fragments;
523		WARN_ON(fragments == NULL);
524		g_free_fragments = *(FRAGMENTS_T **) g_free_fragments;
525		up(&g_free_fragments_mutex);
526		pagelist->type =
527			 PAGELIST_READ_WITH_FRAGMENTS + (fragments -
528							 g_fragments_base);
529	}
530
531	cpu_dcache_wbinv_range((vm_offset_t)buf, count);
532
533	bus_dmamap_sync(bi->pagelist_dma_tag, bi->pagelist_dma_map, BUS_DMASYNC_PREWRITE);
534
535	bi->pagelist = pagelist;
536
537	return 0;
538
539failed_load:
540	bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
541failed_alloc:
542	bus_dmamap_destroy(bi->pagelist_dma_tag, bi->pagelist_dma_map);
543	bus_dma_tag_destroy(bi->pagelist_dma_tag);
544
545	return err;
546}
547
/*
 * Tear down a pagelist built by create_pagelist() after the bulk
 * transfer completes.  'actual' is the byte count actually transferred
 * (may be negative on error).  For fragment-assisted reads, the partial
 * head/tail cache lines are copied from the fragment buffers back into
 * the user pages and the fragment pair is returned to the free list;
 * all pages are then dirtied/unwired and the DMA resources released.
 */
static void
free_pagelist(BULKINFO_T *bi, int actual)
{
	vm_page_t*pages;
	unsigned int num_pages, i;
	PAGELIST_T *pagelist;

	pagelist = bi->pagelist;

	vchiq_log_trace(vchiq_arm_log_level,
		"free_pagelist - %x, %d", (unsigned int)pagelist, actual);

	num_pages =
		(pagelist->length + pagelist->offset + PAGE_SIZE - 1) /
		PAGE_SIZE;

	/* Page pointers were stored right after the address array. */
	pages = (vm_page_t*)(pagelist->addrs + num_pages);

	/* Deal with any partial cache lines (fragments) */
	if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
		/* The fragment index was encoded into the type field. */
		FRAGMENTS_T *fragments = g_fragments_base +
			(pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
		int head_bytes, tail_bytes;
		head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
			(CACHE_LINE_SIZE - 1);
		tail_bytes = (pagelist->offset + actual) &
			(CACHE_LINE_SIZE - 1);

		/* Copy the leading partial cache line into the first page. */
		if ((actual >= 0) && (head_bytes != 0)) {
			if (head_bytes > actual)
				head_bytes = actual;

			copyout_page(pages[0],
				pagelist->offset,
				fragments->headbuf,
				head_bytes);
		}

		/* Copy the trailing partial cache line into the last page. */
		if ((actual >= 0) && (head_bytes < actual) &&
			(tail_bytes != 0)) {

			copyout_page(pages[num_pages-1],
				(((vm_offset_t)bi->buf + actual) % PAGE_SIZE) - tail_bytes,
				fragments->tailbuf,
				tail_bytes);
		}

		/* Return the fragment pair to the free list and wake a waiter. */
		down(&g_free_fragments_mutex);
		*(FRAGMENTS_T **) fragments = g_free_fragments;
		g_free_fragments = fragments;
		up(&g_free_fragments_mutex);
		up(&g_free_fragments_sema);
	}

	for (i = 0; i < num_pages; i++) {
		/* Reads modified the pages; mark them dirty before release. */
		if (pagelist->type != PAGELIST_WRITE) {
			vm_page_dirty(pages[i]);
			pagelist_page_free(pages[i]);
		}
	}

	bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
	bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map);
	bus_dmamap_destroy(bi->pagelist_dma_tag, bi->pagelist_dma_map);
	bus_dma_tag_destroy(bi->pagelist_dma_tag);

	free(bi, M_VCPAGELIST);
}
616