vchiq_2835_arm.c revision 302408
/**
 * Copyright (c) 2010-2012 Broadcom. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the above-listed copyright holders may not be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * ALTERNATIVELY, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2, as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <interface/compat/vchi_bsd.h>

#include <sys/malloc.h>
#include <sys/rwlock.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_param.h>
#include <vm/vm_phys.h>

#include <machine/bus.h>
#include <machine/cpu.h>
#include <arm/broadcom/bcm2835/bcm2835_mbox.h>
#include <arm/broadcom/bcm2835/bcm2835_vcbus.h>

MALLOC_DEFINE(M_VCPAGELIST, "vcpagelist", "VideoCore pagelist memory");

#define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)

#define VCHIQ_DOORBELL_IRQ IRQ_ARM_DOORBELL_0
#define VCHIQ_ARM_ADDRESS(x) ((void *)PHYS_TO_VCBUS(pmap_kextract((vm_offset_t)(x))))

#include "vchiq_arm.h"
#include "vchiq_2835.h"
#include "vchiq_connected.h"
#include "vchiq_killable.h"

#define MAX_FRAGMENTS (VCHIQ_NUM_CURRENT_BULKS * 2)

int g_cache_line_size = 32;
static int g_fragment_size;

typedef struct vchiq_2835_state_struct {
   int inited;
   VCHIQ_ARM_STATE_T arm_state;
} VCHIQ_2835_ARM_STATE_T;

static char *g_slot_mem;
static int g_slot_mem_size;
vm_paddr_t g_slot_phys;
/* BSD DMA */
bus_dma_tag_t bcm_slots_dma_tag;
bus_dmamap_t bcm_slots_dma_map;

static char *g_fragments_base;
static char *g_free_fragments;
struct semaphore g_free_fragments_sema;

static DEFINE_SEMAPHORE(g_free_fragments_mutex);

typedef struct bulkinfo_struct {
	PAGELIST_T	*pagelist;
	bus_dma_tag_t	pagelist_dma_tag;
	bus_dmamap_t	pagelist_dma_map;
	void		*buf;
	size_t		size;
} BULKINFO_T;

static int
create_pagelist(char __user *buf, size_t count, unsigned short type,
                struct proc *p, BULKINFO_T *bi);

static void
free_pagelist(BULKINFO_T *bi, int actual);

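/*
 * bus_dmamap_load(9) callback: record the VideoCore bus address of the
 * single DMA segment in the caller-supplied bus_addr_t.
 */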
static void
vchiq_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
{
	bus_addr_t *addr;

	if (err)
		return;

	addr = (bus_addr_t*)arg;
	*addr = PHYS_TO_VCBUS(segs[0].ds_addr);
}

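/*
 * Copy "size" bytes from the kernel buffer "kaddr" into the wired page "p"
 * at "offset", using a temporary per-CPU mapping so the page does not need
 * a permanent kernel mapping.
 */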
static int
copyout_page(vm_page_t p, size_t offset, void *kaddr, size_t size)
{
	uint8_t *dst;

	dst = (uint8_t*)pmap_quick_enter_page(p);
	if (!dst)
		return ENOMEM;

	memcpy(dst + offset, kaddr, size);

	pmap_quick_remove_page((vm_offset_t)dst);

	return 0;
}

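/*
 * Platform initialisation: allocate one physically contiguous, coherent
 * block holding the VCHIQ slots followed by the bulk-transfer fragment
 * pool, initialise the slot memory, and hand its VideoCore bus address to
 * the firmware through the VCHIQ mailbox channel.
 */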
int __init
vchiq_platform_init(VCHIQ_STATE_T *state)
{
	VCHIQ_SLOT_ZERO_T *vchiq_slot_zero;
	int frag_mem_size;
	int err;
	int i;

	/* Allocate space for the channels in coherent memory */
	g_slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
	g_fragment_size = 2*g_cache_line_size;
	frag_mem_size = PAGE_ALIGN(g_fragment_size * MAX_FRAGMENTS);

	err = bus_dma_tag_create(
	    NULL,			/* parent */
	    PAGE_SIZE, 0,		/* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    g_slot_mem_size + frag_mem_size, 1,	/* maxsize, nsegments */
	    g_slot_mem_size + frag_mem_size, 0,	/* maxsegsize, flags */
	    NULL, NULL,			/* lockfunc, lockarg */
	    &bcm_slots_dma_tag);
	if (err) {
		vchiq_log_error(vchiq_core_log_level, "Unable to create DMA tag");
		return -ENOMEM;
	}

	err = bus_dmamem_alloc(bcm_slots_dma_tag, (void **)&g_slot_mem,
	    BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bcm_slots_dma_map);
	if (err) {
		vchiq_log_error(vchiq_core_log_level, "Unable to allocate channel memory");
		err = -ENOMEM;
		goto failed_alloc;
	}

	err = bus_dmamap_load(bcm_slots_dma_tag, bcm_slots_dma_map, g_slot_mem,
	    g_slot_mem_size + frag_mem_size, vchiq_dmamap_cb,
	    &g_slot_phys, 0);

	if (err) {
		vchiq_log_error(vchiq_core_log_level, "cannot load DMA map");
		err = -ENOMEM;
		goto failed_load;
	}

	WARN_ON(((int)g_slot_mem & (PAGE_SIZE - 1)) != 0);

	vchiq_slot_zero = vchiq_init_slots(g_slot_mem, g_slot_mem_size);
	if (!vchiq_slot_zero) {
		err = -EINVAL;
		goto failed_init_slots;
	}

	vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] =
		(int)g_slot_phys + g_slot_mem_size;
	vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
		MAX_FRAGMENTS;

	g_fragments_base = (char *)(g_slot_mem + g_slot_mem_size);
	g_slot_mem_size += frag_mem_size;

	/* Build the free list of bulk-transfer fragments. */
	g_free_fragments = g_fragments_base;
	for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
		*(char **)&g_fragments_base[i*g_fragment_size] =
			&g_fragments_base[(i + 1)*g_fragment_size];
	}
	*(char **)&g_fragments_base[i*g_fragment_size] = NULL;
	_sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);

	if (vchiq_init_state(state, vchiq_slot_zero, 0/*slave*/) !=
		VCHIQ_SUCCESS) {
		err = -EINVAL;
		goto failed_vchiq_init;
	}

	/* Tell VideoCore where the slot memory lives. */
	bcm_mbox_write(BCM2835_MBOX_CHAN_VCHIQ, (unsigned int)g_slot_phys);

	vchiq_log_info(vchiq_arm_log_level,
		"vchiq_init - done (slots %x, phys %x)",
		(unsigned int)vchiq_slot_zero, g_slot_phys);

	vchiq_call_connected_callbacks();

	return 0;

failed_vchiq_init:
failed_init_slots:
	bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map);
failed_load:
	bus_dmamem_free(bcm_slots_dma_tag, g_slot_mem, bcm_slots_dma_map);
failed_alloc:
	bus_dma_tag_destroy(bcm_slots_dma_tag);

	return err;
}

void __exit
vchiq_platform_exit(VCHIQ_STATE_T *state)
{

	bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map);
	bus_dmamem_free(bcm_slots_dma_tag, g_slot_mem, bcm_slots_dma_map);
	bus_dma_tag_destroy(bcm_slots_dma_tag);
}

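/*
 * Allocate and initialise the per-state platform data (the 2835 wrapper
 * around the generic ARM state).
 */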
VCHIQ_STATUS_T
vchiq_platform_init_state(VCHIQ_STATE_T *state)
{
   VCHIQ_STATUS_T status = VCHIQ_SUCCESS;
   state->platform_state = kzalloc(sizeof(VCHIQ_2835_ARM_STATE_T), GFP_KERNEL);
   if (!state->platform_state)
      return VCHIQ_ERROR;
   ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 1;
   status = vchiq_arm_init_state(state, &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state);
   if (status != VCHIQ_SUCCESS)
   {
      ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 0;
   }
   return status;
}

VCHIQ_ARM_STATE_T*
vchiq_platform_get_arm_state(VCHIQ_STATE_T *state)
{
   if (!((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited)
   {
      BUG();
   }
   return &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state;
}

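/*
 * Copy data into a kernel buffer from either user space or kernel space,
 * depending on where the source address points.
 */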
int
vchiq_copy_from_user(void *dst, const void *src, int size)
{

	if (((vm_offset_t)(src)) < VM_MIN_KERNEL_ADDRESS) {
		int error = copyin(src, dst, size);
		return error ? VCHIQ_ERROR : VCHIQ_SUCCESS;
	} else
		bcopy(src, dst, size);

	return VCHIQ_SUCCESS;
}

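/*
 * Prepare a bulk transfer: wire down the user buffer, build a pagelist
 * describing it in DMA-able memory, and give the VideoCore the bus address
 * of that pagelist. The BULKINFO_T is stashed in remote_data so its
 * resources can be released in vchiq_complete_bulk().
 */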
VCHIQ_STATUS_T
vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle,
	void *offset, int size, int dir)
{
	BULKINFO_T *bi;
	int ret;

	WARN_ON(memhandle != VCHI_MEM_HANDLE_INVALID);
	bi = malloc(sizeof(*bi), M_VCPAGELIST, M_WAITOK | M_ZERO);
	if (bi == NULL)
		return VCHIQ_ERROR;

	ret = create_pagelist((char __user *)offset, size,
			(dir == VCHIQ_BULK_RECEIVE)
			? PAGELIST_READ
			: PAGELIST_WRITE,
			current,
			bi);
	if (ret != 0) {
		free(bi, M_VCPAGELIST);
		return VCHIQ_ERROR;
	}

	bulk->handle = memhandle;
	bulk->data = VCHIQ_ARM_ADDRESS(bi->pagelist);

	/* Store the pagelist address in remote_data, which isn't used by the
	   slave. */
	bulk->remote_data = bi;

	return VCHIQ_SUCCESS;
}

void
vchiq_complete_bulk(VCHIQ_BULK_T *bulk)
{
	if (bulk && bulk->remote_data && bulk->actual)
		free_pagelist((BULKINFO_T *)bulk->remote_data, bulk->actual);
}

void
vchiq_transfer_bulk(VCHIQ_BULK_T *bulk)
{
	/*
	 * This should only be called on the master (VideoCore) side, but
	 * provide an implementation to avoid the need for ifdefery.
	 */
	BUG();
}

void
vchiq_dump_platform_state(void *dump_context)
{
	char buf[80];
	int len;
	len = snprintf(buf, sizeof(buf),
		"  Platform: 2835 (VC master)");
	vchiq_dump(dump_context, buf, len + 1);
}

VCHIQ_STATUS_T
vchiq_platform_suspend(VCHIQ_STATE_T *state)
{
   return VCHIQ_ERROR;
}

VCHIQ_STATUS_T
vchiq_platform_resume(VCHIQ_STATE_T *state)
{
   return VCHIQ_SUCCESS;
}

void
vchiq_platform_paused(VCHIQ_STATE_T *state)
{
}

void
vchiq_platform_resumed(VCHIQ_STATE_T *state)
{
}

int
vchiq_platform_videocore_wanted(VCHIQ_STATE_T* state)
{
   return 1; /* autosuspend not supported - VideoCore always wanted */
}

int
vchiq_platform_use_suspend_timer(void)
{
   return 0;
}

void
vchiq_dump_platform_use_state(VCHIQ_STATE_T *state)
{
	vchiq_log_info(vchiq_arm_log_level, "Suspend timer not in use");
}

void
vchiq_platform_handle_timeout(VCHIQ_STATE_T *state)
{
	(void)state;
}

/*
 * Local functions
 */

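/*
 * Release a page that create_pagelist() wired: drop the wiring and free the
 * page if it is no longer referenced by any object.
 */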
static void
pagelist_page_free(vm_page_t pp)
{
	vm_page_lock(pp);
	vm_page_unwire(pp, PQ_INACTIVE);
	if (pp->wire_count == 0 && pp->object == NULL)
		vm_page_free(pp);
	vm_page_unlock(pp);
}

/* There is a potential problem with partial cache lines (pages?)
** at the ends of the block when reading. If the CPU accessed anything in
** the same line (page?) then it may have pulled old data into the cache,
** obscuring the new data underneath. We can solve this by transferring the
** partial cache lines separately, and allowing the ARM to copy into the
** cached area.

** N.B. This implementation plays slightly fast and loose with the Linux
** driver programming rules, e.g. its use of __virt_to_bus instead of
** dma_map_single, but it isn't a multi-platform driver and it benefits
** from increased speed as a result.
*/

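/*
 * Build the PAGELIST_T handed to the VideoCore for a bulk transfer. The
 * coherent allocation holds the PAGELIST_T (length, type, offset and the
 * addrs[] array of VideoCore bus addresses, each entry encoding a run of
 * physically contiguous pages in its low bits), followed by the vm_page_t
 * pointers for the wired user pages, which are only used on the ARM side
 * when the pagelist is freed.
 */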
static int
create_pagelist(char __user *buf, size_t count, unsigned short type,
	struct proc *p, BULKINFO_T *bi)
{
	PAGELIST_T *pagelist;
	vm_page_t* pages;
	unsigned long *addrs;
	unsigned int num_pages, i;
	vm_offset_t offset;
	int pagelist_size;
	char *addr, *base_addr, *next_addr;
	int run, addridx, actual_pages;
	int err;
	vm_paddr_t pagelist_phys;
	vm_paddr_t pa;

	offset = (vm_offset_t)buf & (PAGE_SIZE - 1);
	num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE;

	bi->pagelist = NULL;
	bi->buf = buf;
	bi->size = count;

	/* Allocate enough storage to hold the page pointers and the page
	** list
	*/
	pagelist_size = sizeof(PAGELIST_T) +
		(num_pages * sizeof(unsigned long)) +
		(num_pages * sizeof(pages[0]));

	err = bus_dma_tag_create(
	    NULL,			/* parent */
	    PAGE_SIZE, 0,		/* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    pagelist_size, 1,		/* maxsize, nsegments */
	    pagelist_size, 0,		/* maxsegsize, flags */
	    NULL, NULL,			/* lockfunc, lockarg */
	    &bi->pagelist_dma_tag);
	if (err) {
		vchiq_log_error(vchiq_core_log_level, "Unable to create DMA tag for pagelist");
		return -ENOMEM;
	}

	err = bus_dmamem_alloc(bi->pagelist_dma_tag, (void **)&pagelist,
	    BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bi->pagelist_dma_map);
	if (err) {
		vchiq_log_error(vchiq_core_log_level, "Unable to allocate pagelist memory");
		err = -ENOMEM;
		goto failed_alloc;
	}

	err = bus_dmamap_load(bi->pagelist_dma_tag, bi->pagelist_dma_map, pagelist,
	    pagelist_size, vchiq_dmamap_cb,
	    &pagelist_phys, 0);

	if (err) {
		vchiq_log_error(vchiq_core_log_level, "cannot load DMA map for pagelist memory");
		err = -ENOMEM;
		goto failed_load;
	}

	vchiq_log_trace(vchiq_arm_log_level,
		"create_pagelist - %x (%d bytes @%p)", (unsigned int)pagelist, count, buf);

	addrs = pagelist->addrs;
	pages = (vm_page_t*)(addrs + num_pages);

	/* Fault in the user buffer and hold its pages. */
	actual_pages = vm_fault_quick_hold_pages(&p->p_vmspace->vm_map,
	    (vm_offset_t)buf, count,
	    (type == PAGELIST_READ ? VM_PROT_WRITE : 0) | VM_PROT_READ, pages, num_pages);

	if (actual_pages != num_pages) {
		vm_page_unhold_pages(pages, actual_pages);
		err = -ENOMEM;
		goto failed_hold;
	}

	/* Wire the pages so they stay resident for the duration of the
	 * transfer, then drop the temporary hold. */
	for (i = 0; i < actual_pages; i++) {
		vm_page_lock(pages[i]);
		vm_page_wire(pages[i]);
		vm_page_unhold(pages[i]);
		vm_page_unlock(pages[i]);
	}

	pagelist->length = count;
	pagelist->type = type;
	pagelist->offset = offset;

	/* Group the pages into runs of contiguous pages */

	base_addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[0]));
	next_addr = base_addr + PAGE_SIZE;
	addridx = 0;
	run = 0;

	for (i = 1; i < num_pages; i++) {
		addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[i]));
		if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) {
			next_addr += PAGE_SIZE;
			run++;
		} else {
			addrs[addridx] = (unsigned long)base_addr + run;
			addridx++;
			base_addr = addr;
			next_addr = addr + PAGE_SIZE;
			run = 0;
		}
	}

	addrs[addridx] = (unsigned long)base_addr + run;
	addridx++;

	/* Partial cache lines (fragments) require special measures */
	if ((type == PAGELIST_READ) &&
		((pagelist->offset & (g_cache_line_size - 1)) ||
		((pagelist->offset + pagelist->length) &
		(g_cache_line_size - 1)))) {
		char *fragments;

		if (down_interruptible(&g_free_fragments_sema) != 0) {
			for (i = 0; i < actual_pages; i++)
				pagelist_page_free(pages[i]);
			err = -EINTR;
			goto failed_hold;
		}

		WARN_ON(g_free_fragments == NULL);

		down(&g_free_fragments_mutex);
		fragments = g_free_fragments;
		WARN_ON(fragments == NULL);
		g_free_fragments = *(char **) g_free_fragments;
		up(&g_free_fragments_mutex);
		pagelist->type =
			 PAGELIST_READ_WITH_FRAGMENTS +
			 (fragments - g_fragments_base)/g_fragment_size;
	}

	pa = pmap_extract(PCPU_GET(curpmap), (vm_offset_t)buf);
	dcache_wbinv_poc((vm_offset_t)buf, pa, count);

	bus_dmamap_sync(bi->pagelist_dma_tag, bi->pagelist_dma_map, BUS_DMASYNC_PREWRITE);

	bi->pagelist = pagelist;

	return 0;

failed_hold:
	bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
failed_load:
	bus_dmamem_free(bi->pagelist_dma_tag, pagelist, bi->pagelist_dma_map);
failed_alloc:
	bus_dma_tag_destroy(bi->pagelist_dma_tag);

	return err;
}

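/*
 * Tear down a pagelist once the bulk transfer has completed: copy any
 * partial-cache-line fragments back into the user buffer, return the
 * fragment to the free list, unwire the user pages and release the DMA
 * resources.
 */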
static void
free_pagelist(BULKINFO_T *bi, int actual)
{
	vm_page_t *pages;
	unsigned int num_pages, i;
	PAGELIST_T *pagelist;

	pagelist = bi->pagelist;

	vchiq_log_trace(vchiq_arm_log_level,
		"free_pagelist - %x, %d (%lu bytes @%p)", (unsigned int)pagelist, actual, pagelist->length, bi->buf);

	num_pages =
		(pagelist->length + pagelist->offset + PAGE_SIZE - 1) /
		PAGE_SIZE;

	pages = (vm_page_t*)(pagelist->addrs + num_pages);

	/* Deal with any partial cache lines (fragments) */
	if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
		char *fragments = g_fragments_base +
			(pagelist->type - PAGELIST_READ_WITH_FRAGMENTS)*g_fragment_size;
		int head_bytes, tail_bytes;
		head_bytes = (g_cache_line_size - pagelist->offset) &
			(g_cache_line_size - 1);
		tail_bytes = (pagelist->offset + actual) &
			(g_cache_line_size - 1);

		if ((actual >= 0) && (head_bytes != 0)) {
			if (head_bytes > actual)
				head_bytes = actual;

			copyout_page(pages[0],
				pagelist->offset,
				fragments,
				head_bytes);
		}

		if ((actual >= 0) && (head_bytes < actual) &&
			(tail_bytes != 0)) {

			copyout_page(pages[num_pages-1],
				(((vm_offset_t)bi->buf + actual) % PAGE_SIZE) - tail_bytes,
				fragments + g_cache_line_size,
				tail_bytes);
		}

		/* Return the fragment pair to the free list. */
		down(&g_free_fragments_mutex);
		*(char **) fragments = g_free_fragments;
		g_free_fragments = fragments;
		up(&g_free_fragments_mutex);
		up(&g_free_fragments_sema);
	}

	/* Mark read pages dirty and release the wiring taken in
	 * create_pagelist(). */
	for (i = 0; i < num_pages; i++) {
		if (pagelist->type != PAGELIST_WRITE)
			vm_page_dirty(pages[i]);
		pagelist_page_free(pages[i]);
	}

	bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
	bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map);
	bus_dma_tag_destroy(bi->pagelist_dma_tag);

	free(bi, M_VCPAGELIST);
}