vchiq_2835_arm.c revision 290553
/**
 * Copyright (c) 2010-2012 Broadcom. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the above-listed copyright holders may not be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * ALTERNATIVELY, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2, as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <interface/compat/vchi_bsd.h>

#include <sys/malloc.h>
#include <sys/rwlock.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_param.h>
#include <vm/vm_phys.h>

#include <machine/bus.h>
#include <arm/broadcom/bcm2835/bcm2835_mbox.h>
#include <arm/broadcom/bcm2835/bcm2835_vcbus.h>

MALLOC_DEFINE(M_VCPAGELIST, "vcpagelist", "VideoCore pagelist memory");

#define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)

#define VCHIQ_DOORBELL_IRQ IRQ_ARM_DOORBELL_0
#define VCHIQ_ARM_ADDRESS(x) ((void *)PHYS_TO_VCBUS(pmap_kextract((vm_offset_t)(x))))

#include "vchiq_arm.h"
#include "vchiq_2835.h"
#include "vchiq_connected.h"
#include "vchiq_killable.h"

#define MAX_FRAGMENTS (VCHIQ_NUM_CURRENT_BULKS * 2)

int g_cache_line_size = 32;
static int g_fragment_size;

typedef struct vchiq_2835_state_struct {
   int inited;
   VCHIQ_ARM_STATE_T arm_state;
} VCHIQ_2835_ARM_STATE_T;

static char *g_slot_mem;
static int g_slot_mem_size;
vm_paddr_t g_slot_phys;
/* BSD DMA */
bus_dma_tag_t bcm_slots_dma_tag;
bus_dmamap_t bcm_slots_dma_map;

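/*
 * Pool of fixed-size fragment buffers used to handle partial cache lines
 * at the ends of bulk read transfers (see create_pagelist() below).
 */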
static char *g_fragments_base;
static char *g_free_fragments;
struct semaphore g_free_fragments_sema;

static DEFINE_SEMAPHORE(g_free_fragments_mutex);

typedef struct bulkinfo_struct {
	PAGELIST_T	*pagelist;
	bus_dma_tag_t	pagelist_dma_tag;
	bus_dmamap_t	pagelist_dma_map;
	void		*buf;
	size_t		size;
} BULKINFO_T;

static int
create_pagelist(char __user *buf, size_t count, unsigned short type,
                struct proc *p, BULKINFO_T *bi);

static void
free_pagelist(BULKINFO_T *bi, int actual);

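/*
 * bus_dmamap_load() callback: record the bus address of the single DMA
 * segment, translated into the VideoCore's view of the address space.
 */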
static void
vchiq_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
{
	bus_addr_t *addr;

	if (err)
		return;

	addr = (bus_addr_t*)arg;
	*addr = PHYS_TO_VCBUS(segs[0].ds_addr);
}

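/*
 * Copy 'size' bytes from the kernel buffer 'kaddr' into the wired page 'p'
 * at 'offset', using a temporary quick mapping of the page.
 */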
static int
copyout_page(vm_page_t p, size_t offset, void *kaddr, size_t size)
{
        uint8_t *dst;

        dst = (uint8_t*)pmap_quick_enter_page(p);
        if (!dst)
                return ENOMEM;

        memcpy(dst + offset, kaddr, size);

        pmap_quick_remove_page((vm_offset_t)dst);

        return 0;
}

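/*
 * Platform initialisation: allocate DMA-coherent memory for the VCHIQ
 * slots and the fragment pool, initialise slot zero and the free-fragment
 * list, and hand the bus address of the slot memory to the VideoCore via
 * the mailbox.
 */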
int __init
vchiq_platform_init(VCHIQ_STATE_T *state)
{
	VCHIQ_SLOT_ZERO_T *vchiq_slot_zero;
	int frag_mem_size;
	int err;
	int i;

	/* Allocate space for the channels in coherent memory */
	g_slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
	g_fragment_size = 2*g_cache_line_size;
	frag_mem_size = PAGE_ALIGN(g_fragment_size * MAX_FRAGMENTS);

	err = bus_dma_tag_create(
	    NULL,
	    PAGE_SIZE, 0,	       /* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,    /* lowaddr */
	    BUS_SPACE_MAXADDR,	  /* highaddr */
	    NULL, NULL,		 /* filter, filterarg */
	    g_slot_mem_size + frag_mem_size, 1,		/* maxsize, nsegments */
	    g_slot_mem_size + frag_mem_size, 0,		/* maxsegsize, flags */
	    NULL, NULL,		 /* lockfunc, lockarg */
	    &bcm_slots_dma_tag);

	err = bus_dmamem_alloc(bcm_slots_dma_tag, (void **)&g_slot_mem,
	    BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bcm_slots_dma_map);
	if (err) {
		vchiq_log_error(vchiq_core_log_level, "Unable to allocate channel memory");
		err = -ENOMEM;
		goto failed_alloc;
	}

	err = bus_dmamap_load(bcm_slots_dma_tag, bcm_slots_dma_map, g_slot_mem,
	    g_slot_mem_size + frag_mem_size, vchiq_dmamap_cb,
	    &g_slot_phys, 0);

	if (err) {
		vchiq_log_error(vchiq_core_log_level, "cannot load DMA map");
		err = -ENOMEM;
		goto failed_load;
	}

	WARN_ON(((int)g_slot_mem & (PAGE_SIZE - 1)) != 0);

	vchiq_slot_zero = vchiq_init_slots(g_slot_mem, g_slot_mem_size);
	if (!vchiq_slot_zero) {
		err = -EINVAL;
		goto failed_init_slots;
	}

	vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] =
		(int)g_slot_phys + g_slot_mem_size;
	vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
		MAX_FRAGMENTS;

	g_fragments_base = (char *)(g_slot_mem + g_slot_mem_size);
	g_slot_mem_size += frag_mem_size;

	g_free_fragments = g_fragments_base;
	for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
		*(char **)&g_fragments_base[i*g_fragment_size] =
			&g_fragments_base[(i + 1)*g_fragment_size];
	}
	*(char **)&g_fragments_base[i*g_fragment_size] = NULL;
	_sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);

	if (vchiq_init_state(state, vchiq_slot_zero, 0/*slave*/) !=
		VCHIQ_SUCCESS) {
		err = -EINVAL;
		goto failed_vchiq_init;
	}

	bcm_mbox_write(BCM2835_MBOX_CHAN_VCHIQ, (unsigned int)g_slot_phys);

	vchiq_log_info(vchiq_arm_log_level,
		"vchiq_init - done (slots %x, phys %x)",
		(unsigned int)vchiq_slot_zero, g_slot_phys);

	vchiq_call_connected_callbacks();

	return 0;

failed_vchiq_init:
failed_init_slots:
failed_load:
	bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map);
failed_alloc:
	bus_dmamap_destroy(bcm_slots_dma_tag, bcm_slots_dma_map);
	bus_dma_tag_destroy(bcm_slots_dma_tag);

	return err;
}

void __exit
vchiq_platform_exit(VCHIQ_STATE_T *state)
{

	bus_dmamap_unload(bcm_slots_dma_tag, bcm_slots_dma_map);
	bus_dmamap_destroy(bcm_slots_dma_tag, bcm_slots_dma_map);
	bus_dma_tag_destroy(bcm_slots_dma_tag);
}

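/*
 * Allocate and initialise the per-state platform data, including the
 * embedded ARM-side state.
 */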
VCHIQ_STATUS_T
vchiq_platform_init_state(VCHIQ_STATE_T *state)
{
   VCHIQ_STATUS_T status = VCHIQ_SUCCESS;
   state->platform_state = kzalloc(sizeof(VCHIQ_2835_ARM_STATE_T), GFP_KERNEL);
   ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 1;
   status = vchiq_arm_init_state(state, &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state);
   if(status != VCHIQ_SUCCESS)
   {
      ((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited = 0;
   }
   return status;
}

VCHIQ_ARM_STATE_T*
vchiq_platform_get_arm_state(VCHIQ_STATE_T *state)
{
   if(!((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->inited)
   {
      BUG();
   }
   return &((VCHIQ_2835_ARM_STATE_T*)state->platform_state)->arm_state;
}

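/*
 * Copy data into a kernel buffer. The source may be a user-space address
 * (below the kernel map), in which case copyin() is used, or a kernel
 * address, which is copied directly.
 */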
int
vchiq_copy_from_user(void *dst, const void *src, int size)
{

	if (((vm_offset_t)(src)) < VM_MIN_KERNEL_ADDRESS) {
		int error = copyin(src, dst, size);
		return error ? VCHIQ_ERROR : VCHIQ_SUCCESS;
	}
	else
		bcopy(src, dst, size);

	return 0;
}

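/*
 * Prepare a bulk transfer: wire down the user buffer, build a pagelist
 * describing it in DMA-coherent memory, and store the bus address of that
 * pagelist in the bulk descriptor for the VideoCore to use.
 */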
VCHIQ_STATUS_T
vchiq_prepare_bulk_data(VCHIQ_BULK_T *bulk, VCHI_MEM_HANDLE_T memhandle,
	void *offset, int size, int dir)
{
	BULKINFO_T *bi;
	int ret;

	WARN_ON(memhandle != VCHI_MEM_HANDLE_INVALID);
	bi = malloc(sizeof(*bi), M_VCPAGELIST, M_WAITOK | M_ZERO);
	if (bi == NULL)
		return VCHIQ_ERROR;

	ret = create_pagelist((char __user *)offset, size,
			(dir == VCHIQ_BULK_RECEIVE)
			? PAGELIST_READ
			: PAGELIST_WRITE,
			current,
			bi);
	if (ret != 0) {
		free(bi, M_VCPAGELIST);
		return VCHIQ_ERROR;
	}

	bulk->handle = memhandle;
	bulk->data = VCHIQ_ARM_ADDRESS(bi->pagelist);

	/* Store the pagelist address in remote_data, which isn't used by the
	   slave. */
	bulk->remote_data = bi;

	return VCHIQ_SUCCESS;
}

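/*
 * Complete a bulk transfer: copy back any partial cache line fragments and
 * release the pagelist resources allocated in vchiq_prepare_bulk_data().
 */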
void
vchiq_complete_bulk(VCHIQ_BULK_T *bulk)
{
	if (bulk && bulk->remote_data && bulk->actual)
		free_pagelist((BULKINFO_T *)bulk->remote_data, bulk->actual);
}

void
vchiq_transfer_bulk(VCHIQ_BULK_T *bulk)
{
	/*
	 * This should only be called on the master (VideoCore) side, but
	 * provide an implementation to avoid the need for ifdefery.
	 */
	BUG();
}

void
vchiq_dump_platform_state(void *dump_context)
{
	char buf[80];
	int len;
	len = snprintf(buf, sizeof(buf),
		"  Platform: 2835 (VC master)");
	vchiq_dump(dump_context, buf, len + 1);
}

VCHIQ_STATUS_T
vchiq_platform_suspend(VCHIQ_STATE_T *state)
{
   return VCHIQ_ERROR;
}

VCHIQ_STATUS_T
vchiq_platform_resume(VCHIQ_STATE_T *state)
{
   return VCHIQ_SUCCESS;
}

void
vchiq_platform_paused(VCHIQ_STATE_T *state)
{
}

void
vchiq_platform_resumed(VCHIQ_STATE_T *state)
{
}

int
vchiq_platform_videocore_wanted(VCHIQ_STATE_T* state)
{
   return 1; // autosuspend not supported - videocore always wanted
}

int
vchiq_platform_use_suspend_timer(void)
{
   return 0;
}
void
vchiq_dump_platform_use_state(VCHIQ_STATE_T *state)
{
	vchiq_log_info(vchiq_arm_log_level, "Suspend timer not in use");
}
void
vchiq_platform_handle_timeout(VCHIQ_STATE_T *state)
{
	(void)state;
}
/*
 * Local functions
 */

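/*
 * Drop the wiring on a page that was held for a bulk transfer, freeing it
 * if it is no longer referenced by any object.
 */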
static void
pagelist_page_free(vm_page_t pp)
{
	vm_page_lock(pp);
	vm_page_unwire(pp, PQ_INACTIVE);
	if (pp->wire_count == 0 && pp->object == NULL)
		vm_page_free(pp);
	vm_page_unlock(pp);
}

/* There is a potential problem with partial cache lines at the ends of the
** block when reading: if the CPU has accessed anything in the same cache
** line, it may have pulled stale data into the cache, obscuring the newly
** DMA'd data underneath. This is solved by transferring the partial cache
** lines separately through the fragment buffers and letting the ARM copy
** them into the cached area.
**
** N.B. This implementation plays slightly fast and loose with the Linux
** driver programming rules, e.g. its use of __virt_to_bus instead of
** dma_map_single, but it isn't a multi-platform driver and it benefits
** from increased speed as a result.
*/

static int
create_pagelist(char __user *buf, size_t count, unsigned short type,
	struct proc *p, BULKINFO_T *bi)
{
	PAGELIST_T *pagelist;
	vm_page_t* pages;
	unsigned long *addrs;
	unsigned int num_pages, i;
	vm_offset_t offset;
	int pagelist_size;
	char *addr, *base_addr, *next_addr;
	int run, addridx, actual_pages;
	int err;
	vm_paddr_t pagelist_phys;

	offset = (vm_offset_t)buf & (PAGE_SIZE - 1);
	num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE;

	bi->pagelist = NULL;
	bi->buf = buf;
	bi->size = count;

	/* Allocate enough storage to hold the page pointers and the page
	** list
	*/
	pagelist_size = sizeof(PAGELIST_T) +
		(num_pages * sizeof(unsigned long)) +
		(num_pages * sizeof(pages[0]));

	err = bus_dma_tag_create(
	    NULL,
	    PAGE_SIZE, 0,	       /* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,    /* lowaddr */
	    BUS_SPACE_MAXADDR,	  /* highaddr */
	    NULL, NULL,		 /* filter, filterarg */
	    pagelist_size, 1,		/* maxsize, nsegments */
	    pagelist_size, 0,		/* maxsegsize, flags */
	    NULL, NULL,		 /* lockfunc, lockarg */
	    &bi->pagelist_dma_tag);

	err = bus_dmamem_alloc(bi->pagelist_dma_tag, (void **)&pagelist,
	    BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bi->pagelist_dma_map);
	if (err) {
		vchiq_log_error(vchiq_core_log_level, "Unable to allocate pagelist memory");
		err = -ENOMEM;
		goto failed_alloc;
	}

	err = bus_dmamap_load(bi->pagelist_dma_tag, bi->pagelist_dma_map, pagelist,
	    pagelist_size, vchiq_dmamap_cb,
	    &pagelist_phys, 0);

	if (err) {
		vchiq_log_error(vchiq_core_log_level, "cannot load DMA map for pagelist memory");
		err = -ENOMEM;
		goto failed_load;
	}

	vchiq_log_trace(vchiq_arm_log_level,
		"create_pagelist - %x (%d bytes @%p)", (unsigned int)pagelist, count, buf);

	if (!pagelist)
		return -ENOMEM;

	addrs = pagelist->addrs;
	pages = (vm_page_t*)(addrs + num_pages);

	actual_pages = vm_fault_quick_hold_pages(&p->p_vmspace->vm_map,
	    (vm_offset_t)buf, count,
	    (type == PAGELIST_READ ? VM_PROT_WRITE : 0 ) | VM_PROT_READ, pages, num_pages);

	if (actual_pages != num_pages) {
		/* The pagelist came from bus_dmamem_alloc(), so release it
		 * through the DMA API rather than free(9). */
		vm_page_unhold_pages(pages, actual_pages);
		bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
		bus_dmamem_free(bi->pagelist_dma_tag, pagelist, bi->pagelist_dma_map);
		bus_dma_tag_destroy(bi->pagelist_dma_tag);
		return (-ENOMEM);
	}

	for (i = 0; i < actual_pages; i++) {
		vm_page_lock(pages[i]);
		vm_page_wire(pages[i]);
		vm_page_unhold(pages[i]);
		vm_page_unlock(pages[i]);
	}

	pagelist->length = count;
	pagelist->type = type;
	pagelist->offset = offset;

	/* Group the pages into runs of contiguous pages */

	base_addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[0]));
	next_addr = base_addr + PAGE_SIZE;
	addridx = 0;
	run = 0;

	for (i = 1; i < num_pages; i++) {
		addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[i]));
		if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) {
			next_addr += PAGE_SIZE;
			run++;
		} else {
			addrs[addridx] = (unsigned long)base_addr + run;
			addridx++;
			base_addr = addr;
			next_addr = addr + PAGE_SIZE;
			run = 0;
		}
	}

	addrs[addridx] = (unsigned long)base_addr + run;
	addridx++;

	/* Partial cache lines (fragments) require special measures */
	if ((type == PAGELIST_READ) &&
		((pagelist->offset & (g_cache_line_size - 1)) ||
		((pagelist->offset + pagelist->length) &
		(g_cache_line_size - 1)))) {
		char *fragments;

		if (down_interruptible(&g_free_fragments_sema) != 0) {
			/* Undo the page wiring and release the DMA-allocated
			 * pagelist (free(9) is not valid for it). */
			for (i = 0; i < actual_pages; i++)
				pagelist_page_free(pages[i]);
			bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
			bus_dmamem_free(bi->pagelist_dma_tag, pagelist, bi->pagelist_dma_map);
			bus_dma_tag_destroy(bi->pagelist_dma_tag);
			return -EINTR;
		}

		WARN_ON(g_free_fragments == NULL);

		down(&g_free_fragments_mutex);
		fragments = g_free_fragments;
		WARN_ON(fragments == NULL);
		g_free_fragments = *(char **) g_free_fragments;
		up(&g_free_fragments_mutex);
		pagelist->type =
			 PAGELIST_READ_WITH_FRAGMENTS +
			 (fragments - g_fragments_base)/g_fragment_size;
	}

	cpu_dcache_wbinv_range((vm_offset_t)buf, count);

	bus_dmamap_sync(bi->pagelist_dma_tag, bi->pagelist_dma_map, BUS_DMASYNC_PREWRITE);

	bi->pagelist = pagelist;

	return 0;

failed_load:
	/* bi->pagelist has not been set yet; free the local allocation. */
	bus_dmamem_free(bi->pagelist_dma_tag, pagelist, bi->pagelist_dma_map);
failed_alloc:
	bus_dma_tag_destroy(bi->pagelist_dma_tag);

	return err;
}

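/*
 * Tear down a pagelist after a bulk transfer has completed: copy any head
 * and tail fragments back into the user buffer, return the fragment buffer
 * to the free list, unwire the pages, and release the DMA-coherent
 * pagelist memory.
 */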
static void
free_pagelist(BULKINFO_T *bi, int actual)
{
	vm_page_t *pages;
	unsigned int num_pages, i;
	PAGELIST_T *pagelist;

	pagelist = bi->pagelist;

	vchiq_log_trace(vchiq_arm_log_level,
		"free_pagelist - %x, %d (%lu bytes @%p)", (unsigned int)pagelist, actual, pagelist->length, bi->buf);

	num_pages =
		(pagelist->length + pagelist->offset + PAGE_SIZE - 1) /
		PAGE_SIZE;

	pages = (vm_page_t*)(pagelist->addrs + num_pages);

	/* Deal with any partial cache lines (fragments) */
	if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
		char *fragments = g_fragments_base +
			(pagelist->type - PAGELIST_READ_WITH_FRAGMENTS)*g_fragment_size;
		int head_bytes, tail_bytes;
		head_bytes = (g_cache_line_size - pagelist->offset) &
			(g_cache_line_size - 1);
		tail_bytes = (pagelist->offset + actual) &
			(g_cache_line_size - 1);

		if ((actual >= 0) && (head_bytes != 0)) {
			if (head_bytes > actual)
				head_bytes = actual;

			copyout_page(pages[0],
				pagelist->offset,
				fragments,
				head_bytes);
		}

		if ((actual >= 0) && (head_bytes < actual) &&
			(tail_bytes != 0)) {

			copyout_page(pages[num_pages-1],
				(((vm_offset_t)bi->buf + actual) % PAGE_SIZE) - tail_bytes,
				fragments + g_cache_line_size,
				tail_bytes);
		}

		down(&g_free_fragments_mutex);
		*(char **) fragments = g_free_fragments;
		g_free_fragments = fragments;
		up(&g_free_fragments_mutex);
		up(&g_free_fragments_sema);
	}

	for (i = 0; i < num_pages; i++) {
		/* Mark pages dirty only if the VideoCore may have written to
		 * them, but always drop the wiring taken in create_pagelist(). */
		if (pagelist->type != PAGELIST_WRITE)
			vm_page_dirty(pages[i]);
		pagelist_page_free(pages[i]);
	}

	bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
	bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map);
	bus_dma_tag_destroy(bi->pagelist_dma_tag);

	free(bi, M_VCPAGELIST);
}

619