1/*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright 2013, winocm. <winocm@icloud.com>
30 * All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without modification,
33 * are permitted provided that the following conditions are met:
34 *
35 *   Redistributions of source code must retain the above copyright notice, this
36 *   list of conditions and the following disclaimer.
37 *
38 *   Redistributions in binary form must reproduce the above copyright notice, this
39 *   list of conditions and the following disclaimer in the documentation and/or
40 *   other materials provided with the distribution.
41 *
42 *   If you are going to use this software in any form that does not involve
43 *   releasing the source to this project or improving it, let me know beforehand.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
47 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
48 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
49 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
50 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
51 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
52 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
53 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
54 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
55 */
56/*
57 * ARMv7 Tagged TLB support (ASID) for pmap.
58 */
59
60#include <mach_debug.h>
61#include <debug.h>
62#include <mach/vm_types.h>
63#include <mach/vm_param.h>
64#include <mach/thread_status.h>
65#include <kern/misc_protos.h>
66#include <kern/assert.h>
67#include <kern/cpu_number.h>
68#include <kern/thread.h>
69#include <arm/pmap.h>
70#include <arm/misc_protos.h>
71#include <kern/ledger.h>
72#include <kern/zalloc.h>
73#include <kern/lock.h>
74#include <kern/kalloc.h>
75#include <vm/vm_protos.h>
76#include <vm/vm_map.h>
77#include <vm/vm_kern.h>
78#include <mach/vm_param.h>
79#include <mach/vm_prot.h>
80#include <vm/vm_object.h>
81#include <vm/vm_page.h>
82#include <arm/cpu_capabilities.h>
83#include <arm/arch.h>
84#include <mach/branch_predicates.h>
85#include <arm/mp.h>
86#include <arm/cpufunc.h>
87#include "proc_reg.h"
88
89/*
 * ASID (Address Space IDentifier) aka tagged TLB support.
91 * On processors with this feature, unless disabled via the -pmap_asid_disable
92 * boot-arg, the following algorithm is in effect:
93 * Each processor maintains an array of tag refcounts indexed by tag.
94 * Each address space maintains an array of tags indexed by CPU number.
95 * Each address space maintains a coherency vector, indexed by CPU
96 * indicating that the TLB state for that address space has a pending
97 * invalidation.
98 * On a context switch, a refcounted tag is lazily assigned to the newly
99 * dispatched (CPU, address space) tuple.
100 * When an inactive address space is invalidated on a remote CPU, it is marked
101 * for invalidation upon the next dispatch. Some invalidations are
102 * also processed at the user/kernel boundary.
 * Provisions are made for the case where a CPU is overcommitted, i.e.
104 * more active address spaces exist than the number of logical tags
105 * provided for by the processor architecture (currently 4096).
106 * The algorithm assumes the processor remaps the logical tags
107 * to physical TLB context IDs in an LRU fashion for efficiency. (DRK '10)
108 *
109 * asid support was originally used in x86_64, but has been adapted for use in
110 * ARMv7 platforms.
111 */
112
/* Set TRUE by the -pmap_asid_disable boot-arg to fall back to untagged TLBs. */
boolean_t pmap_asid_disabled = FALSE;

/* Poison value stored in cpu_asid_last_pmap_dispatched when the owning
 * pmap releases its reference (distinguishable from NULL and any real pmap). */
#define PMAP_INVALID ((pmap_t)0xDEAD7347)
/* Sentinel meaning "no ASID assigned to this (pmap, cpu) tuple yet". */
#define PMAP_ASID_INVALID_ASID	(0xDEAD)
/* Ceiling on how many address spaces may share one tag when oversubscribed. */
#define	PMAP_ASID_MAX_REFCOUNT (0xF0)
/* ASID 0 is reserved for the kernel pmap; user allocation starts at 1. */
#define	PMAP_ASID_MIN_ASID (1)

/* Number of CPUs that have completed pmap_asid_configure(). */
uint32_t pmap_asid_ncpus = 0;
121
122void pmap_asid_invalidate_all_cpus(pmap_t tpmap) {
123    unsigned i;
124    assert((sizeof(tpmap->pmap_asid_coherency_vector) >= real_ncpus) && (!(sizeof(tpmap->pmap_asid_coherency_vector) & 7)));
125	for (i = 0; i < real_ncpus; i+=8) {
126          *(uint64_t *)(uintptr_t)&tpmap->pmap_asid_coherency_vector[i] = (~0ULL);
127    }
128}
129
130void pmap_asid_validate_current(void) {
131    int	ccpu = cpu_number();
132    volatile uint8_t *cptr = cpu_datap(ccpu)->cpu_pmap_asid_coherentp;
133    assert(cptr == &(current_thread()->map->pmap->pmap_asid_coherency_vector[ccpu]));
134    if (cptr) {
135        *cptr = 0;
136    }
137}
138
/*
 * Mark tpmap's TLB state stale on CPU 'ccpu'; the flush is processed
 * lazily the next time the pmap is dispatched there (see pmap_asid_activate).
 */
void pmap_asid_invalidate_cpu(pmap_t tpmap, int ccpu) {
	tpmap->pmap_asid_coherency_vector[ccpu] = 0xFF;
}
142
/*
 * Acknowledge (clear) a pending invalidation of tpmap on CPU 'ccpu'.
 */
void pmap_asid_validate_cpu(pmap_t tpmap, int ccpu) {
	tpmap->pmap_asid_coherency_vector[ccpu] = 0;
}
146
147void pmap_asid_configure(void)
148{
149    int ccpu = cpu_number();
150
151    kprintf("[ASID configuration start]\n");
152    printf("PMAP: enabled ASID support\n");
153
154    assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
155    if(!pmap_asid_disabled) {
156        if (OSIncrementAtomic(&pmap_asid_ncpus) == machine_info.max_cpus) {
157            kprintf("All ASID/asids enabled: real_ncpus: %d, pmap_asid_ncpus: %d\n", real_ncpus, pmap_asid_ncpus);
158        }
159
160        arm_tlb_flushID();
161
162        cpu_datap(ccpu)->cpu_pmap_asid_coherentp =
163            cpu_datap(ccpu)->cpu_pmap_asid_coherentp_kernel =
164            &(kernel_pmap->pmap_asid_coherency_vector[ccpu]);
165        cpu_datap(ccpu)->cpu_asid_refcounts[0] = 1;
166    }
167}
168
169void pmap_asid_initialize(pmap_t p) {
170    unsigned i;
171    unsigned nc = sizeof(p->pmap_asid_cpus)/sizeof(asid_t);
172
173    assert(nc >= real_ncpus);
174    for (i = 0; i < nc; i++) {
175        p->pmap_asid_cpus[i] = PMAP_ASID_INVALID_ASID;
176    }
177}
178
179void pmap_asid_initialize_kernel(pmap_t p) {
180    unsigned i;
181    unsigned nc = sizeof(p->pmap_asid_cpus)/sizeof(asid_t);
182
183    for (i = 0; i < nc; i++) {
184        p->pmap_asid_cpus[i] = 0;
185    }
186}
187
/*
 * Allocate an ASID on CPU 'ccpu' for a newly dispatched address space.
 * Order of preference: (1) the cached free-slot hint, (2) a linear scan
 * for a zero refcount, (3) when all tags are in use (oversubscription),
 * share the tag with the smallest refcount. The chosen tag's refcount
 * is incremented atomically; returns the tag index.
 */
asid_t  pmap_asid_allocate_asid(int ccpu) {
    int i;
    asid_ref_t  cur_min = 0xFF;
    /* Sentinel for "no minimum found yet". NOTE(review): ~1 (0xFFFFFFFE)
     * looks like a typo for ~0 -- harmless, since any refcount < 0xFF
     * updates it before the assert below, but confirm intent. */
    uint32_t    cur_min_index = ~1;
    asid_ref_t  *cpu_asid_refcounts = &cpu_datap(ccpu)->cpu_asid_refcounts[0];
    asid_ref_t  old_count;

    /* Fast path: a recently freed slot was remembered by the deallocator. */
    if ((i = cpu_datap(ccpu)->cpu_asid_free_hint) != 0) {
        if (cpu_asid_refcounts[i] == 0) {
            (void)__sync_fetch_and_add(&cpu_asid_refcounts[i], 1);
            cpu_datap(ccpu)->cpu_asid_free_hint = 0;
            return i;
        }
    }
    /* Linear scan to discover free slot, with hint. Room for optimization
     * but with intelligent prefetchers this should be
     * adequately performant, as it is invoked
     * only on first dispatch of a new address space onto
     * a given processor. DRKTODO: use larger loads and
     * zero byte discovery -- any pattern != ~1 should
     * signify a free slot.
     */
    for (i = PMAP_ASID_MIN_ASID; i < PMAP_ASID_MAX_ASID; i++) {
        asid_ref_t cur_refcount = cpu_asid_refcounts[i];

        assert(cur_refcount < PMAP_ASID_MAX_REFCOUNT);

        if (cur_refcount == 0) {
            /* Free tag found: claim it. */
            (void)__sync_fetch_and_add(&cpu_asid_refcounts[i], 1);
            return i;
        }
        else {
            /* Track the least-shared tag in case nothing is free. */
            if (cur_refcount < cur_min) {
                cur_min_index = i;
                cur_min = cur_refcount;
            }
        }
    }
    /* Oversubscribed: every tag is in use. Since refcounts are asserted
     * below PMAP_ASID_MAX_REFCOUNT (< cur_min's initial 0xFF), the scan
     * always sets cur_min_index to a valid tag. */
    assert(cur_min_index > 0 && cur_min_index < PMAP_ASID_MAX_ASID);
    /* Consider "rebalancing" tags actively in highly oversubscribed cases
     * perhaps selecting tags with lower activity.
     */

    old_count = __sync_fetch_and_add(&cpu_asid_refcounts[cur_min_index], 1);
    assert(old_count < PMAP_ASID_MAX_REFCOUNT);
    return cur_min_index;
}
235
/*
 * Drop CPU 'ccpu's reference on the ASID assigned to 'tpmap'. If tpmap
 * was the last pmap dispatched under that tag, poison the dispatch slot
 * (via CAS, so a concurrent re-dispatch wins); if the refcount drops to
 * zero, remember the tag as a free hint for the next allocation.
 */
void    pmap_asid_deallocate_asid(int ccpu, pmap_t tpmap) {
    asid_t asid;
    pmap_t lp;
    asid_ref_t prior_count;

    asid = tpmap->pmap_asid_cpus[ccpu];
    assert(asid != PMAP_ASID_INVALID_ASID);
    /* Release builds: tolerate a pmap that never ran on this CPU. */
    if (asid == PMAP_ASID_INVALID_ASID)
        return;

    lp = cpu_datap(ccpu)->cpu_asid_last_pmap_dispatched[asid];
    assert(asid > 0 && asid < PMAP_ASID_MAX_ASID);
    assert(cpu_datap(ccpu)->cpu_asid_refcounts[asid] >= 1);

    /* CAS rather than plain store: only poison if the slot still names
     * this pmap; a concurrent dispatch of another pmap must not be lost. */
    if (lp == tpmap)
        (void)__sync_bool_compare_and_swap(&cpu_datap(ccpu)->cpu_asid_last_pmap_dispatched[asid], tpmap, PMAP_INVALID);

    /* __sync_fetch_and_sub returns the pre-decrement value; 1 means this
     * was the final reference, so the tag is now free. */
    if ((prior_count = __sync_fetch_and_sub(&cpu_datap(ccpu)->cpu_asid_refcounts[asid], 1)) == 1) {
            cpu_datap(ccpu)->cpu_asid_free_hint = asid;
    }
    assert(prior_count <= PMAP_ASID_MAX_REFCOUNT);
}
258
259void    pmap_destroy_asid_sync(pmap_t p) {
260    int i;
261    assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() !=0);
262    for (i = 0; i < PMAP_ASID_MAX_CPUS; i++)
263        if (p->pmap_asid_cpus[i] != PMAP_ASID_INVALID_ASID)
264            pmap_asid_deallocate_asid(i, p);
265}
266
/*
 * Return the ASID currently assigned to (pmap, ccpu); may be
 * PMAP_ASID_INVALID_ASID if the pmap has not yet run on that CPU.
 */
asid_t  asid_for_pmap_cpu_tuple(pmap_t pmap, int ccpu) {
    return pmap->pmap_asid_cpus[ccpu];
}
270
/*
 * Activate 'tpmap' on CPU 'ccpu' at context-switch time: lazily allocate
 * an ASID on first dispatch, service any pending TLB invalidation for
 * the pmap, and detect tag reuse by a different address space (which
 * also requires a flush acknowledgment). Finally publish this CPU's
 * coherency slot so later invalidations reach us.
 */
void    pmap_asid_activate(pmap_t tpmap, int ccpu) {
    asid_t      new_asid = tpmap->pmap_asid_cpus[ccpu];
    pmap_t      last_pmap;
    boolean_t   asid_conflict = FALSE, pending_flush = FALSE;

    /* First dispatch of this address space on this CPU: allocate a tag. */
    if (__improbable(new_asid == PMAP_ASID_INVALID_ASID)) {
        new_asid = tpmap->pmap_asid_cpus[ccpu] = pmap_asid_allocate_asid(ccpu);
    }
    assert(new_asid != PMAP_ASID_INVALID_ASID);
    cpu_datap(ccpu)->cpu_active_asid = new_asid;

    /* A nonzero coherency byte means a remote invalidation is pending. */
    pending_flush = (tpmap->pmap_asid_coherency_vector[ccpu] != 0);
    if (__probable(pending_flush == FALSE)) {
        /* No flush pending; but if the tag was last used by a different
         * pmap (oversubscription), its stale entries must still go. */
        last_pmap = cpu_datap(ccpu)->cpu_asid_last_pmap_dispatched[new_asid];
        asid_conflict = ((last_pmap != NULL) &&(tpmap != last_pmap));
    }
    if (__improbable(pending_flush || asid_conflict)) {
        pmap_asid_validate_cpu(tpmap, ccpu);
    }
    /* Consider making this a unique id */
    cpu_datap(ccpu)->cpu_asid_last_pmap_dispatched[new_asid] = tpmap;

    assert(new_asid < PMAP_ASID_MAX_ASID);
    assert(((tpmap ==  kernel_pmap) && new_asid == 0) || ((new_asid != PMAP_ASID_INVALID_ASID) && (new_asid != 0)));
    tpmap->pm_asid = new_asid;

    /* Re-check the coherency byte: an invalidation may have arrived
     * between the first read above and now. */
    if (!pending_flush) {
        pending_flush = (tpmap->pmap_asid_coherency_vector[ccpu] != 0);
        if (__improbable(pending_flush != 0)) {
            pmap_asid_validate_cpu(tpmap, ccpu);
            tpmap->pm_asid = new_asid;
            kprintf("pending_flush not needed, handle this! new_asid: %x\n", new_asid);
        }
    }
    /* Publish this CPU's slot so remote invalidations of tpmap are seen. */
    cpu_datap(ccpu)->cpu_pmap_asid_coherentp = &(tpmap->pmap_asid_coherency_vector[ccpu]);
}
307