1// Copyright 2017 The Fuchsia Authors
2//
3// Use of this source code is governed by a MIT-style
4// license that can be found in the LICENSE file or at
5// https://opensource.org/licenses/MIT
6
7#include <lib/memory_limit.h>
8
9#include <assert.h>
10#include <err.h>
11#include <inttypes.h>
12#include <iovec.h>
13#include <kernel/cmdline.h>
14#include <vm/vm.h>
15#include <fbl/algorithm.h>
16#include <platform.h>
17#include <stdio.h>
18#include <string.h>
19#include <trace.h>
20
21#define LOCAL_TRACE 0
22
23zx_status_t mem_limit_init(mem_limit_ctx_t* ctx) {
24    if (!ctx) {
25        return ZX_ERR_INVALID_ARGS;
26    }
27
28    uint64_t limit = cmdline_get_uint64("kernel.memory-limit-mb", 0u);
29    if (limit) {
30        printf("Kernel memory limit of %zu MB found.\n", limit);
31        ctx->memory_limit = limit * MB;
32        ctx->found_kernel = 0;
33        ctx->found_ramdisk = 0;
34        return ZX_OK;
35    }
36
37    return ZX_ERR_NOT_SUPPORTED;
38}
39
40zx_status_t mem_limit_get_iovs(mem_limit_ctx_t* ctx, uintptr_t range_base, size_t range_size,
41                               iovec_t iovs[], size_t* used_cnt) {
42    DEBUG_ASSERT(ctx);
43    DEBUG_ASSERT(iovs);
44    DEBUG_ASSERT(used_cnt);
45
46    if (range_size == 0 || ctx->memory_limit == 0) {
47        /* If our limit has been reached this range can be skipped */
48        *used_cnt = 0;
49        return ZX_OK;
50    }
51
52    LTRACEF("scanning range %" PRIxPTR " of size %zu, (kernel start %#" PRIxPTR " limit %zu\n",
53            range_base, range_size, ctx->kernel_base, ctx->memory_limit);
54    // Convenience values for the offsets and sizes within the range.
55    // These correspond to the two ranges that might be built to represent
56    // a pair of ranges that correspond to a kernel and a ramdisk. They're
57    // used instead of iovs[] directly to avoid casting for (void*) math.
58    uintptr_t low_base, high_base = 0;
59    size_t low_len, high_len = 0;
60
61    /* This is where things get more complicated if we found the kernel_iov. On both
62     * x86 and ARM the kernel and ramdisk will exist in the same memory range.
63     * On x86 this is the lowmem region below 4GB based on where UEFI's page
64     * allocations placed it. For ARM, it depends on the platform's bootrom, but
65     * the important detail is that they both should be in the same contiguous
66     * block of DRAM. Either way, we know the kernel + bss needs to be included
67     * in memory regardless so that's the first priority.
68     *
69     * If we booted in the first place then we can assume we have enough space
70     * for ourselves. k_low/k_high/r_high represent spans as follows:
71     * |base|<k_low>[kernel]<k_high>[ramdisk]<r_high>[end]>
72     *
73     * Alternatively, if there is no ramdisk then the situation looks more like:
74     * |base|<k_low>[kernel]<k_high>[end]
75     *
76     * TODO: when kernel relocation exists this will need to handle the ramdisk
77     * being before the kernel_iov, as well as them possibly being in different
78     * ranges.
79     */
80    uintptr_t k_base = ctx->kernel_base;
81    size_t k_size = ctx->kernel_size;
82    uintptr_t k_end = k_base + k_size;
83    uintptr_t range_end = range_base + range_size;
84    if (range_base <= k_base && k_base < range_end) {
85        // First set up the kernel low/high for the spans we care about
86        uint64_t k_low = k_base - range_base;
87        uint64_t k_high = range_end;
88        uint64_t tmp, r_high;
89        low_base = k_base;
90        low_len = k_size;
91        ctx->memory_limit -= k_size;
92        LTRACEF("kernel base %#" PRIxPTR " size %#" PRIxPTR "\n", k_base, k_size);
93
94        // Add the ramdisk, but warn the user if we have to expand the limit to fit it in memory
95        if (ctx->ramdisk_size && ctx->ramdisk_base >= range_base &&
96                ctx->ramdisk_base + ctx->ramdisk_size <= range_end) {
97            uintptr_t r_base = ctx->ramdisk_base;
98            uintptr_t r_end = r_base + ctx->ramdisk_size;
99            LTRACEF("ramdisk base %" PRIxPTR " size %" PRIxPTR "\n", r_base, ctx->ramdisk_size);
100            tmp = fbl::min(ctx->memory_limit, ctx->ramdisk_size);
101            if (tmp != ctx->ramdisk_size) {
102                size_t diff = ctx->ramdisk_size - ctx->memory_limit;
103                printf("WARNING: ramdisk forces the system to exceed the system memory limit"
104                       "of %zu bytes by %zu bytes!\n", ctx->memory_limit, diff);
105                ctx->memory_limit += diff;
106                tmp = ctx->ramdisk_size;
107            }
108            high_base = r_base;
109            high_len = tmp;
110            ctx->memory_limit -= tmp;
111
112            // If a ramdisk is found then the kernel ends at the ramdisk's base
113            // rather than at the end of the range
114            k_high = r_base - k_end;
115            r_high = range_end - r_end;
116            ctx->found_ramdisk = true;
117        } else {
118            // Set r_high to zero here so that the checks later to expand the
119            // high vector work without any special casing.
120            r_high = 0;
121        }
122
123        // We've created our kernel and ramdisk vecs, and now we expand them as
124        // much as possible within the imposed limit, starting with the k_high
125        // gap between the kernel and ramdisk_iov.
126        tmp = fbl::min(ctx->memory_limit, k_high);
127        if (tmp) {
128            LTRACEF("growing low iov by %zu bytes.\n", tmp);
129            low_len += tmp;
130            ctx->memory_limit -= tmp;
131        }
132
133        // Handle space between the start of the range and the kernel base
134        tmp = fbl::min(ctx->memory_limit, k_low);
135        if (tmp) {
136            low_base -= tmp;
137            low_len += tmp;
138            ctx->memory_limit -= tmp;
139            LTRACEF("moving low iov base back by %zu to %#" PRIxPTR ".\n",
140                    tmp, low_base);
141        }
142
143        // At this point we have already expanded the vector containing the
144        // kernel as much as we can, so low_base + low_len either ends at the
145        // start of the ramdisk, the end of the range, or the end of our memory
146        // limit. If we still have any memory left that we're allowed to use and
147        // there's space between the end of the ramdisk and end of the range,
148        // then we can attempt to grow that the high vector by the difference.
149        tmp = fbl::min(ctx->memory_limit, r_high);
150        if (tmp) {
151            LTRACEF("growing high iov by %zu bytes.\n", tmp);
152            high_len += tmp;
153            ctx->memory_limit -= tmp;
154        }
155
156        // Collapse the kernel and ramdisk into a single io vector if they're
157        // adjacent to each other.
158        if (low_base + low_len == high_base) {
159            low_len += high_len;
160            high_base = 0;
161            high_len = 0;
162            LTRACEF("Merging both iovs into a single iov base %#" PRIxPTR " size %zu\n",
163                    low_base, low_len);
164        }
165
166        ctx->found_kernel = true;
167    } else {
168        // Set an adjusted local limit for the current range we're scanning
169        // based on whether we have found the kernel and ramdisk yet. If we
170        // haven't then we need to set aside space for them in future ranges by
171        // restricting the space used by this range's vectors.
172        size_t adjusted_limit = ctx->memory_limit;
173
174        if (!ctx->found_kernel) {
175            adjusted_limit -= fbl::min(ctx->kernel_size, adjusted_limit);
176            if (ctx->ramdisk_size) {
177                adjusted_limit -= fbl::min(ctx->ramdisk_size, adjusted_limit);
178            }
179        }
180
181        LTRACEF("adjusted limit of %zu being used (found_kernel: %d, found_ramdisk: %d)\n", adjusted_limit, ctx->found_kernel, ctx->found_ramdisk);
182        // No kernel here, presumably no ramdisk. Just add what we can.
183        uint64_t tmp = fbl::min(adjusted_limit, range_size);
184        low_base = range_base;
185        low_len = tmp;
186        ctx->memory_limit -= tmp;
187        LTRACEF("using %zu bytes from base %#" PRIxPTR "\n", low_len, low_base);
188    }
189
190    DEBUG_ASSERT(low_base >= range_base);
191    DEBUG_ASSERT(high_base == 0 || high_base >= range_base);
192    DEBUG_ASSERT(low_base + low_len <= range_end);
193    DEBUG_ASSERT(high_base + high_len <= range_end);
194    DEBUG_ASSERT(low_len + high_len <= range_size);
195
196    // Build the iovs with the ranges figured out above
197    iovs[0].iov_base = reinterpret_cast<void*>(low_base);
198    iovs[0].iov_len = ROUNDUP_PAGE_SIZE(low_len);
199    iovs[1].iov_base = reinterpret_cast<void*>(high_base);
200    iovs[1].iov_len = ROUNDUP_PAGE_SIZE(high_len);
201
202    // Set the count to 0 through 2 depending on vectors used
203    *used_cnt = !!(iovs[0].iov_len) + !!(iovs[1].iov_len);
204
205    LTRACEF("used %zu iov%s remaining memory %zu bytes\n", *used_cnt, (*used_cnt == 1) ? "," : "s,", ctx->memory_limit);
206    return ZX_OK;
207}
208
209zx_status_t mem_limit_add_arenas_from_range(mem_limit_ctx_t* ctx, uintptr_t range_base,
210                                            size_t range_size, pmm_arena_info_t arena_template) {
211    size_t used;
212    iovec_t vecs[2];
213    zx_status_t status = mem_limit_get_iovs(ctx, range_base, range_size, vecs, &used);
214
215    if (status != ZX_OK) {
216        return status;
217    }
218
219    // Use the arena template and add any we created from this range to the pmm
220    for (size_t i = 0; i < used; i++) {
221        auto arena = arena_template;
222        arena.base = reinterpret_cast<paddr_t>(vecs[i].iov_base);
223        arena.size = vecs[i].iov_len;
224
225        status = pmm_add_arena(&arena);
226
227        // If either vector failed then abort the rest of the operation. There is no
228        // valid situation where only the second vector is used.
229        if (status != ZX_OK) {
230            break;
231        }
232    }
233
234    return status;
235}
236