1/*
2 * Copyright 2017, Data61
3 * Commonwealth Scientific and Industrial Research Organisation (CSIRO)
4 * ABN 41 687 119 230.
5 *
6 * This software may be distributed and modified according to the terms of
7 * the BSD 2-Clause license. Note that NO WARRANTY is provided.
8 * See "LICENSE_BSD2.txt" for details.
9 *
10 * @TAG(DATA61_BSD)
11 */
12
13/* This file contains functionality for debugging heap corruption issues. It is
14 * styled after Electric Fence [0], but we can't check as thoroughly as eFence
15 * can because we don't have support for mapping/unmapping/mprotecting regions.
16 * The basic concept is that we stick a 'canary' value before and after
17 * allocated memory. During free, we check that these canaries haven't been
18 * overwritten, which would imply buffer underflow or overflow. This checking
19 * is limited in that we don't catch heap corruption until memory is actually
20 * freed.
21 *
22 * Essentially any allocated region actually looks like this:
23 *   p           q         r
24 *   ���           ���         ���
25 *   |canary|size|memory...|canary|
26 *
27 * Internally we deal in 'unboxed' pointers, like p. Externally the caller is
28 * given 'boxed' pointers, like q. We effectively play the same tricks malloc
29 * does to store some book keeping information surrounding the malloced memory.
30 * The 'size' stored is the size the user requested, not the size including
31 * the canaries etc. We need to track this in order to calculate the pointer r
32 * on free.
33 *
34 * In addition to checking for corruption within the heap, this functionality
35 * also tracks pointers that have been allocated. If you try and free (or
36 * realloc) a pointer that was never allocated, it will be detected.
37 *
38 * To use this, you will need to instruct the linker to wrap your calls to
39 * allocation functions. You will need to append something like the following
40 * to your LD_FALGS:
41 *  -Wl,--wrap=malloc -Wl,--wrap=free -Wl,--wrap=realloc -Wl,--wrap=calloc
42 *
43 * Ensure you do not call any functions that malloc or free memory within this
44 * file or you will infinitely recurse.
45 */
46
47#include <assert.h>
48#include <autoconf.h>
49#include <sel4debug/gen_config.h>
50#include <stdint.h>
51#include <stdio.h>
52#include <stdlib.h> /* for size_t */
53#include <string.h>
54
55/* Maximum alignment of a data type. The malloc spec requires that returned
56 * pointers are aligned to this.
57 */
58#define MAX_ALIGNMENT (sizeof(void*) * 2) /* bytes */
59
60/* Random bits to OR into the pre- and post-canary values so we're not storing
61 * an exact pointer value. Note that these should be less than MAX_ALIGNMENT so
62 * that we're ORing into known 0 bits. Preserving the pointer is not actually
63 * necessary so nothing will break if you change these values to any random
64 * word-sized value.
65 */
66#define PRE_EXTRA_BITS  0x7
67#define POST_EXTRA_BITS 0x3
68
69/* Algorithms for calculating a canary value to use before and after the
70 * allocated region. The actual algorithm used here is more or less irrelevant
71 * as long as it is deterministic.
72 */
73static uintptr_t pre_canary(void *ptr)
74{
75    return ((uintptr_t)ptr) | PRE_EXTRA_BITS;
76}
77static uintptr_t post_canary(void *ptr)
78{
79    return ((uintptr_t)ptr) | POST_EXTRA_BITS;
80}
81
82typedef struct {
83    uintptr_t canary;
84    size_t size;
85} __attribute__((aligned(MAX_ALIGNMENT))) metadata_t;
86
87/* A `uintptr_t` that is not naturally aligned. It is necessary to explicitly
88 * tag unaligned pointers because, e.g., on ARM the compiler is free to
89 * transform `memcpy` calls into a variant that assumes its inputs are aligned
90 * and performs word-sized accesses. See the following for more information:
91 *   http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/4972.html
92 */
93typedef uintptr_t unaligned_uintptr_t __attribute__((aligned(1)));
94
95/* Whether we've already encountered heap corruption. See uses of this below,
96 * where we abandon all instrumentation if we've hit an error. The reasoning
97 * behind this is that error reporting functions may call malloc.
98 */
99static int err = 0;
100
101/* Adjust a size that is about to be passed to the real allocation functions in
102 * order to account for our instrumentation.
103 */
104static size_t adjust_size(size_t size)
105{
106    return size + sizeof(metadata_t) + sizeof(uintptr_t);
107}
108
109/* Wrap a (just-allocated) region with canary values. */
110static void *box(void *ptr, size_t size)
111{
112    if (ptr == NULL) {
113        return ptr;
114    }
115
116    metadata_t *pre = (metadata_t *)ptr;
117    ptr += sizeof(*pre);
118    unaligned_uintptr_t *post = ptr + size;
119
120    /* Write the leading canary. */
121    pre->canary = pre_canary(ptr);
122    pre->size = size;
123
124    /* Use memcpy for the trailing canary so we don't need to care about
125     * alignment.
126     */
127    uintptr_t canary = post_canary(ptr);
128    memcpy(post, &canary, sizeof(canary));
129
130    return ptr;
131}
132
133/* Throw an error resulting from a bad user invocation. */
134#define error(args...) do { \
135        assert(!err); /* We should not already be handling an error. */ \
136        err = 1; \
137        fprintf(stderr, args); \
138        abort(); \
139    } while (0)
140
141/* Unwrap a canary-endowed region into the original allocated pointer. */
142static void *unbox(void *ptr, void *ret_addr)
143{
144    if (ptr == NULL) {
145        return ptr;
146    }
147
148    metadata_t *pre = (metadata_t *)(ptr - sizeof(*pre));
149    unaligned_uintptr_t *post = ptr + pre->size;
150
151    /* Check the leading canary (underflow). */
152    if (pre->canary != pre_canary(ptr)) {
153        error("Leading corruption in heap memory pointed to by %p (called "
154              "prior to %p)\n", ptr, ret_addr);
155    }
156
157    /* Check the trailing canary with memcmp so we don't need to care about
158     * alignment. If the memcmp VM faults here, it's likely that you underran
159     * your buffer and overwrote the 'size' metadata, causing this function to
160     * derive an incorrect 'post' pointer.
161     */
162    uintptr_t canary = post_canary(ptr);
163    if (memcmp(post, &canary, sizeof(canary)) != 0) {
164        error("Buffer overflow in heap memory pointed to by %p (called prior "
165              "to %p)\n", ptr, ret_addr);
166    }
167
168    return (void *)pre;
169}
170
171/* Buffer for tracking currently live heap pointers. This is used to detect
172 * when the user attempts to free an invalid pointer. Note that we always track
173 * *boxed* pointers as these are the ones seen by the user.
174 */
175#ifndef CONFIG_LIBSEL4DEBUG_ALLOC_BUFFER_ENTRIES
176#define CONFIG_LIBSEL4DEBUG_ALLOC_BUFFER_ENTRIES 128
177#endif
178static uintptr_t alloced[CONFIG_LIBSEL4DEBUG_ALLOC_BUFFER_ENTRIES];
179
180/* Track the given heap pointer as currently live. */
181static void track(void *ptr)
182{
183    if (sizeof(alloced) == 0 || ptr == NULL) {
184        /* Disable tracking if we have no buffer and never track NULL. */
185        return;
186    }
187    for (unsigned int i = 0; i < sizeof(alloced) / sizeof(uintptr_t); i++) {
188        if (alloced[i] == 0) {
189            /* Found a free slot. */
190            alloced[i] = (uintptr_t)ptr;
191            return;
192        }
193    }
194    /* Failed to find a free slot. */
195    error("Exhausted pointer tracking buffer; try increasing "
196          "CONFIG_LIBSEL4DEBUG_ALLOC_BUFFER_ENTRIES value\n");
197}
198
199/* Stop tracking the given pointer (mark it as dead). */
200static void untrack(void *ptr, void *ret_addr)
201{
202    if (sizeof(alloced) == 0 || ptr == NULL) {
203        /* Ignore tracking if we have no buffer or are freeing NULL. */
204        return;
205    }
206    for (unsigned int i = 0; i < sizeof(alloced) / sizeof(uintptr_t); i++) {
207        if (alloced[i] == (uintptr_t)ptr) {
208            /* Found it. */
209            alloced[i] = 0;
210            return;
211        }
212    }
213    /* Failed to find it. */
214    error("Attempt to free pointer %p that was never malloced (called prior "
215          "to %p)\n", ptr, ret_addr);
216}
217
218/* Wrapped functions that will be exported to us from libmuslc. */
219void *__real_malloc(size_t size);
220void __real_free(void *ptr);
221void *__real_calloc(size_t num, size_t size);
222void *__real_realloc(void *ptr, size_t size);
223
224/* Actual allocation wrappers follow. */
225
226void *__wrap_malloc(size_t size)
227{
228    if (err) {
229        return __real_malloc(size);
230    }
231
232    size_t new_size = adjust_size(size);
233    void *ptr = __real_malloc(new_size);
234    ptr = box(ptr, size);
235    track(ptr);
236    return ptr;
237}
238
239void __wrap_free(void *ptr)
240{
241    if (err) {
242        __real_free(ptr);
243        return;
244    }
245
246    void *ret = __builtin_extract_return_addr(__builtin_return_address(0));
247
248    untrack(ptr, ret);
249
250    /* Write garbage all over the region we were handed back to try to expose
251     * use-after-free bugs. If we fault while doing this, it probably means the
252     * user underran their buffer and overwrote the 'size' metadata.
253     */
254    metadata_t *pre = (metadata_t *)(ptr - sizeof(*pre));
255    for (unsigned int i = 0; i < pre->size; i++) {
256        ((char *)ptr)[i] ^= (char)~i;
257    }
258
259    ptr = unbox(ptr, ret);
260    __real_free(ptr);
261}
262
263void *__wrap_calloc(size_t num, size_t size)
264{
265    if (err) {
266        return __real_calloc(num, size);
267    }
268
269    size_t sz = adjust_size(num * size);
270    size_t new_num = sz / size;
271    if (sz % size != 0) {
272        new_num++;
273    }
274
275    void *ptr = __real_calloc(new_num, size);
276    ptr = box(ptr, num * size);
277    track(ptr);
278    return ptr;
279}
280
281void *__wrap_realloc(void *ptr, size_t size)
282{
283    if (err) {
284        return __real_realloc(ptr, size);
285    }
286
287    void *ret = __builtin_extract_return_addr(__builtin_return_address(0));
288
289    untrack(ptr, ret);
290    ptr = unbox(ptr, ret);
291    size_t new_size = adjust_size(size);
292    ptr = __real_realloc(ptr, new_size);
293    ptr = box(ptr, size);
294    track(ptr);
295    return ptr;
296}
297