1/*
2 * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7/* Tool for removing metadata from a CPIO archive.
8 *
9 * The motivation behind this is to work towards idempotent builds. Part of the
10 * seL4 build system forms a CPIO archive of ELF files from the host file
11 * system. This archive inadvertently includes information like the i-node
12 * numbers and modified times of these files. This information is irrelevant at
13 * runtime, but causes the resulting image to not be binary identical between
14 * otherwise identical builds.
15 *
16 * The code that follows strips or replaces the following fields from CPIO file
17 * entries:
18 *  - i-node number
19 *  - UID
20 *  - GID
21 *  - modified time
22 */
23
24#define _XOPEN_SOURCE 700
25
26/* We deliberately use seL4's CPIO library rather than libarchive or similar so
27 * we have the same interpretation of CPIO files as seL4. This isn't strictly
28 * essential, but it's nice for testing the robustness of this library.
29 */
30#include <cpio/cpio.h>
31
32#include <assert.h>
33#include <errno.h>
34#include <stdint.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <sys/mman.h>
39
40/* Find the pointer to a CPIO entry header from a pointer to the entry's data.
41 * This essentially reverses the transformation in cpio_get_entry.
42 */
43static void *get_header(void *data, const char *filename)
44{
45    assert((uintptr_t)data % CPIO_ALIGNMENT == 0);
46    uintptr_t p = (uintptr_t)data - strlen(filename) - 1
47                  - sizeof(struct cpio_header);
48    return (void *)(p - (p % CPIO_ALIGNMENT));
49}
50
51int main(int argc, char **argv)
52{
53    if (argc != 2) {
54        fprintf(stderr, "Usage: %s file\n"
55                " Strip meta data from a CPIO file\n", argv[0]);
56        return -1;
57    }
58
59    FILE *archive = NULL;
60    void *p = NULL;
61    long len = 0;
62
63    archive = fopen(argv[1], "r+");
64    if (archive == NULL) {
65        perror("failed to open archive");
66        goto fail;
67    }
68
69    /* Determine the size of the archive, as we'll need to mmap the whole
70     * thing.
71     */
72    if (fseek(archive, 0, SEEK_END) != 0) {
73        perror("failed to seek archive");
74        goto fail;
75    }
76    len = ftell(archive);
77    if (len == -1) {
78        perror("failed to read size of archive");
79        goto fail;
80    }
81    if (fseek(archive, 0, SEEK_SET) != 0) {
82        perror("failed to return to beginning of archive");
83        goto fail;
84    }
85
86    /* Mmap the file so we can operate on it with libcpio. */
87    p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fileno(archive), 0);
88    if (p == MAP_FAILED) {
89        perror("failed to mmap archive");
90        p = NULL;
91        goto fail;
92    }
93
94    struct cpio_info info = { .file_count = 0 };
95    int err = cpio_info(p, &info);
96    if (err != 0) {
97        fprintf(stderr, "failed to read CPIO info\n");
98        goto fail;
99    }
100
101    for (unsigned int i = 0; i < info.file_count; i++) {
102
103        /* Use libcpio to look up the entry. */
104        unsigned long size;
105        const char *filename;
106        void *data = cpio_get_entry(p, i, &filename, &size);
107        if (data == NULL) {
108            fprintf(stderr, "failed to locate entry %u\n", i);
109            goto fail;
110        }
111
112        /* Reverse the data pointer to a header pointer. */
113        struct cpio_header *header = get_header(data, filename);
114        assert((uintptr_t)header % CPIO_ALIGNMENT == 0);
115
116        /* Synthesise an i-node number. This just needs to be distinct within
117         * the archive. I-node numbers <=10 are reserved on certain file
118         * systems.
119        */
120        unsigned int inode = 11 + i;
121        snprintf(header->c_ino, sizeof(header->c_ino), "%08x", inode);
122
123        /* Set the file owned by 'root'. */
124        memset(header->c_uid, 0, sizeof(header->c_uid));
125        memset(header->c_gid, 0, sizeof(header->c_gid));
126
127        /* Blank the modified time. */
128        memset(header->c_mtime, 0, sizeof(header->c_mtime));
129    }
130
131    munmap(p, len);
132    fclose(archive);
133
134    return 0;
135
136fail:
137    if (p != NULL) {
138        munmap(p, len);
139    }
140    if (archive != NULL) {
141        fclose(archive);
142    }
143    return -1;
144}
145