1/* 2 * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230) 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7/* Tool for removing metadata from a CPIO archive. 8 * 9 * The motivation behind this is to work towards idempotent builds. Part of the 10 * seL4 build system forms a CPIO archive of ELF files from the host file 11 * system. This archive inadvertently includes information like the i-node 12 * numbers and modified times of these files. This information is irrelevant at 13 * runtime, but causes the resulting image to not be binary identical between 14 * otherwise identical builds. 15 * 16 * The code that follows strips or replaces the following fields from CPIO file 17 * entries: 18 * - i-node number 19 * - UID 20 * - GID 21 * - modified time 22 */ 23 24#define _XOPEN_SOURCE 700 25 26/* We deliberately use seL4's CPIO library rather than libarchive or similar so 27 * we have the same interpretation of CPIO files as seL4. This isn't strictly 28 * essential, but it's nice for testing the robustness of this library. 29 */ 30#include <cpio/cpio.h> 31 32#include <assert.h> 33#include <errno.h> 34#include <stdint.h> 35#include <stdio.h> 36#include <stdlib.h> 37#include <string.h> 38#include <sys/mman.h> 39 40/* Find the pointer to a CPIO entry header from a pointer to the entry's data. 41 * This essentially reverses the transformation in cpio_get_entry. 42 */ 43static void *get_header(void *data, const char *filename) 44{ 45 assert((uintptr_t)data % CPIO_ALIGNMENT == 0); 46 uintptr_t p = (uintptr_t)data - strlen(filename) - 1 47 - sizeof(struct cpio_header); 48 return (void *)(p - (p % CPIO_ALIGNMENT)); 49} 50 51int main(int argc, char **argv) 52{ 53 if (argc != 2) { 54 fprintf(stderr, "Usage: %s file\n" 55 " Strip meta data from a CPIO file\n", argv[0]); 56 return -1; 57 } 58 59 FILE *archive = NULL; 60 void *p = NULL; 61 long len = 0; 62 63 archive = fopen(argv[1], "r+"); 64 if (archive == NULL) { 65 perror("failed to open archive"); 66 goto fail; 67 } 68 69 /* Determine the size of the archive, as we'll need to mmap the whole 70 * thing. 71 */ 72 if (fseek(archive, 0, SEEK_END) != 0) { 73 perror("failed to seek archive"); 74 goto fail; 75 } 76 len = ftell(archive); 77 if (len == -1) { 78 perror("failed to read size of archive"); 79 goto fail; 80 } 81 if (fseek(archive, 0, SEEK_SET) != 0) { 82 perror("failed to return to beginning of archive"); 83 goto fail; 84 } 85 86 /* Mmap the file so we can operate on it with libcpio. */ 87 p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fileno(archive), 0); 88 if (p == MAP_FAILED) { 89 perror("failed to mmap archive"); 90 p = NULL; 91 goto fail; 92 } 93 94 struct cpio_info info = { .file_count = 0 }; 95 int err = cpio_info(p, &info); 96 if (err != 0) { 97 fprintf(stderr, "failed to read CPIO info\n"); 98 goto fail; 99 } 100 101 for (unsigned int i = 0; i < info.file_count; i++) { 102 103 /* Use libcpio to look up the entry. */ 104 unsigned long size; 105 const char *filename; 106 void *data = cpio_get_entry(p, i, &filename, &size); 107 if (data == NULL) { 108 fprintf(stderr, "failed to locate entry %u\n", i); 109 goto fail; 110 } 111 112 /* Reverse the data pointer to a header pointer. */ 113 struct cpio_header *header = get_header(data, filename); 114 assert((uintptr_t)header % CPIO_ALIGNMENT == 0); 115 116 /* Synthesise an i-node number. This just needs to be distinct within 117 * the archive. I-node numbers <=10 are reserved on certain file 118 * systems. 119 */ 120 unsigned int inode = 11 + i; 121 snprintf(header->c_ino, sizeof(header->c_ino), "%08x", inode); 122 123 /* Set the file owned by 'root'. */ 124 memset(header->c_uid, 0, sizeof(header->c_uid)); 125 memset(header->c_gid, 0, sizeof(header->c_gid)); 126 127 /* Blank the modified time. */ 128 memset(header->c_mtime, 0, sizeof(header->c_mtime)); 129 } 130 131 munmap(p, len); 132 fclose(archive); 133 134 return 0; 135 136fail: 137 if (p != NULL) { 138 munmap(p, len); 139 } 140 if (archive != NULL) { 141 fclose(archive); 142 } 143 return -1; 144} 145