1/**
2 * \file
3 * \brief CPIO archive reader routines for newc/crc/bin variants.
4 */
5
6/*
7 * Copyright (c) 2007, 2008, 2011, ETH Zurich.
8 * All rights reserved.
9 *
 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#include <stdint.h>
16#include <stdio.h>
17#include <string.h>
18
19#include <barrelfish/static_assert.h>
20#include <sys/param.h> // for MIN
21
22#include "cpiobin.h"
23
/**
 * \brief Header layout of the legacy binary ("bin") cpio format.
 *
 * All fields are kept as raw byte arrays and decoded on demand; the struct
 * is packed so that it overlays the archive bytes exactly (26 bytes).
 * Field order and sizes must not change.
 */
typedef struct {
    uint8_t magic[2];       // compared against 070707 in either byte order
    uint8_t dev[2];
    uint8_t inode[2];
    uint8_t mode[2];        // decoded into cpio_mode_bits_t
    uint8_t uid[2];
    uint8_t gid[2];
    uint8_t nlink[2];
    uint8_t rdev[2];
    uint8_t mtime[4];
    uint8_t namesize[2];    // length in bytes of the name that follows the header
    uint8_t filesize[4];    // length in bytes of the entry's data
} __attribute__((packed)) cpio_bin_header_t;
38
39STATIC_ASSERT_SIZEOF(cpio_bin_header_t, 26);
40
/**
 * \brief Header layout shared by the "newc" and "crc" cpio formats.
 *
 * A 6-character magic is followed by thirteen 8-character fields, which
 * this file validates and decodes as hexadecimal text. The struct is packed
 * so that it overlays the archive bytes exactly (110 bytes). Field order
 * and sizes must not change.
 */
typedef struct {
    char magic[6];          // "070701" (newc) or "070702" (crc)
    char inode[8];
    char mode[8];           // decoded into cpio_mode_bits_t
    char uid[8];
    char gid[8];
    char nlink[8];
    char mtime[8];
    char filesize[8];       // length in bytes of the entry's data
    char devmajor[8];
    char devminor[8];
    char rdevmajor[8];
    char rdevminor[8];
    char namesize[8];       // length in bytes of the name that follows the header
    char check[8];          // checksum compared by the crc validation visitor
} __attribute__((packed)) cpio_newc_header_t;
58
59STATIC_ASSERT_SIZEOF(cpio_newc_header_t, 110);
60
61static const char CPIO_LAST[] = "TRAILER!!!";
62const size_t CPIO_BIN_LAST_SIZE = sizeof(cpio_bin_header_t) + sizeof(CPIO_LAST);
63const size_t CPIO_NEWC_LAST_SIZE = sizeof(cpio_newc_header_t) + sizeof(CPIO_LAST);
64
65// ----------------------------------------------------------------------------
66// Identification
67
68static inline int
69cpio_is_bin_be(const uint8_t* buffer)
70{
71    const cpio_bin_header_t* h = (const cpio_bin_header_t*)buffer;
72    return (h->magic[0] * 0x100 + h->magic[1]) == 070707;
73}
74
75static inline int
76cpio_is_bin_le(const uint8_t* buffer)
77{
78    const cpio_bin_header_t* h = (const cpio_bin_header_t*)buffer;
79    return (h->magic[0] + h->magic[1] * 0x100) == 070707;
80}
81
82static inline int
83cpio_is_newc(const uint8_t* buffer)
84{
85    const cpio_newc_header_t* h = (const cpio_newc_header_t*)buffer;
86    return strncmp(h->magic, "070701", 6) == 0;
87}
88
89static inline int
90cpio_is_crc(const uint8_t* buffer)
91{
92    const cpio_newc_header_t* h = (const cpio_newc_header_t*)buffer;
93    return strncmp(h->magic, "070702", 6) == 0;
94}
95
96static inline int
97cpio_mode_is_file(cpio_mode_bits_t m)
98{
99    return CPIO_MODE_FILE == (m & CPIO_MODE_FILE_TYPE_MASK);
100}
101
102// ----------------------------------------------------------------------------
103// Decoding
104
/**
 * \brief Local ASCII-only upper-casing helper.
 *
 * \param c  Character code.
 * \return The upper-case letter for 'a'..'z'; any other value unchanged.
 */
static inline
int toupper(int c)
{
    const int is_lower = (c >= 'a') && (c <= 'z');

    return is_lower ? c + ('A' - 'a') : c;
}
114
/**
 * \brief Local ASCII-only test for a hexadecimal digit.
 *
 * \param c  Character code.
 * \return 1 if \p c is in '0'..'9', 'A'..'F' or 'a'..'f', else 0.
 */
static inline
int isxdigit(int c)
{
    if (c >= '0' && c <= '9')
    {
        return 1;
    }
    if (c >= 'A' && c <= 'F')
    {
        return 1;
    }
    if (c >= 'a' && c <= 'f')
    {
        return 1;
    }
    return 0;
}
122
123static inline uint16_t
124c2u16(const cpio_bin_header_t* h, const uint8_t* p)
125{
126    if (cpio_is_bin_be((const uint8_t*)h))
127    {
128        return (p[0] * 0x100) + p[1];
129    }
130    else
131    {
132        return (p[1] * 0x100) + p[0];
133    }
134}
135
136static inline uint32_t
137c4u32(const cpio_bin_header_t* h, const uint8_t* p)
138{
139    if (cpio_is_bin_be((const uint8_t*)h))
140    {
141        return (p[0] * 0x1000000) + (p[1] * 0x10000) + (p[2] * 0x100) + p[3];
142    }
143    else
144    {
145        return (p[3] * 0x100 + p[2]) + (p[1] * 0x100 + p[0]) * 0x10000;
146    }
147}
148
/**
 * \brief Convert one ASCII hexadecimal digit to its numeric value.
 *
 * \param c  Character to convert.
 * \return 0..15 for '0'..'9', 'a'..'f' and 'A'..'F'; 0 for a NUL
 *         character; all bits set (UINT32_MAX) for any other character.
 */
static inline uint32_t
a1u8(char c)
{
    if (c == 0)
    {
        return 0;
    }
    if (c >= '0' && c <= '9')
    {
        return (uint32_t)(c - '0');
    }
    if (c >= 'A' && c <= 'F')
    {
        return (uint32_t)(c - 'A') + 10u;
    }
    if (c >= 'a' && c <= 'f')
    {
        return (uint32_t)(c - 'a') + 10u;
    }
    // Not a hexadecimal digit. The range tests above must use '&&'; an
    // '||' between the bounds is always true and makes this unreachable.
    return UINT32_MAX;
}
172
/**
 * \brief Decode six hexadecimal characters into a 32-bit value.
 *
 * \param c  Pointer to at least six characters; c[0] is the most
 *           significant digit.
 * \return The per-character values from a1u8() combined with bitwise OR,
 *         four bits apart.
 */
static inline uint32_t
a6u32(const char* c)
{
    uint32_t value = 0;

    // Shifting the accumulator left by four and OR-ing in the next digit
    // yields the same bits as OR-ing each digit at its final position.
    for (int i = 0; i < 6; i++)
    {
        value = (value << 4) | a1u8(c[i]);
    }
    return value;
}
179
/**
 * \brief Decode eight hexadecimal characters into a 32-bit value.
 *
 * \param c  Pointer to at least eight characters; c[0] is the most
 *           significant digit.
 * \return The per-character values from a1u8() combined with bitwise OR,
 *         four bits apart.
 */
static inline uint32_t
a8u32(const char* c)
{
    uint32_t value = 0;

    // Shifting the accumulator left by four and OR-ing in the next digit
    // yields the same bits as OR-ing each digit at its final position.
    for (int i = 0; i < 8; i++)
    {
        value = (value << 4) | a1u8(c[i]);
    }
    return value;
}
187
188// ----------------------------------------------------------------------------
189// Legacy Format (bin) accessors
190
/**
 * \brief Round a size up to the next multiple of two.
 *
 * \param s  Value to align.
 * \return \p s if it is even, otherwise \p s + 1.
 */
static inline uintptr_t
cpio_bin_align(uintptr_t s)
{
    const uintptr_t low_bit = 1;

    return (s + low_bit) & ~low_bit;
}
196
197static inline uint16_t
198cpio_bin_name_bytes(const cpio_bin_header_t* h)
199{
200    return c2u16(h, h->namesize);
201}
202
203static inline const uint8_t*
204cpio_bin_data_start(const cpio_bin_header_t* h)
205{
206    return cpio_bin_align(cpio_bin_name_bytes(h)) + ((const uint8_t*)(h + 1));
207}
208
209static inline uint32_t
210cpio_bin_data_bytes(const cpio_bin_header_t* h)
211{
212    return c4u32(h, h->filesize);
213}
214
215static inline cpio_mode_bits_t
216cpio_bin_mode(const cpio_bin_header_t* h)
217{
218    return (cpio_mode_bits_t)c2u16(h, h->mode);
219}
220
221static inline const char*
222cpio_bin_name(const cpio_bin_header_t* h)
223{
224    return (const char*)(h + 1);
225}
226
/**
 * \brief Check whether a buffer starts with a binary-format header.
 *
 * Only the magic is examined; either byte order is accepted.
 *
 * \param h  Start of a candidate header.
 * \return 1 if the magic matches in big- or little-endian order, else 0.
 */
static inline int
cpio_valid_bin_header(const uint8_t* h)
{
    if (cpio_is_bin_be(h))
    {
        return 1;
    }
    return cpio_is_bin_le(h) ? 1 : 0;
}
232
233// ----------------------------------------------------------------------------
234// newc / crc accessors
235
/**
 * \brief Round a value up to the next multiple of four.
 *
 * \param s  Value to align.
 * \return The smallest multiple of four that is not less than \p s
 *         (wrapping modulo the width of uintptr_t).
 */
static inline uintptr_t
cpio_newc_align(uintptr_t s)
{
    const uintptr_t remainder = s & (uintptr_t)3;

    if (remainder == 0)
    {
        return s;
    }
    return s + ((uintptr_t)4 - remainder);
}
241
242static inline uint32_t
243cpio_newc_name_bytes(const cpio_newc_header_t* h)
244{
245    return a8u32(h->namesize);
246}
247
248static inline const uint8_t*
249cpio_newc_data_start(const cpio_newc_header_t* h)
250{
251    uintptr_t h_end = (uintptr_t)(h + 1);
252    return (const uint8_t*)cpio_newc_align(cpio_newc_name_bytes(h) + h_end);
253}
254
255static inline uint32_t
256cpio_newc_data_bytes(const cpio_newc_header_t* h)
257{
258    return a8u32(h->filesize);
259}
260
261static inline cpio_mode_bits_t
262cpio_newc_mode(const cpio_newc_header_t* h)
263{
264    return (cpio_mode_bits_t)a8u32(h->mode);
265}
266
267static inline const char*
268cpio_newc_name(const cpio_newc_header_t* h)
269{
270    return (const char*)(h + 1);
271}
272
273static inline uint32_t
274cpio_newc_checksum(const cpio_newc_header_t* h)
275{
276    return a8u32(h->check);
277}
278
/**
 * \brief Check that a byte range contains only hexadecimal digits.
 *
 * \param field  First byte to examine.
 * \param bytes  Number of bytes to examine.
 * \return 1 if every byte is a hexadecimal digit (trivially true for an
 *         empty range), 0 as soon as a non-digit is found.
 */
static int
is_newc_string(const uint8_t* field, size_t bytes)
{
    const char* cur = (const char*)field;
    const char* const end = cur + bytes;

    for (; cur != end; cur++)
    {
        if (!isxdigit(*cur))
        {
            return 0;
        }
    }
    return 1;
}
295
296static inline int
297cpio_valid_newc_header(const uint8_t* h)
298{
299    return ((cpio_is_newc(h) || cpio_is_crc(h)) &&
300            is_newc_string(h+ 6, 8) && is_newc_string(h+14, 8) &&
301            is_newc_string(h+22, 8) && is_newc_string(h+30, 8) &&
302            is_newc_string(h+38, 8) && is_newc_string(h+46, 8) &&
303            is_newc_string(h+54, 8) && is_newc_string(h+62, 8) &&
304            is_newc_string(h+70, 8) && is_newc_string(h+78, 8) &&
305            is_newc_string(h+86, 8) && is_newc_string(h+94, 8) &&
306            (is_newc_string(h+102, 8)));
307}
308
309static int
310cpio_bin_visit(
311    const uint8_t*         cpio_base,
312    size_t                 cpio_bytes,
313    cpio_visitor_t         cv,
314    cpio_generic_header_t* g,
315    void*                  arg
316    )
317{
318    const uint8_t* cpio_limit = cpio_base + cpio_bytes;
319    int visited = 0;
320
321    while (cpio_limit - cpio_base >= CPIO_BIN_LAST_SIZE &&
322           cpio_valid_bin_header(cpio_base))
323    {
324        const cpio_bin_header_t* h = (const cpio_bin_header_t*)cpio_base;
325
326        g->mode     = cpio_bin_mode(h);
327        g->name     = cpio_bin_name(h);
328        g->data     = cpio_bin_data_start(h);
329        g->datasize = cpio_bin_data_bytes(h);
330        g->checksum = 0;
331
332        if (((g->data + g->datasize) > cpio_limit) ||
333            (cv(visited, g, arg) && (g->mode != 0)))
334        {
335            break;
336        }
337        cpio_base = (cpio_bin_data_start(h) + cpio_bin_align(cpio_bin_data_bytes(h)));
338        visited++;
339    }
340    return visited;
341}
342
343static int
344cpio_newc_visit(
345    const uint8_t*         cpio_base,
346    size_t                 cpio_bytes,
347    cpio_visitor_t         cv,
348    cpio_generic_header_t* g,
349    void*                  arg
350    )
351{
352    const uint8_t* cpio_limit = cpio_base + cpio_bytes;
353    int visited = 0;
354
355    while (cpio_limit - cpio_base >= CPIO_NEWC_LAST_SIZE &&
356           cpio_valid_newc_header(cpio_base))
357    {
358        const cpio_newc_header_t* h = (const cpio_newc_header_t*)cpio_base;
359
360        g->mode     = cpio_newc_mode(h);
361        g->name     = cpio_newc_name(h);
362        g->data     = cpio_newc_data_start(h);
363        g->datasize = cpio_newc_data_bytes(h);
364        g->checksum = cpio_newc_checksum(h);
365
366        if (((g->data + g->datasize) > cpio_limit) ||
367            (cv(visited, g, arg) && (g->mode != 0)))
368        {
369            break;
370        }
371        cpio_base = (cpio_newc_data_start(h) + cpio_newc_align(cpio_newc_data_bytes(h)));
372        visited++;
373    }
374    return visited;
375}
376
377int
378cpio_visit(
379    const uint8_t*         cpio_base,
380    size_t                 cpio_bytes,
381    cpio_visitor_t         cpio_visit_fn,
382    cpio_generic_header_t* g,
383    void*                  arg
384)
385{
386    int visited = 0;
387    if (cpio_bytes >= CPIO_BIN_LAST_SIZE &&
388        cpio_valid_bin_header(cpio_base))
389    {
390        visited = cpio_bin_visit(cpio_base, cpio_bytes, cpio_visit_fn, g, arg);
391    }
392    else if (cpio_bytes >= CPIO_NEWC_LAST_SIZE &&
393             cpio_valid_newc_header(cpio_base))
394    {
395        visited = cpio_newc_visit(cpio_base, cpio_bytes, cpio_visit_fn, g, arg);
396    }
397    return visited;
398}
399
400struct cpio_find_info
401{
402    const char* search_name;
403    int ordinal;
404
405    const cpio_generic_header_t* header; // result
406};
407
408static int
409cpio_match_name(int ordinal, const cpio_generic_header_t* header, void* arg)
410{
411    struct cpio_find_info* cfi = (struct cpio_find_info*)arg;
412    int match = !strcmp(cfi->search_name, header->name);
413    if (match)
414    {
415        cfi->ordinal = ordinal;
416        cfi->header  = header;
417    }
418    return match;
419}
420
421int
422cpio_get_file_by_name(
423    const uint8_t*  cpio_base,
424    size_t          cpio_bytes,
425    const char*     name,
426    const uint8_t** file_base,
427    size_t*         file_bytes
428    )
429{
430    cpio_generic_header_t h;
431
432    struct cpio_find_info cfi = { name, 0, NULL };
433    cpio_visit(cpio_base, cpio_bytes, cpio_match_name, &h, &cfi);
434
435    int match = (cfi.header == &h) && cpio_mode_is_file(h.mode);
436    if (match)
437    {
438        if (file_base != NULL)
439        {
440            *file_base = h.data;
441        }
442        if (file_bytes != NULL)
443        {
444            *file_bytes = h.datasize;
445        }
446    }
447    return match;
448}
449
450static int
451cpio_match_file_ordinal(
452    int                          ordinal,
453    const cpio_generic_header_t* header,
454    void*                        arg
455    )
456{
457    int match = 0;
458    if (cpio_mode_is_file(header->mode))
459    {
460        struct cpio_find_info* cfi = (struct cpio_find_info*)arg;
461
462        if (cfi->ordinal == 0)
463        {
464            cfi->header = header;
465            match = 1;
466        }
467        else
468        {
469            cfi->ordinal--;
470        }
471    }
472    return match;
473}
474
475int
476cpio_get_file_by_ordinal(
477    const uint8_t*  cpio_base,
478    size_t          cpio_bytes,
479    uint32_t        ordinal,
480    const char**    file_name,
481    const uint8_t** file_base,
482    size_t*         file_bytes
483    )
484{
485    cpio_generic_header_t h;
486    struct cpio_find_info cfi = { NULL, ordinal, NULL };
487
488    cpio_visit(cpio_base, cpio_bytes, cpio_match_file_ordinal, &h, &cfi);
489    int match = (cfi.header == &h);
490    if (match)
491    {
492        if (NULL != file_name)
493        {
494            *file_name = cfi.header->name;
495        }
496        if (NULL != file_base)
497        {
498            *file_base = cfi.header->data;
499        }
500        if (NULL != file_bytes)
501        {
502            *file_bytes = cfi.header->datasize;
503        }
504    }
505    return match;
506}
507
508size_t
509cpio_archive_bytes(
510    const uint8_t* cpio_base,
511    size_t         cpio_bytes
512    )
513{
514    // Search for trailer
515    cpio_generic_header_t g;
516    struct cpio_find_info cfi = { CPIO_LAST, 0, NULL };
517
518    cpio_visit(cpio_base, cpio_bytes, cpio_match_name, &g, &cfi);
519    if (cfi.header != NULL) {
520        const uint8_t* cpio_limit = cfi.header->data - 1;
521        // Limit may be too far because of CPIO alignment rounding up
522        // of the data start region so...
523        return MIN((size_t)(cpio_limit - cpio_base), cpio_bytes);
524    }
525    else
526    {
527        return 0;
528    }
529}
530
531static int
532cpio_crc_visitor(int ordinal, const cpio_generic_header_t* header, void *arg)
533{
534    int* found_trailer = (int*)arg;
535
536    if (header->datasize > 0)
537    {
538        // "CRC" is the unsigned 32-bit sum of file data.
539        uint32_t computed = 0;
540        for (size_t i = 0; i < header->datasize; i++)
541        {
542            computed += header->data[i];
543        }
544        // Stop visiting if mismatch.
545        return computed != header->checksum;
546    }
547    else if (header->name)
548    {
549        // Success is reaching the trailer
550        *found_trailer = !strcmp(header->name, CPIO_LAST);
551    }
552    return 0;
553}
554
555int
556cpio_archive_valid(const uint8_t* cpio_base, size_t cpio_bytes)
557{
558    if ((cpio_bytes > sizeof(cpio_newc_header_t) &&
559         cpio_is_crc(cpio_base)))
560    {
561        int found_trailer = 0;  // Success is reaching the trailer
562        cpio_generic_header_t g;
563        cpio_newc_visit(cpio_base, cpio_bytes, cpio_crc_visitor,
564                        &g, &found_trailer);
565        return found_trailer;
566    }
567
568    return cpio_archive_bytes(cpio_base, cpio_bytes) > 0;
569}
570
571#ifdef TEST_CPIO
572
#include <fcntl.h>
#include <malloc.h>
#include <sys/stat.h>
#include <unistd.h>
576
577static void cpio_newc_test(uint8_t* data, size_t data_bytes)
578{
579    if (data_bytes >= sizeof(cpio_newc_header_t*))
580    {
581        const cpio_newc_header_t* ph = (const cpio_newc_header_t*)data;
582        printf("newc magic %08x inode %08x\n",
583               a6u32(ph->magic), a8u32(ph->inode));
584    }
585}
586
587static int
588cpio_dump_info(int ordinal, const cpio_generic_header_t* header, void* arg)
589{
590    const uint8_t* file_data  = header->data;
591    uint32_t       file_bytes = header->datasize;
592
593    printf("%3d %-16s mode %7o %p...%p [%d bytes]\n",
594           ordinal, header->name, header->mode,
595           file_data, file_data + file_bytes, file_bytes);
596
597    return 0;
598}
599
600int main(int argc, const char* argv[])
601{
602    if (argc == 2)
603    {
604        size_t read_bytes;
605        struct stat st;
606        int fd = open(argv[1], O_RDONLY);
607
608        if (0 > fd)
609        {
610            fprintf(stderr, "Could not open %s\n", argv[1]);
611            return -1;
612        }
613
614        fstat(fd, &st);
615        size_t max_bytes = st.st_size;
616
617        uint8_t* read_data = (uint8_t*)malloc(max_bytes);
618        read_bytes = read(fd, read_data, max_bytes);
619
620        printf("Read %lu bytes\n", read_bytes);
621        printf("Image Valid %d Image bytes %lu\n",
622               cpio_archive_valid(read_data, read_bytes),
623               cpio_archive_bytes(read_data, read_bytes));
624
625        cpio_generic_header_t g;
626        cpio_visit(read_data, read_bytes, cpio_dump_info, &g, NULL);
627        {
628            const char* files[] = { "bfish/tip/build/hake/Path.o", "sbin/.marker", "fred" };
629            int i;
630            for (i = 0; i < sizeof(files) / sizeof(files[0]); i++)
631            {
632                const uint8_t *my_base;
633                size_t my_bytes;
634                if (cpio_get_file_by_name(read_data, read_bytes, files[i],
635                                          &my_base, &my_bytes))
636                {
637                    printf("%s => %p...%p [%lu bytes]\n",
638                           files[i], my_base, my_base + my_bytes, my_bytes);
639                }
640                else
641                {
642                    printf("%s => FILE NOT FOUND\n", files[i]);
643                }
644            }
645        }
646
647        free(read_data);
648        close(fd);
649    }
650    return 0;
651}
652
653#endif // TEST_CPIO
654