1/*-
2 * Copyright (c) 2008 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26/*
27 * Parse Composite Document Files, the format used in Microsoft Office
28 * document files before they switched to zipped XML.
29 * Info from: http://sc.openoffice.org/compdocfileformat.pdf
30 *
31 * N.B. This is the "Composite Document File" format, and not the
32 * "Compound Document Format", nor the "Channel Definition Format".
33 */
34
35#include "file.h"
36
37#ifndef lint
38FILE_RCSID("@(#)$File: cdf.c,v 1.123 2022/09/24 20:30:13 christos Exp $")
39#endif
40
41#include <assert.h>
42#ifdef CDF_DEBUG
43#include <err.h>
44#endif
45#include <stdlib.h>
46#include <unistd.h>
47#include <string.h>
48#include <time.h>
49#include <ctype.h>
50#include <limits.h>
51#ifdef HAVE_BYTESWAP_H
52#include <byteswap.h>
53#endif
54#ifdef HAVE_SYS_BSWAP_H
55#include <sys/bswap.h>
56#endif
57
/* Use EINVAL as the "bad file type" errno when EFTYPE is not defined. */
#ifndef EFTYPE
#define EFTYPE EINVAL
#endif

/* Largest value a size_t can hold, when not already defined. */
#ifndef SIZE_T_MAX
#define SIZE_T_MAX CAST(size_t, ~0ULL)
#endif

#include "cdf.h"

/*
 * Debug tracing.  With CDF_DEBUG defined, DPRINTF((fmt, ...)) prints to
 * stdout and flushes it; otherwise it expands to nothing.  The argument
 * is a complete parenthesized printf argument list, hence the double
 * parentheses at every call site.
 */
#ifdef CDF_DEBUG
#define DPRINTF(a) printf a, fflush(stdout)
#else
#define DPRINTF(a)
#endif
73
/*
 * Host byte-order probe.  cdf_read_header() stores the bytes 01 02 03 04
 * in cdf_bo.s; reading them back through cdf_bo.u shows how the host
 * orders the bytes of a 32-bit integer.
 */
static union {
	char s[4];
	uint32_t u;
} cdf_bo;

/* True when the probe bytes read back as 0x01020304 (big-endian host). */
#define NEED_SWAP	(cdf_bo.u == CAST(uint32_t, 0x01020304))

/*
 * Fixed-width conversions: byte-swap x when NEED_SWAP is true, otherwise
 * yield x unchanged (cast to the fixed-width type).
 */
#define CDF_TOLE8(x)	\
    (CAST(uint64_t, NEED_SWAP ? _cdf_tole8(x) : CAST(uint64_t, x)))
#define CDF_TOLE4(x)	\
    (CAST(uint32_t, NEED_SWAP ? _cdf_tole4(x) : CAST(uint32_t, x)))
#define CDF_TOLE2(x)	\
    (CAST(uint16_t, NEED_SWAP ? _cdf_tole2(x) : CAST(uint16_t, x)))
/* Size-dispatching variant: selects the 2-, 4- or 8-byte form by sizeof(x). */
#define CDF_TOLE(x)	(/*CONSTCOND*/sizeof(x) == 2 ? \
			    CDF_TOLE2(CAST(uint16_t, x)) : \
			(/*CONSTCOND*/sizeof(x) == 4 ? \
			    CDF_TOLE4(CAST(uint32_t, x)) : \
			    CDF_TOLE8(CAST(uint64_t, x))))
/* Fetch the y-th 32-bit word starting at byte pointer x; see cdf_getuint32(). */
#define CDF_GETUINT32(x, y)	cdf_getuint32(x, y)

/* Allocation wrappers that hand the call site to the traced allocators. */
#define CDF_MALLOC(n) cdf_malloc(__FILE__, __LINE__, (n))
#define CDF_REALLOC(p, n) cdf_realloc(__FILE__, __LINE__, (p), (n))
#define CDF_CALLOC(n, u) cdf_calloc(__FILE__, __LINE__, (n), (u))
97
98
99/*ARGSUSED*/
100static void *
101cdf_malloc(const char *file __attribute__((__unused__)),
102    size_t line __attribute__((__unused__)), size_t n)
103{
104	DPRINTF(("%s,%" SIZE_T_FORMAT "u: %s %" SIZE_T_FORMAT "u\n",
105	    file, line, __func__, n));
106	if (n == 0)
107	    n++;
108	return malloc(n);
109}
110
111/*ARGSUSED*/
112static void *
113cdf_realloc(const char *file __attribute__((__unused__)),
114    size_t line __attribute__((__unused__)), void *p, size_t n)
115{
116	DPRINTF(("%s,%" SIZE_T_FORMAT "u: %s %" SIZE_T_FORMAT "u\n",
117	    file, line, __func__, n));
118	return realloc(p, n);
119}
120
121/*ARGSUSED*/
122static void *
123cdf_calloc(const char *file __attribute__((__unused__)),
124    size_t line __attribute__((__unused__)), size_t n, size_t u)
125{
126	DPRINTF(("%s,%" SIZE_T_FORMAT "u: %s %" SIZE_T_FORMAT "u %"
127	    SIZE_T_FORMAT "u\n", file, line, __func__, n, u));
128	if (n == 0)
129	    n++;
130	return calloc(n, u);
131}
132
133#if defined(HAVE_BYTESWAP_H)
134# define _cdf_tole2(x)	bswap_16(x)
135# define _cdf_tole4(x)	bswap_32(x)
136# define _cdf_tole8(x)	bswap_64(x)
137#elif defined(HAVE_SYS_BSWAP_H)
138# define _cdf_tole2(x)	bswap16(x)
139# define _cdf_tole4(x)	bswap32(x)
140# define _cdf_tole8(x)	bswap64(x)
141#else
142/*
143 * swap a short
144 */
145static uint16_t
146_cdf_tole2(uint16_t sv)
147{
148	uint16_t rv;
149	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
150	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
151	d[0] = s[1];
152	d[1] = s[0];
153	return rv;
154}
155
156/*
157 * swap an int
158 */
159static uint32_t
160_cdf_tole4(uint32_t sv)
161{
162	uint32_t rv;
163	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
164	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
165	d[0] = s[3];
166	d[1] = s[2];
167	d[2] = s[1];
168	d[3] = s[0];
169	return rv;
170}
171
172/*
173 * swap a quad
174 */
175static uint64_t
176_cdf_tole8(uint64_t sv)
177{
178	uint64_t rv;
179	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
180	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
181	d[0] = s[7];
182	d[1] = s[6];
183	d[2] = s[5];
184	d[3] = s[4];
185	d[4] = s[3];
186	d[5] = s[2];
187	d[6] = s[1];
188	d[7] = s[0];
189	return rv;
190}
191#endif
192
/*
 * Fetch the offs-th 32-bit word of the byte array p.  The bytes are
 * copied with memcpy() so p need not be aligned, and the result is
 * passed through CDF_TOLE4() before being returned.
 */
static uint32_t
cdf_getuint32(const uint8_t *p, size_t offs)
{
	const uint8_t *src = p + offs * sizeof(uint32_t);
	uint32_t word;

	(void)memcpy(&word, src, sizeof(word));
	return CDF_TOLE4(word);
}
204
/*
 * Sequential unpack helpers.  Both rely on a byte buffer named `buf' and
 * a running offset named `len' in the enclosing scope: they copy
 * sizeof(a) bytes from &buf[len] and then advance len by that amount.
 * CDF_UNPACK takes an lvalue and copies to its address; CDF_UNPACKA
 * passes `a' to memcpy() directly, which is what an array needs.
 */
#define CDF_UNPACK(a)	\
    (void)memcpy(&(a), &buf[len], sizeof(a)), len += sizeof(a)
#define CDF_UNPACKA(a)	\
    (void)memcpy((a), &buf[len], sizeof(a)), len += sizeof(a)
209
/* Exported function form of CDF_TOLE2() for a 16-bit value. */
uint16_t
cdf_tole2(uint16_t sv)
{
	const uint16_t converted = CDF_TOLE2(sv);

	return converted;
}
215
/* Exported function form of CDF_TOLE4() for a 32-bit value. */
uint32_t
cdf_tole4(uint32_t sv)
{
	const uint32_t converted = CDF_TOLE4(sv);

	return converted;
}
221
/* Exported function form of CDF_TOLE8() for a 64-bit value. */
uint64_t
cdf_tole8(uint64_t sv)
{
	const uint64_t converted = CDF_TOLE8(sv);

	return converted;
}
227
228void
229cdf_swap_header(cdf_header_t *h)
230{
231	size_t i;
232
233	h->h_magic = CDF_TOLE8(h->h_magic);
234	h->h_uuid[0] = CDF_TOLE8(h->h_uuid[0]);
235	h->h_uuid[1] = CDF_TOLE8(h->h_uuid[1]);
236	h->h_revision = CDF_TOLE2(h->h_revision);
237	h->h_version = CDF_TOLE2(h->h_version);
238	h->h_byte_order = CDF_TOLE2(h->h_byte_order);
239	h->h_sec_size_p2 = CDF_TOLE2(h->h_sec_size_p2);
240	h->h_short_sec_size_p2 = CDF_TOLE2(h->h_short_sec_size_p2);
241	h->h_num_sectors_in_sat = CDF_TOLE4(h->h_num_sectors_in_sat);
242	h->h_secid_first_directory = CDF_TOLE4(h->h_secid_first_directory);
243	h->h_min_size_standard_stream =
244	    CDF_TOLE4(h->h_min_size_standard_stream);
245	h->h_secid_first_sector_in_short_sat =
246	    CDF_TOLE4(CAST(uint32_t, h->h_secid_first_sector_in_short_sat));
247	h->h_num_sectors_in_short_sat =
248	    CDF_TOLE4(h->h_num_sectors_in_short_sat);
249	h->h_secid_first_sector_in_master_sat =
250	    CDF_TOLE4(CAST(uint32_t, h->h_secid_first_sector_in_master_sat));
251	h->h_num_sectors_in_master_sat =
252	    CDF_TOLE4(h->h_num_sectors_in_master_sat);
253	for (i = 0; i < __arraycount(h->h_master_sat); i++) {
254		h->h_master_sat[i] =
255		    CDF_TOLE4(CAST(uint32_t, h->h_master_sat[i]));
256	}
257}
258
/*
 * Copy the raw header bytes in buf into the fields of *h, one field after
 * another.  The order of the CDF_UNPACK calls is the order in which the
 * fields are taken from the buffer, so it must not be rearranged.  No
 * byte-order conversion happens here; see cdf_swap_header().
 */
void
cdf_unpack_header(cdf_header_t *h, char *buf)
{
	size_t i;
	/* running offset into buf, advanced by each CDF_UNPACK/CDF_UNPACKA */
	size_t len = 0;

	CDF_UNPACK(h->h_magic);
	CDF_UNPACKA(h->h_uuid);
	CDF_UNPACK(h->h_revision);
	CDF_UNPACK(h->h_version);
	CDF_UNPACK(h->h_byte_order);
	CDF_UNPACK(h->h_sec_size_p2);
	CDF_UNPACK(h->h_short_sec_size_p2);
	CDF_UNPACKA(h->h_unused0);
	CDF_UNPACK(h->h_num_sectors_in_sat);
	CDF_UNPACK(h->h_secid_first_directory);
	CDF_UNPACKA(h->h_unused1);
	CDF_UNPACK(h->h_min_size_standard_stream);
	CDF_UNPACK(h->h_secid_first_sector_in_short_sat);
	CDF_UNPACK(h->h_num_sectors_in_short_sat);
	CDF_UNPACK(h->h_secid_first_sector_in_master_sat);
	CDF_UNPACK(h->h_num_sectors_in_master_sat);
	/* the master SAT slots are unpacked element by element */
	for (i = 0; i < __arraycount(h->h_master_sat); i++)
		CDF_UNPACK(h->h_master_sat[i]);
}
284
285void
286cdf_swap_dir(cdf_directory_t *d)
287{
288	d->d_namelen = CDF_TOLE2(d->d_namelen);
289	d->d_left_child = CDF_TOLE4(CAST(uint32_t, d->d_left_child));
290	d->d_right_child = CDF_TOLE4(CAST(uint32_t, d->d_right_child));
291	d->d_storage = CDF_TOLE4(CAST(uint32_t, d->d_storage));
292	d->d_storage_uuid[0] = CDF_TOLE8(d->d_storage_uuid[0]);
293	d->d_storage_uuid[1] = CDF_TOLE8(d->d_storage_uuid[1]);
294	d->d_flags = CDF_TOLE4(d->d_flags);
295	d->d_created = CDF_TOLE8(CAST(uint64_t, d->d_created));
296	d->d_modified = CDF_TOLE8(CAST(uint64_t, d->d_modified));
297	d->d_stream_first_sector = CDF_TOLE4(
298	    CAST(uint32_t, d->d_stream_first_sector));
299	d->d_size = CDF_TOLE4(d->d_size);
300}
301
302void
303cdf_swap_class(cdf_classid_t *d)
304{
305	d->cl_dword = CDF_TOLE4(d->cl_dword);
306	d->cl_word[0] = CDF_TOLE2(d->cl_word[0]);
307	d->cl_word[1] = CDF_TOLE2(d->cl_word[1]);
308}
309
/*
 * Copy one raw directory entry from buf into the fields of *d, one field
 * after another.  The order of the CDF_UNPACK calls is the order in which
 * the fields are taken from the buffer, so it must not be rearranged.  No
 * byte-order conversion happens here; see cdf_swap_dir().
 */
void
cdf_unpack_dir(cdf_directory_t *d, char *buf)
{
	/* running offset into buf, advanced by each CDF_UNPACK/CDF_UNPACKA */
	size_t len = 0;

	CDF_UNPACKA(d->d_name);
	CDF_UNPACK(d->d_namelen);
	CDF_UNPACK(d->d_type);
	CDF_UNPACK(d->d_color);
	CDF_UNPACK(d->d_left_child);
	CDF_UNPACK(d->d_right_child);
	CDF_UNPACK(d->d_storage);
	CDF_UNPACKA(d->d_storage_uuid);
	CDF_UNPACK(d->d_flags);
	CDF_UNPACK(d->d_created);
	CDF_UNPACK(d->d_modified);
	CDF_UNPACK(d->d_stream_first_sector);
	CDF_UNPACK(d->d_size);
	CDF_UNPACK(d->d_unused0);
}
330
331int
332cdf_zero_stream(cdf_stream_t *scn)
333{
334	scn->sst_len = 0;
335	scn->sst_dirlen = 0;
336	scn->sst_ss = 0;
337	free(scn->sst_tab);
338	scn->sst_tab = NULL;
339	return -1;
340}
341
342static size_t
343cdf_check_stream(const cdf_stream_t *sst, const cdf_header_t *h)
344{
345	size_t ss = sst->sst_dirlen < h->h_min_size_standard_stream ?
346	    CDF_SHORT_SEC_SIZE(h) : CDF_SEC_SIZE(h);
347	assert(ss == sst->sst_ss);
348	return sst->sst_ss;
349}
350
351static int
352cdf_check_stream_offset(const cdf_stream_t *sst, const cdf_header_t *h,
353    const void *p, size_t tail, int line)
354{
355	const char *b = RCAST(const char *, sst->sst_tab);
356	const char *e = RCAST(const char *, p) + tail;
357	size_t ss = cdf_check_stream(sst, h);
358	/*LINTED*/(void)&line;
359	if (e >= b && CAST(size_t, e - b) <= ss * sst->sst_len)
360		return 0;
361	DPRINTF(("%d: offset begin %p < end %p || %" SIZE_T_FORMAT "u"
362	    " > %" SIZE_T_FORMAT "u [%" SIZE_T_FORMAT "u %"
363	    SIZE_T_FORMAT "u]\n", line, b, e, (size_t)(e - b),
364	    ss * sst->sst_len, ss, sst->sst_len));
365	errno = EFTYPE;
366	return -1;
367}
368
369static ssize_t
370cdf_read(const cdf_info_t *info, off_t off, void *buf, size_t len)
371{
372	size_t siz = CAST(size_t, off + len);
373
374	if (CAST(off_t, off + len) != CAST(off_t, siz))
375		goto out;
376
377	if (info->i_buf != NULL && info->i_len >= siz) {
378		(void)memcpy(buf, &info->i_buf[off], len);
379		return CAST(ssize_t, len);
380	}
381
382	if (info->i_fd == -1)
383		goto out;
384
385	if (pread(info->i_fd, buf, len, off) != CAST(ssize_t, len))
386		return -1;
387
388	return CAST(ssize_t, len);
389out:
390	errno = EINVAL;
391	return -1;
392}
393
394int
395cdf_read_header(const cdf_info_t *info, cdf_header_t *h)
396{
397	char buf[512];
398
399	(void)memcpy(cdf_bo.s, "\01\02\03\04", 4);
400	if (cdf_read(info, CAST(off_t, 0), buf, sizeof(buf)) == -1)
401		return -1;
402	cdf_unpack_header(h, buf);
403	cdf_swap_header(h);
404	if (h->h_magic != CDF_MAGIC) {
405		DPRINTF(("Bad magic %#" INT64_T_FORMAT "x != %#"
406		    INT64_T_FORMAT "x\n",
407		    (unsigned long long)h->h_magic,
408		    (unsigned long long)CDF_MAGIC));
409		goto out;
410	}
411	if (h->h_sec_size_p2 > 20) {
412		DPRINTF(("Bad sector size %hu\n", h->h_sec_size_p2));
413		goto out;
414	}
415	if (h->h_short_sec_size_p2 > 20) {
416		DPRINTF(("Bad short sector size %hu\n",
417		    h->h_short_sec_size_p2));
418		goto out;
419	}
420	return 0;
421out:
422	errno = EFTYPE;
423	return -1;
424}
425
426
427ssize_t
428cdf_read_sector(const cdf_info_t *info, void *buf, size_t offs, size_t len,
429    const cdf_header_t *h, cdf_secid_t id)
430{
431	size_t ss = CDF_SEC_SIZE(h);
432	size_t pos;
433
434	if (SIZE_T_MAX / ss < CAST(size_t, id))
435		return -1;
436
437	pos = CDF_SEC_POS(h, id);
438	assert(ss == len);
439	return cdf_read(info, CAST(off_t, pos), RCAST(char *, buf) + offs, len);
440}
441
442ssize_t
443cdf_read_short_sector(const cdf_stream_t *sst, void *buf, size_t offs,
444    size_t len, const cdf_header_t *h, cdf_secid_t id)
445{
446	size_t ss = CDF_SHORT_SEC_SIZE(h);
447	size_t pos;
448
449	if (SIZE_T_MAX / ss < CAST(size_t, id))
450		return -1;
451
452	pos = CDF_SHORT_SEC_POS(h, id);
453	assert(ss == len);
454	if (pos + len > CDF_SEC_SIZE(h) * sst->sst_len) {
455		DPRINTF(("Out of bounds read %" SIZE_T_FORMAT "u > %"
456		    SIZE_T_FORMAT "u\n",
457		    pos + len, CDF_SEC_SIZE(h) * sst->sst_len));
458		goto out;
459	}
460	(void)memcpy(RCAST(char *, buf) + offs,
461	    RCAST(const char *, sst->sst_tab) + pos, len);
462	return len;
463out:
464	errno = EFTYPE;
465	return -1;
466}
467
/*
 * Read the sector allocation table.
 *
 * The SAT sectors are listed first in the header's h_master_sat array and
 * then in a chain of master sectors starting at
 * h_secid_first_sector_in_master_sat.  Each master sector holds
 * nsatpersec sector ids followed by the id of the next master sector.
 *
 * On success sat->sat_tab holds the SAT sectors back to back, sat->sat_len
 * is the number of sectors actually read, and 0 is returned.  On failure
 * the table is freed and -1 is returned; errno is set to EFTYPE for the
 * structural errors detected here.
 *
 * NOTE(review): on the failure paths sat->sat_tab is freed but not reset
 * to NULL -- confirm that callers do not free it again.
 */
int
cdf_read_sat(const cdf_info_t *info, cdf_header_t *h, cdf_sat_t *sat)
{
	size_t i, j, k;
	size_t ss = CDF_SEC_SIZE(h);
	cdf_secid_t *msa, mid, sec;
	/* ids per master sector, not counting the trailing "next" link */
	size_t nsatpersec = (ss / sizeof(mid)) - 1;

	/* count the header slots in use: stop at the first free one */
	for (i = 0; i < __arraycount(h->h_master_sat); i++)
		if (h->h_master_sat[i] == CDF_SECID_FREE)
			break;

	/* refuse sector counts that would make the table unreasonably big */
#define CDF_SEC_LIMIT (UINT32_MAX / (64 * ss))
	if ((nsatpersec > 0 &&
	    h->h_num_sectors_in_master_sat > CDF_SEC_LIMIT / nsatpersec) ||
	    i > CDF_SEC_LIMIT) {
		DPRINTF(("Number of sectors in master SAT too big %u %"
		    SIZE_T_FORMAT "u\n", h->h_num_sectors_in_master_sat, i));
		errno = EFTYPE;
		return -1;
	}

	/* upper bound on the number of SAT sectors; trimmed at `out' */
	sat->sat_len = h->h_num_sectors_in_master_sat * nsatpersec + i;
	DPRINTF(("sat_len = %" SIZE_T_FORMAT "u ss = %" SIZE_T_FORMAT "u\n",
	    sat->sat_len, ss));
	if ((sat->sat_tab = CAST(cdf_secid_t *, CDF_CALLOC(sat->sat_len, ss)))
	    == NULL)
		return -1;

	/* first the SAT sectors named directly in the header */
	for (i = 0; i < __arraycount(h->h_master_sat); i++) {
		if (h->h_master_sat[i] < 0)
			break;
		if (cdf_read_sector(info, sat->sat_tab, ss * i, ss, h,
		    h->h_master_sat[i]) != CAST(ssize_t, ss)) {
			DPRINTF(("Reading sector %d", h->h_master_sat[i]));
			goto out1;
		}
	}

	/* scratch buffer for one master sector */
	if ((msa = CAST(cdf_secid_t *, CDF_CALLOC(1, ss))) == NULL)
		goto out1;

	/*
	 * Then the SAT sectors named in the master sector chain.  `i' keeps
	 * counting from where the header loop stopped, so it is always the
	 * index of the next SAT sector to fill.
	 */
	mid = h->h_secid_first_sector_in_master_sat;
	for (j = 0; j < h->h_num_sectors_in_master_sat; j++) {
		/* a negative id ends the chain normally */
		if (mid < 0)
			goto out;
		if (j >= CDF_LOOP_LIMIT) {
			DPRINTF(("Reading master sector loop limit"));
			goto out3;
		}
		if (cdf_read_sector(info, msa, 0, ss, h, mid) !=
		    CAST(ssize_t, ss)) {
			DPRINTF(("Reading master sector %d", mid));
			goto out2;
		}
		for (k = 0; k < nsatpersec; k++, i++) {
			sec = CDF_TOLE4(CAST(uint32_t, msa[k]));
			/* a negative entry also ends the scan normally */
			if (sec < 0)
				goto out;
			if (i >= sat->sat_len) {
			    DPRINTF(("Out of bounds reading MSA %"
				SIZE_T_FORMAT "u >= %" SIZE_T_FORMAT "u",
				i, sat->sat_len));
			    goto out3;
			}
			if (cdf_read_sector(info, sat->sat_tab, ss * i, ss, h,
			    sec) != CAST(ssize_t, ss)) {
				DPRINTF(("Reading sector %d",
				    CDF_TOLE4(msa[k])));
				goto out2;
			}
		}
		/* the slot after the ids links to the next master sector */
		mid = CDF_TOLE4(CAST(uint32_t, msa[nsatpersec]));
	}
out:
	/* success: record how many SAT sectors were actually read */
	sat->sat_len = i;
	free(msa);
	return 0;
out3:
	errno = EFTYPE;
out2:
	free(msa);
out1:
	free(sat->sat_tab);
	return -1;
}
557
558size_t
559cdf_count_chain(const cdf_sat_t *sat, cdf_secid_t sid, size_t size)
560{
561	size_t i, j;
562	cdf_secid_t maxsector = CAST(cdf_secid_t, (sat->sat_len * size)
563	    / sizeof(maxsector));
564
565	DPRINTF(("Chain:"));
566	if (sid == CDF_SECID_END_OF_CHAIN) {
567		/* 0-length chain. */
568		DPRINTF((" empty\n"));
569		return 0;
570	}
571
572	for (j = i = 0; sid >= 0; i++, j++) {
573		DPRINTF((" %d", sid));
574		if (j >= CDF_LOOP_LIMIT) {
575			DPRINTF(("Counting chain loop limit"));
576			goto out;
577		}
578		if (sid >= maxsector) {
579			DPRINTF(("Sector %d >= %d\n", sid, maxsector));
580			goto out;
581		}
582		sid = CDF_TOLE4(CAST(uint32_t, sat->sat_tab[sid]));
583	}
584	if (i == 0) {
585		DPRINTF((" none, sid: %d\n", sid));
586		goto out;
587
588	}
589	DPRINTF(("\n"));
590	return i;
591out:
592	errno = EFTYPE;
593	return CAST(size_t, -1);
594}
595
596int
597cdf_read_long_sector_chain(const cdf_info_t *info, const cdf_header_t *h,
598    const cdf_sat_t *sat, cdf_secid_t sid, size_t len, cdf_stream_t *scn)
599{
600	size_t ss = CDF_SEC_SIZE(h), i, j;
601	ssize_t nr;
602	scn->sst_tab = NULL;
603	scn->sst_len = cdf_count_chain(sat, sid, ss);
604	scn->sst_dirlen = MAX(h->h_min_size_standard_stream, len);
605	scn->sst_ss = ss;
606
607	if (sid == CDF_SECID_END_OF_CHAIN || len == 0)
608		return cdf_zero_stream(scn);
609
610	if (scn->sst_len == CAST(size_t, -1))
611		goto out;
612
613	scn->sst_tab = CDF_CALLOC(scn->sst_len, ss);
614	if (scn->sst_tab == NULL)
615		return cdf_zero_stream(scn);
616
617	for (j = i = 0; sid >= 0; i++, j++) {
618		if (j >= CDF_LOOP_LIMIT) {
619			DPRINTF(("Read long sector chain loop limit"));
620			goto out;
621		}
622		if (i >= scn->sst_len) {
623			DPRINTF(("Out of bounds reading long sector chain "
624			    "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n", i,
625			    scn->sst_len));
626			goto out;
627		}
628		if ((nr = cdf_read_sector(info, scn->sst_tab, i * ss, ss, h,
629		    sid)) != CAST(ssize_t, ss)) {
630			if (i == scn->sst_len - 1 && nr > 0) {
631				/* Last sector might be truncated */
632				return 0;
633			}
634			DPRINTF(("Reading long sector chain %d", sid));
635			goto out;
636		}
637		sid = CDF_TOLE4(CAST(uint32_t, sat->sat_tab[sid]));
638	}
639	return 0;
640out:
641	errno = EFTYPE;
642	return cdf_zero_stream(scn);
643}
644
645int
646cdf_read_short_sector_chain(const cdf_header_t *h,
647    const cdf_sat_t *ssat, const cdf_stream_t *sst,
648    cdf_secid_t sid, size_t len, cdf_stream_t *scn)
649{
650	size_t ss = CDF_SHORT_SEC_SIZE(h), i, j;
651	scn->sst_tab = NULL;
652	scn->sst_len = cdf_count_chain(ssat, sid, CDF_SEC_SIZE(h));
653	scn->sst_dirlen = len;
654	scn->sst_ss = ss;
655
656	if (scn->sst_len == CAST(size_t, -1))
657		goto out;
658
659	scn->sst_tab = CDF_CALLOC(scn->sst_len, ss);
660	if (scn->sst_tab == NULL)
661		return cdf_zero_stream(scn);
662
663	for (j = i = 0; sid >= 0; i++, j++) {
664		if (j >= CDF_LOOP_LIMIT) {
665			DPRINTF(("Read short sector chain loop limit"));
666			goto out;
667		}
668		if (i >= scn->sst_len) {
669			DPRINTF(("Out of bounds reading short sector chain "
670			    "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n",
671			    i, scn->sst_len));
672			goto out;
673		}
674		if (cdf_read_short_sector(sst, scn->sst_tab, i * ss, ss, h,
675		    sid) != CAST(ssize_t, ss)) {
676			DPRINTF(("Reading short sector chain %d", sid));
677			goto out;
678		}
679		sid = CDF_TOLE4(CAST(uint32_t, ssat->sat_tab[sid]));
680	}
681	return 0;
682out:
683	errno = EFTYPE;
684	return cdf_zero_stream(scn);
685}
686
687int
688cdf_read_sector_chain(const cdf_info_t *info, const cdf_header_t *h,
689    const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
690    cdf_secid_t sid, size_t len, cdf_stream_t *scn)
691{
692
693	if (len < h->h_min_size_standard_stream && sst->sst_tab != NULL)
694		return cdf_read_short_sector_chain(h, ssat, sst, sid, len,
695		    scn);
696	else
697		return cdf_read_long_sector_chain(info, h, sat, sid, len, scn);
698}
699
700int
701cdf_read_dir(const cdf_info_t *info, const cdf_header_t *h,
702    const cdf_sat_t *sat, cdf_dir_t *dir)
703{
704	size_t i, j;
705	size_t ss = CDF_SEC_SIZE(h), ns, nd;
706	char *buf;
707	cdf_secid_t sid = h->h_secid_first_directory;
708
709	ns = cdf_count_chain(sat, sid, ss);
710	if (ns == CAST(size_t, -1))
711		return -1;
712
713	nd = ss / CDF_DIRECTORY_SIZE;
714
715	dir->dir_len = ns * nd;
716	dir->dir_tab = CAST(cdf_directory_t *,
717	    CDF_CALLOC(dir->dir_len, sizeof(dir->dir_tab[0])));
718	if (dir->dir_tab == NULL)
719		return -1;
720
721	if ((buf = CAST(char *, CDF_MALLOC(ss))) == NULL) {
722		free(dir->dir_tab);
723		return -1;
724	}
725
726	for (j = i = 0; i < ns; i++, j++) {
727		if (j >= CDF_LOOP_LIMIT) {
728			DPRINTF(("Read dir loop limit"));
729			goto out;
730		}
731		if (cdf_read_sector(info, buf, 0, ss, h, sid) !=
732		    CAST(ssize_t, ss)) {
733			DPRINTF(("Reading directory sector %d", sid));
734			goto out;
735		}
736		for (j = 0; j < nd; j++) {
737			cdf_unpack_dir(&dir->dir_tab[i * nd + j],
738			    &buf[j * CDF_DIRECTORY_SIZE]);
739		}
740		sid = CDF_TOLE4(CAST(uint32_t, sat->sat_tab[sid]));
741	}
742	if (NEED_SWAP)
743		for (i = 0; i < dir->dir_len; i++)
744			cdf_swap_dir(&dir->dir_tab[i]);
745	free(buf);
746	return 0;
747out:
748	free(dir->dir_tab);
749	free(buf);
750	errno = EFTYPE;
751	return -1;
752}
753
754
755int
756cdf_read_ssat(const cdf_info_t *info, const cdf_header_t *h,
757    const cdf_sat_t *sat, cdf_sat_t *ssat)
758{
759	size_t i, j;
760	size_t ss = CDF_SEC_SIZE(h);
761	cdf_secid_t sid = h->h_secid_first_sector_in_short_sat;
762
763	ssat->sat_tab = NULL;
764	ssat->sat_len = cdf_count_chain(sat, sid, ss);
765	if (ssat->sat_len == CAST(size_t, -1))
766		goto out;
767
768	ssat->sat_tab = CAST(cdf_secid_t *, CDF_CALLOC(ssat->sat_len, ss));
769	if (ssat->sat_tab == NULL)
770		goto out1;
771
772	for (j = i = 0; sid >= 0; i++, j++) {
773		if (j >= CDF_LOOP_LIMIT) {
774			DPRINTF(("Read short sat sector loop limit"));
775			goto out;
776		}
777		if (i >= ssat->sat_len) {
778			DPRINTF(("Out of bounds reading short sector chain "
779			    "%" SIZE_T_FORMAT "u > %" SIZE_T_FORMAT "u\n", i,
780			    ssat->sat_len));
781			goto out;
782		}
783		if (cdf_read_sector(info, ssat->sat_tab, i * ss, ss, h, sid) !=
784		    CAST(ssize_t, ss)) {
785			DPRINTF(("Reading short sat sector %d", sid));
786			goto out1;
787		}
788		sid = CDF_TOLE4(CAST(uint32_t, sat->sat_tab[sid]));
789	}
790	return 0;
791out:
792	errno = EFTYPE;
793out1:
794	free(ssat->sat_tab);
795	return -1;
796}
797
798int
799cdf_read_short_stream(const cdf_info_t *info, const cdf_header_t *h,
800    const cdf_sat_t *sat, const cdf_dir_t *dir, cdf_stream_t *scn,
801    const cdf_directory_t **root)
802{
803	size_t i;
804	const cdf_directory_t *d;
805
806	*root = NULL;
807	for (i = 0; i < dir->dir_len; i++)
808		if (dir->dir_tab[i].d_type == CDF_DIR_TYPE_ROOT_STORAGE)
809			break;
810
811	/* If the it is not there, just fake it; some docs don't have it */
812	if (i == dir->dir_len) {
813		DPRINTF(("Cannot find root storage dir\n"));
814		goto out;
815	}
816	d = &dir->dir_tab[i];
817	*root = d;
818
819	/* If the it is not there, just fake it; some docs don't have it */
820	if (d->d_stream_first_sector < 0) {
821		DPRINTF(("No first secror in dir\n"));
822		goto out;
823	}
824
825	return cdf_read_long_sector_chain(info, h, sat,
826	    d->d_stream_first_sector, d->d_size, scn);
827out:
828	scn->sst_tab = NULL;
829	(void)cdf_zero_stream(scn);
830	return 0;
831}
832
833static int
834cdf_namecmp(const char *d, const uint16_t *s, size_t l)
835{
836	for (; l--; d++, s++)
837		if (*d != CDF_TOLE2(*s))
838			return CAST(unsigned char, *d) - CDF_TOLE2(*s);
839	return 0;
840}
841
842int
843cdf_read_doc_summary_info(const cdf_info_t *info, const cdf_header_t *h,
844    const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
845    const cdf_dir_t *dir, cdf_stream_t *scn)
846{
847	return cdf_read_user_stream(info, h, sat, ssat, sst, dir,
848	    "\05DocumentSummaryInformation", scn);
849}
850
851int
852cdf_read_summary_info(const cdf_info_t *info, const cdf_header_t *h,
853    const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
854    const cdf_dir_t *dir, cdf_stream_t *scn)
855{
856	return cdf_read_user_stream(info, h, sat, ssat, sst, dir,
857	    "\05SummaryInformation", scn);
858}
859
860int
861cdf_read_user_stream(const cdf_info_t *info, const cdf_header_t *h,
862    const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
863    const cdf_dir_t *dir, const char *name, cdf_stream_t *scn)
864{
865	const cdf_directory_t *d;
866	int i = cdf_find_stream(dir, name, CDF_DIR_TYPE_USER_STREAM);
867
868	if (i <= 0) {
869		memset(scn, 0, sizeof(*scn));
870		return -1;
871	}
872
873	d = &dir->dir_tab[i - 1];
874	return cdf_read_sector_chain(info, h, sat, ssat, sst,
875	    d->d_stream_first_sector, d->d_size, scn);
876}
877
878int
879cdf_find_stream(const cdf_dir_t *dir, const char *name, int type)
880{
881	size_t i, name_len = strlen(name) + 1;
882
883	for (i = dir->dir_len; i > 0; i--)
884		if (dir->dir_tab[i - 1].d_type == type &&
885		    cdf_namecmp(name, dir->dir_tab[i - 1].d_name, name_len)
886		    == 0)
887			break;
888	if (i > 0)
889		return CAST(int, i);
890
891	DPRINTF(("Cannot find type %d `%s'\n", type, name));
892	errno = ESRCH;
893	return 0;
894}
895
/* Largest section length accepted by cdf_read_property_info(). */
#define CDF_SHLEN_LIMIT (UINT32_MAX / 64)
/* Largest number of property slots cdf_grow_info() will allocate. */
#define CDF_PROP_LIMIT (UINT32_MAX / (64 * sizeof(cdf_property_info_t)))
898
899static const void *
900cdf_offset(const void *p, size_t l)
901{
902	return CAST(const void *, CAST(const uint8_t *, p) + l);
903}
904
905static const uint8_t *
906cdf_get_property_info_pos(const cdf_stream_t *sst, const cdf_header_t *h,
907    const uint8_t *p, const uint8_t *e, size_t i)
908{
909	size_t tail = (i << 1) + 1;
910	size_t ofs;
911
912	if (p >= e) {
913		DPRINTF(("Past end %p < %p\n", e, p));
914		return NULL;
915	}
916
917	if (cdf_check_stream_offset(sst, h, p, (tail + 1) * sizeof(uint32_t),
918	    __LINE__) == -1)
919		return NULL;
920
921	ofs = CDF_GETUINT32(p, tail);
922	if (ofs < 2 * sizeof(uint32_t)) {
923		DPRINTF(("Offset too small %zu\n", ofs));
924		return NULL;
925	}
926
927	ofs -= 2 * sizeof(uint32_t);
928	if (ofs > CAST(size_t, e - p)) {
929		DPRINTF(("Offset too big %zu %td\n", ofs, e - p));
930		return NULL;
931	}
932
933	return CAST(const uint8_t *, cdf_offset(CAST(const void *, p), ofs));
934}
935
936static cdf_property_info_t *
937cdf_grow_info(cdf_property_info_t **info, size_t *maxcount, size_t incr)
938{
939	cdf_property_info_t *inp;
940	size_t newcount = *maxcount + incr;
941
942	if (newcount > CDF_PROP_LIMIT) {
943		DPRINTF(("exceeded property limit %" SIZE_T_FORMAT "u > %"
944		    SIZE_T_FORMAT "u\n", newcount, CDF_PROP_LIMIT));
945		goto out;
946	}
947	inp = CAST(cdf_property_info_t *,
948	    CDF_REALLOC(*info, newcount * sizeof(*inp)));
949	if (inp == NULL)
950		goto out;
951
952	*info = inp;
953	*maxcount = newcount;
954	return inp;
955out:
956	free(*info);
957	*maxcount = 0;
958	*info = NULL;
959	return NULL;
960}
961
962static int
963cdf_copy_info(cdf_property_info_t *inp, const void *p, const void *e,
964    size_t len)
965{
966	if (inp->pi_type & CDF_VECTOR)
967		return 0;
968
969	if (CAST(size_t, CAST(const char *, e) - CAST(const char *, p)) < len)
970		return 0;
971
972	(void)memcpy(&inp->pi_val, p, len);
973
974	switch (len) {
975	case 2:
976		inp->pi_u16 = CDF_TOLE2(inp->pi_u16);
977		break;
978	case 4:
979		inp->pi_u32 = CDF_TOLE4(inp->pi_u32);
980		break;
981	case 8:
982		inp->pi_u64 = CDF_TOLE8(inp->pi_u64);
983		break;
984	default:
985		abort();
986	}
987	return 1;
988}
989
/*
 * Parse one property section of a summary stream.
 *
 * offs is the byte offset of the section header inside sst->sst_tab.  The
 * array *info is grown by the number of properties in the section, the
 * new entries are filled in after the first *count existing ones, and
 * *count is advanced.  String values are not copied: pi_str.s_buf points
 * into sst->sst_tab.
 *
 * Returns 0 on success.  On any error the whole *info array is freed,
 * *info, *count and *maxcount are reset, errno is set to EFTYPE and -1 is
 * returned.
 */
int
cdf_read_property_info(const cdf_stream_t *sst, const cdf_header_t *h,
    uint32_t offs, cdf_property_info_t **info, size_t *count, size_t *maxcount)
{
	const cdf_section_header_t *shp;
	cdf_section_header_t sh;
	const uint8_t *p, *q, *e;
	size_t i, o4, nelements, j, slen, left;
	cdf_property_info_t *inp;

	if (offs > UINT32_MAX / 4) {
		errno = EFTYPE;
		goto out;
	}
	/* the section header must lie inside the stream ... */
	shp = CAST(const cdf_section_header_t *,
	    cdf_offset(sst->sst_tab, offs));
	if (cdf_check_stream_offset(sst, h, shp, sizeof(*shp), __LINE__) == -1)
		goto out;
	sh.sh_len = CDF_TOLE4(shp->sh_len);
	if (sh.sh_len > CDF_SHLEN_LIMIT) {
		errno = EFTYPE;
		goto out;
	}

	/* ... and so must the whole section it describes */
	if (cdf_check_stream_offset(sst, h, shp, sh.sh_len, __LINE__) == -1)
		goto out;

	sh.sh_properties = CDF_TOLE4(shp->sh_properties);
	DPRINTF(("section len: %u properties %u\n", sh.sh_len,
	    sh.sh_properties));
	if (sh.sh_properties > CDF_PROP_LIMIT)
		goto out;
	inp = cdf_grow_info(info, maxcount, sh.sh_properties);
	if (inp == NULL)
		goto out;
	/* from here on inp[0] is the first entry belonging to this section */
	inp += *count;
	*count += sh.sh_properties;
	/* p: start of the id/offset table, e: end of the section */
	p = CAST(const uint8_t *, cdf_offset(sst->sst_tab, offs + sizeof(sh)));
	e = CAST(const uint8_t *, cdf_offset(shp, sh.sh_len));
	if (p >= e || cdf_check_stream_offset(sst, h, e, 0, __LINE__) == -1)
		goto out;

	for (i = 0; i < sh.sh_properties; i++) {
		/* q: start of this property's data (type word first) */
		if ((q = cdf_get_property_info_pos(sst, h, p, e, i)) == NULL)
			goto out;
		inp[i].pi_id = CDF_GETUINT32(p, i << 1);
		left = CAST(size_t, e - q);
		if (left < sizeof(uint32_t)) {
			DPRINTF(("short info (no type)_\n"));
			goto out;
		}
		inp[i].pi_type = CDF_GETUINT32(q, 0);
		DPRINTF(("%" SIZE_T_FORMAT "u) id=%#x type=%#x offs=%#tx,%#x\n",
		    i, inp[i].pi_id, inp[i].pi_type, q - p, offs));
		if (inp[i].pi_type & CDF_VECTOR) {
			/* a vector carries an element count after the type */
			if (left < sizeof(uint32_t) * 2) {
				DPRINTF(("missing CDF_VECTOR length\n"));
				goto out;
			}
			nelements = CDF_GETUINT32(q, 1);
			if (nelements > CDF_ELEMENT_LIMIT || nelements == 0) {
				DPRINTF(("CDF_VECTOR with nelements == %"
				    SIZE_T_FORMAT "u\n", nelements));
				goto out;
			}
			slen = 2;
		} else {
			nelements = 1;
			slen = 1;
		}
		/*
		 * slen counts the 32-bit words consumed so far, o4 is the
		 * same position in bytes.
		 */
		o4 = slen * sizeof(uint32_t);
		if (inp[i].pi_type & (CDF_ARRAY|CDF_BYREF|CDF_RESERVED))
			goto unknown;
		switch (inp[i].pi_type & CDF_TYPEMASK) {
		case CDF_NULL:
		case CDF_EMPTY:
			break;
		case CDF_SIGNED16:
			if (!cdf_copy_info(&inp[i], &q[o4], e, sizeof(int16_t)))
				goto unknown;
			break;
		case CDF_SIGNED32:
		case CDF_BOOL:
		case CDF_UNSIGNED32:
		case CDF_FLOAT:
			if (!cdf_copy_info(&inp[i], &q[o4], e, sizeof(int32_t)))
				goto unknown;
			break;
		case CDF_SIGNED64:
		case CDF_UNSIGNED64:
		case CDF_DOUBLE:
		case CDF_FILETIME:
			if (!cdf_copy_info(&inp[i], &q[o4], e, sizeof(int64_t)))
				goto unknown;
			break;
		case CDF_LENGTH32_STRING:
		case CDF_LENGTH32_WSTRING:
			if (nelements > 1) {
				/*
				 * Make room for the extra strings; the array
				 * may move, so re-derive inp from its index.
				 */
				size_t nelem = inp - *info;
				inp = cdf_grow_info(info, maxcount, nelements);
				if (inp == NULL)
					goto out;
				inp += nelem;
			}
			/* each string takes one slot, so i advances with j */
			for (j = 0; j < nelements && i < sh.sh_properties;
			    j++, i++)
			{
				uint32_t l;

				/* length word, then l bytes of string data */
				if (o4 + sizeof(uint32_t) > left)
					goto out;

				l = CDF_GETUINT32(q, slen);
				o4 += sizeof(uint32_t);
				if (o4 + l > left)
					goto out;

				inp[i].pi_str.s_len = l;
				inp[i].pi_str.s_buf = CAST(const char *,
				    CAST(const void *, &q[o4]));

				DPRINTF(("o=%" SIZE_T_FORMAT "u l=%d(%"
				    SIZE_T_FORMAT "u), t=%" SIZE_T_FORMAT
				    "u s=%.*s\n", o4, l,
				    CDF_ROUND(l, sizeof(l)),
				    left, (int)l, inp[i].pi_str.s_buf));

				/* round odd lengths up before advancing */
				if (l & 1)
					l++;

				slen += l >> 1;
				o4 = slen * sizeof(uint32_t);
			}
			/* undo the last i++; the outer loop increments again */
			i--;
			break;
		case CDF_CLIPBOARD:
			if (inp[i].pi_type & CDF_VECTOR)
				goto unknown;
			break;
		default:
		unknown:
			/* unsupported or unreadable value: store zeroes */
			memset(&inp[i].pi_val, 0, sizeof(inp[i].pi_val));
			DPRINTF(("Don't know how to deal with %#x\n",
			    inp[i].pi_type));
			break;
		}
	}
	return 0;
out:
	free(*info);
	*info = NULL;
	*count = 0;
	*maxcount = 0;
	errno = EFTYPE;
	return -1;
}
1146
1147int
1148cdf_unpack_summary_info(const cdf_stream_t *sst, const cdf_header_t *h,
1149    cdf_summary_info_header_t *ssi, cdf_property_info_t **info, size_t *count)
1150{
1151	size_t maxcount;
1152	const cdf_summary_info_header_t *si =
1153	    CAST(const cdf_summary_info_header_t *, sst->sst_tab);
1154	const cdf_section_declaration_t *sd =
1155	    CAST(const cdf_section_declaration_t *, RCAST(const void *,
1156	    RCAST(const char *, sst->sst_tab)
1157	    + CDF_SECTION_DECLARATION_OFFSET));
1158
1159	if (cdf_check_stream_offset(sst, h, si, sizeof(*si), __LINE__) == -1 ||
1160	    cdf_check_stream_offset(sst, h, sd, sizeof(*sd), __LINE__) == -1)
1161		return -1;
1162	ssi->si_byte_order = CDF_TOLE2(si->si_byte_order);
1163	ssi->si_os_version = CDF_TOLE2(si->si_os_version);
1164	ssi->si_os = CDF_TOLE2(si->si_os);
1165	ssi->si_class = si->si_class;
1166	cdf_swap_class(&ssi->si_class);
1167	ssi->si_count = CDF_TOLE4(si->si_count);
1168	*count = 0;
1169	maxcount = 0;
1170	*info = NULL;
1171	if (cdf_read_property_info(sst, h, CDF_TOLE4(sd->sd_offset), info,
1172	    count, &maxcount) == -1)
1173		return -1;
1174	return 0;
1175}
1176
1177
1178#define extract_catalog_field(t, f, l) \
1179    if (b + l + sizeof(cep->f) > eb) { \
1180	    cep->ce_namlen = 0; \
1181	    break; \
1182    } \
1183    memcpy(&cep->f, b + (l), sizeof(cep->f)); \
1184    ce[i].f = CAST(t, CDF_TOLE(cep->f))
1185
1186int
1187cdf_unpack_catalog(const cdf_header_t *h, const cdf_stream_t *sst,
1188    cdf_catalog_t **cat)
1189{
1190	size_t ss = cdf_check_stream(sst, h);
1191	const char *b = CAST(const char *, sst->sst_tab);
1192	const char *nb, *eb = b + ss * sst->sst_len;
1193	size_t nr, i, j, k;
1194	cdf_catalog_entry_t *ce;
1195	uint16_t reclen;
1196	const uint16_t *np;
1197
1198	for (nr = 0;; nr++) {
1199		memcpy(&reclen, b, sizeof(reclen));
1200		reclen = CDF_TOLE2(reclen);
1201		if (reclen == 0)
1202			break;
1203		b += reclen;
1204		if (b > eb)
1205		    break;
1206	}
1207	if (nr == 0)
1208		return -1;
1209	nr--;
1210	*cat = CAST(cdf_catalog_t *,
1211	    CDF_MALLOC(sizeof(cdf_catalog_t) + nr * sizeof(*ce)));
1212	if (*cat == NULL)
1213		return -1;
1214	ce = (*cat)->cat_e;
1215	memset(ce, 0, nr * sizeof(*ce));
1216	b = CAST(const char *, sst->sst_tab);
1217	for (j = i = 0; i < nr; b += reclen) {
1218		cdf_catalog_entry_t *cep = &ce[j];
1219		uint16_t rlen;
1220
1221		extract_catalog_field(uint16_t, ce_namlen, 0);
1222		extract_catalog_field(uint16_t, ce_num, 4);
1223		extract_catalog_field(uint64_t, ce_timestamp, 8);
1224		reclen = cep->ce_namlen;
1225
1226		if (reclen < 14) {
1227			cep->ce_namlen = 0;
1228			continue;
1229		}
1230
1231		cep->ce_namlen = __arraycount(cep->ce_name) - 1;
1232		rlen = reclen - 14;
1233		if (cep->ce_namlen > rlen)
1234			cep->ce_namlen = rlen;
1235
1236		np = CAST(const uint16_t *, CAST(const void *, (b + 16)));
1237		nb = CAST(const char *, CAST(const void *,
1238		    (np + cep->ce_namlen)));
1239		if (nb > eb) {
1240			cep->ce_namlen = 0;
1241			break;
1242		}
1243
1244		for (k = 0; k < cep->ce_namlen; k++)
1245			cep->ce_name[k] = np[k]; /* XXX: CDF_TOLE2? */
1246		cep->ce_name[cep->ce_namlen] = 0;
1247		j = i;
1248		i++;
1249	}
1250	(*cat)->cat_num = j;
1251	return 0;
1252}
1253
1254int
1255cdf_print_classid(char *buf, size_t buflen, const cdf_classid_t *id)
1256{
1257	return snprintf(buf, buflen, "%.8x-%.4x-%.4x-%.2x%.2x-"
1258	    "%.2x%.2x%.2x%.2x%.2x%.2x", id->cl_dword, id->cl_word[0],
1259	    id->cl_word[1], id->cl_two[0], id->cl_two[1], id->cl_six[0],
1260	    id->cl_six[1], id->cl_six[2], id->cl_six[3], id->cl_six[4],
1261	    id->cl_six[5]);
1262}
1263
1264static const struct {
1265	uint32_t v;
1266	const char *n;
1267} vn[] = {
1268	{ CDF_PROPERTY_CODE_PAGE, "Code page" },
1269	{ CDF_PROPERTY_TITLE, "Title" },
1270	{ CDF_PROPERTY_SUBJECT, "Subject" },
1271	{ CDF_PROPERTY_AUTHOR, "Author" },
1272	{ CDF_PROPERTY_KEYWORDS, "Keywords" },
1273	{ CDF_PROPERTY_COMMENTS, "Comments" },
1274	{ CDF_PROPERTY_TEMPLATE, "Template" },
1275	{ CDF_PROPERTY_LAST_SAVED_BY, "Last Saved By" },
1276	{ CDF_PROPERTY_REVISION_NUMBER, "Revision Number" },
1277	{ CDF_PROPERTY_TOTAL_EDITING_TIME, "Total Editing Time" },
1278	{ CDF_PROPERTY_LAST_PRINTED, "Last Printed" },
1279	{ CDF_PROPERTY_CREATE_TIME, "Create Time/Date" },
1280	{ CDF_PROPERTY_LAST_SAVED_TIME, "Last Saved Time/Date" },
1281	{ CDF_PROPERTY_NUMBER_OF_PAGES, "Number of Pages" },
1282	{ CDF_PROPERTY_NUMBER_OF_WORDS, "Number of Words" },
1283	{ CDF_PROPERTY_NUMBER_OF_CHARACTERS, "Number of Characters" },
1284	{ CDF_PROPERTY_THUMBNAIL, "Thumbnail" },
1285	{ CDF_PROPERTY_NAME_OF_APPLICATION, "Name of Creating Application" },
1286	{ CDF_PROPERTY_SECURITY, "Security" },
1287	{ CDF_PROPERTY_LOCALE_ID, "Locale ID" },
1288};
1289
1290int
1291cdf_print_property_name(char *buf, size_t bufsiz, uint32_t p)
1292{
1293	size_t i;
1294
1295	for (i = 0; i < __arraycount(vn); i++)
1296		if (vn[i].v == p)
1297			return snprintf(buf, bufsiz, "%s", vn[i].n);
1298	return snprintf(buf, bufsiz, "%#x", p);
1299}
1300
1301int
1302cdf_print_elapsed_time(char *buf, size_t bufsiz, cdf_timestamp_t ts)
1303{
1304	int len = 0;
1305	int days, hours, mins, secs;
1306
1307	ts /= CDF_TIME_PREC;
1308	secs = CAST(int, ts % 60);
1309	ts /= 60;
1310	mins = CAST(int, ts % 60);
1311	ts /= 60;
1312	hours = CAST(int, ts % 24);
1313	ts /= 24;
1314	days = CAST(int, ts);
1315
1316	if (days) {
1317		len += snprintf(buf + len, bufsiz - len, "%dd+", days);
1318		if (CAST(size_t, len) >= bufsiz)
1319			return len;
1320	}
1321
1322	if (days || hours) {
1323		len += snprintf(buf + len, bufsiz - len, "%.2d:", hours);
1324		if (CAST(size_t, len) >= bufsiz)
1325			return len;
1326	}
1327
1328	len += snprintf(buf + len, bufsiz - len, "%.2d:", mins);
1329	if (CAST(size_t, len) >= bufsiz)
1330		return len;
1331
1332	len += snprintf(buf + len, bufsiz - len, "%.2d", secs);
1333	return len;
1334}
1335
1336char *
1337cdf_u16tos8(char *buf, size_t len, const uint16_t *p)
1338{
1339	size_t i;
1340	for (i = 0; i < len && p[i]; i++)
1341		buf[i] = CAST(char, p[i]);
1342	buf[i] = '\0';
1343	return buf;
1344}
1345
1346#ifdef CDF_DEBUG
1347void
1348cdf_dump_header(const cdf_header_t *h)
1349{
1350	size_t i;
1351
1352#define DUMP(a, b) (void)fprintf(stderr, "%40.40s = " a "\n", # b, h->h_ ## b)
1353#define DUMP2(a, b) (void)fprintf(stderr, "%40.40s = " a " (" a ")\n", # b, \
1354    h->h_ ## b, 1 << h->h_ ## b)
1355	DUMP("%d", revision);
1356	DUMP("%d", version);
1357	DUMP("%#x", byte_order);
1358	DUMP2("%d", sec_size_p2);
1359	DUMP2("%d", short_sec_size_p2);
1360	DUMP("%d", num_sectors_in_sat);
1361	DUMP("%d", secid_first_directory);
1362	DUMP("%d", min_size_standard_stream);
1363	DUMP("%d", secid_first_sector_in_short_sat);
1364	DUMP("%d", num_sectors_in_short_sat);
1365	DUMP("%d", secid_first_sector_in_master_sat);
1366	DUMP("%d", num_sectors_in_master_sat);
1367	for (i = 0; i < __arraycount(h->h_master_sat); i++) {
1368		if (h->h_master_sat[i] == CDF_SECID_FREE)
1369			break;
1370		(void)fprintf(stderr, "%35.35s[%.3" SIZE_T_FORMAT "u] = %d\n",
1371		    "master_sat", i, h->h_master_sat[i]);
1372	}
1373}
1374
1375void
1376cdf_dump_sat(const char *prefix, const cdf_sat_t *sat, size_t size)
1377{
1378	size_t i, j, s = size / sizeof(cdf_secid_t);
1379
1380	for (i = 0; i < sat->sat_len; i++) {
1381		(void)fprintf(stderr, "%s[%" SIZE_T_FORMAT "u]:\n%.6"
1382		    SIZE_T_FORMAT "u: ", prefix, i, i * s);
1383		for (j = 0; j < s; j++) {
1384			(void)fprintf(stderr, "%5d, ",
1385			    CDF_TOLE4(sat->sat_tab[s * i + j]));
1386			if ((j + 1) % 10 == 0)
1387				(void)fprintf(stderr, "\n%.6" SIZE_T_FORMAT
1388				    "u: ", i * s + j + 1);
1389		}
1390		(void)fprintf(stderr, "\n");
1391	}
1392}
1393
1394void
1395cdf_dump(const void *v, size_t len)
1396{
1397	size_t i, j;
1398	const unsigned char *p = v;
1399	char abuf[16];
1400
1401	(void)fprintf(stderr, "%.4x: ", 0);
1402	for (i = 0, j = 0; i < len; i++, p++) {
1403		(void)fprintf(stderr, "%.2x ", *p);
1404		abuf[j++] = isprint(*p) ? *p : '.';
1405		if (j == 16) {
1406			j = 0;
1407			abuf[15] = '\0';
1408			(void)fprintf(stderr, "%s\n%.4" SIZE_T_FORMAT "x: ",
1409			    abuf, i + 1);
1410		}
1411	}
1412	(void)fprintf(stderr, "\n");
1413}
1414
1415void
1416cdf_dump_stream(const cdf_stream_t *sst)
1417{
1418	size_t ss = sst->sst_ss;
1419	cdf_dump(sst->sst_tab, ss * sst->sst_len);
1420}
1421
1422void
1423cdf_dump_dir(const cdf_info_t *info, const cdf_header_t *h,
1424    const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
1425    const cdf_dir_t *dir)
1426{
1427	size_t i, j;
1428	cdf_directory_t *d;
1429	char name[__arraycount(d->d_name)];
1430	cdf_stream_t scn;
1431	struct timespec ts;
1432
1433	static const char *types[] = { "empty", "user storage",
1434	    "user stream", "lockbytes", "property", "root storage" };
1435
1436	for (i = 0; i < dir->dir_len; i++) {
1437		char buf[26];
1438		d = &dir->dir_tab[i];
1439		for (j = 0; j < sizeof(name); j++)
1440			name[j] = (char)CDF_TOLE2(d->d_name[j]);
1441		(void)fprintf(stderr, "Directory %" SIZE_T_FORMAT "u: %s\n",
1442		    i, name);
1443		if (d->d_type < __arraycount(types))
1444			(void)fprintf(stderr, "Type: %s\n", types[d->d_type]);
1445		else
1446			(void)fprintf(stderr, "Type: %d\n", d->d_type);
1447		(void)fprintf(stderr, "Color: %s\n",
1448		    d->d_color ? "black" : "red");
1449		(void)fprintf(stderr, "Left child: %d\n", d->d_left_child);
1450		(void)fprintf(stderr, "Right child: %d\n", d->d_right_child);
1451		(void)fprintf(stderr, "Flags: %#x\n", d->d_flags);
1452		cdf_timestamp_to_timespec(&ts, d->d_created);
1453		(void)fprintf(stderr, "Created %s", cdf_ctime(&ts.tv_sec, buf));
1454		cdf_timestamp_to_timespec(&ts, d->d_modified);
1455		(void)fprintf(stderr, "Modified %s",
1456		    cdf_ctime(&ts.tv_sec, buf));
1457		(void)fprintf(stderr, "Stream %d\n", d->d_stream_first_sector);
1458		(void)fprintf(stderr, "Size %d\n", d->d_size);
1459		switch (d->d_type) {
1460		case CDF_DIR_TYPE_USER_STORAGE:
1461			(void)fprintf(stderr, "Storage: %d\n", d->d_storage);
1462			break;
1463		case CDF_DIR_TYPE_USER_STREAM:
1464			if (sst == NULL)
1465				break;
1466			if (cdf_read_sector_chain(info, h, sat, ssat, sst,
1467			    d->d_stream_first_sector, d->d_size, &scn) == -1) {
1468				warn("Can't read stream for %s at %d len %d",
1469				    name, d->d_stream_first_sector, d->d_size);
1470				break;
1471			}
1472			cdf_dump_stream(&scn);
1473			free(scn.sst_tab);
1474			break;
1475		default:
1476			break;
1477		}
1478
1479	}
1480}
1481
1482void
1483cdf_dump_property_info(const cdf_property_info_t *info, size_t count)
1484{
1485	cdf_timestamp_t tp;
1486	struct timespec ts;
1487	char buf[64];
1488	size_t i, j;
1489
1490	for (i = 0; i < count; i++) {
1491		cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
1492		(void)fprintf(stderr, "%" SIZE_T_FORMAT "u) %s: ", i, buf);
1493		switch (info[i].pi_type) {
1494		case CDF_NULL:
1495			break;
1496		case CDF_SIGNED16:
1497			(void)fprintf(stderr, "signed 16 [%hd]\n",
1498			    info[i].pi_s16);
1499			break;
1500		case CDF_SIGNED32:
1501			(void)fprintf(stderr, "signed 32 [%d]\n",
1502			    info[i].pi_s32);
1503			break;
1504		case CDF_UNSIGNED32:
1505			(void)fprintf(stderr, "unsigned 32 [%u]\n",
1506			    info[i].pi_u32);
1507			break;
1508		case CDF_FLOAT:
1509			(void)fprintf(stderr, "float [%g]\n",
1510			    info[i].pi_f);
1511			break;
1512		case CDF_DOUBLE:
1513			(void)fprintf(stderr, "double [%g]\n",
1514			    info[i].pi_d);
1515			break;
1516		case CDF_LENGTH32_STRING:
1517			(void)fprintf(stderr, "string %u [%.*s]\n",
1518			    info[i].pi_str.s_len,
1519			    info[i].pi_str.s_len, info[i].pi_str.s_buf);
1520			break;
1521		case CDF_LENGTH32_WSTRING:
1522			(void)fprintf(stderr, "string %u [",
1523			    info[i].pi_str.s_len);
1524			for (j = 0; j < info[i].pi_str.s_len - 1; j++)
1525			    (void)fputc(info[i].pi_str.s_buf[j << 1], stderr);
1526			(void)fprintf(stderr, "]\n");
1527			break;
1528		case CDF_FILETIME:
1529			tp = info[i].pi_tp;
1530			if (tp < 1000000000000000LL) {
1531				cdf_print_elapsed_time(buf, sizeof(buf), tp);
1532				(void)fprintf(stderr, "timestamp %s\n", buf);
1533			} else {
1534				char tbuf[26];
1535				cdf_timestamp_to_timespec(&ts, tp);
1536				(void)fprintf(stderr, "timestamp %s",
1537				    cdf_ctime(&ts.tv_sec, tbuf));
1538			}
1539			break;
1540		case CDF_CLIPBOARD:
1541			(void)fprintf(stderr, "CLIPBOARD %u\n", info[i].pi_u32);
1542			break;
1543		default:
1544			DPRINTF(("Don't know how to deal with %#x\n",
1545			    info[i].pi_type));
1546			break;
1547		}
1548	}
1549}
1550
1551
1552void
1553cdf_dump_summary_info(const cdf_header_t *h, const cdf_stream_t *sst)
1554{
1555	char buf[128];
1556	cdf_summary_info_header_t ssi;
1557	cdf_property_info_t *info;
1558	size_t count;
1559
1560	(void)&h;
1561	if (cdf_unpack_summary_info(sst, h, &ssi, &info, &count) == -1)
1562		return;
1563	(void)fprintf(stderr, "Endian: %#x\n", ssi.si_byte_order);
1564	(void)fprintf(stderr, "Os Version %d.%d\n", ssi.si_os_version & 0xff,
1565	    ssi.si_os_version >> 8);
1566	(void)fprintf(stderr, "Os %d\n", ssi.si_os);
1567	cdf_print_classid(buf, sizeof(buf), &ssi.si_class);
1568	(void)fprintf(stderr, "Class %s\n", buf);
1569	(void)fprintf(stderr, "Count %d\n", ssi.si_count);
1570	cdf_dump_property_info(info, count);
1571	free(info);
1572}
1573
1574
1575void
1576cdf_dump_catalog(const cdf_header_t *h, const cdf_stream_t *sst)
1577{
1578	cdf_catalog_t *cat;
1579	cdf_unpack_catalog(h, sst, &cat);
1580	const cdf_catalog_entry_t *ce = cat->cat_e;
1581	struct timespec ts;
1582	char tbuf[64], sbuf[256];
1583	size_t i;
1584
1585	printf("Catalog:\n");
1586	for (i = 0; i < cat->cat_num; i++) {
1587		cdf_timestamp_to_timespec(&ts, ce[i].ce_timestamp);
1588		printf("\t%d %s %s", ce[i].ce_num,
1589		    cdf_u16tos8(sbuf, ce[i].ce_namlen, ce[i].ce_name),
1590		    cdf_ctime(&ts.tv_sec, tbuf));
1591	}
1592	free(cat);
1593}
1594
1595#endif
1596
1597#ifdef TEST
1598int
1599main(int argc, char *argv[])
1600{
1601	int i;
1602	cdf_header_t h;
1603	cdf_sat_t sat, ssat;
1604	cdf_stream_t sst, scn;
1605	cdf_dir_t dir;
1606	cdf_info_t info;
1607	const cdf_directory_t *root;
1608#ifdef __linux__
1609#define getprogname() __progname
1610	extern char *__progname;
1611#endif
1612	if (argc < 2) {
1613		(void)fprintf(stderr, "Usage: %s <filename>\n", getprogname());
1614		return -1;
1615	}
1616
1617	info.i_buf = NULL;
1618	info.i_len = 0;
1619	for (i = 1; i < argc; i++) {
1620		if ((info.i_fd = open(argv[1], O_RDONLY)) == -1)
1621			err(EXIT_FAILURE, "Cannot open `%s'", argv[1]);
1622
1623		if (cdf_read_header(&info, &h) == -1)
1624			err(EXIT_FAILURE, "Cannot read header");
1625#ifdef CDF_DEBUG
1626		cdf_dump_header(&h);
1627#endif
1628
1629		if (cdf_read_sat(&info, &h, &sat) == -1)
1630			err(EXIT_FAILURE, "Cannot read sat");
1631#ifdef CDF_DEBUG
1632		cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
1633#endif
1634
1635		if (cdf_read_ssat(&info, &h, &sat, &ssat) == -1)
1636			err(EXIT_FAILURE, "Cannot read ssat");
1637#ifdef CDF_DEBUG
1638		cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
1639#endif
1640
1641		if (cdf_read_dir(&info, &h, &sat, &dir) == -1)
1642			err(EXIT_FAILURE, "Cannot read dir");
1643
1644		if (cdf_read_short_stream(&info, &h, &sat, &dir, &sst, &root)
1645		    == -1)
1646			err(EXIT_FAILURE, "Cannot read short stream");
1647#ifdef CDF_DEBUG
1648		cdf_dump_stream(&sst);
1649#endif
1650
1651#ifdef CDF_DEBUG
1652		cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
1653#endif
1654
1655
1656		if (cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
1657		    &scn) == -1)
1658			warn("Cannot read summary info");
1659#ifdef CDF_DEBUG
1660		else
1661			cdf_dump_summary_info(&h, &scn);
1662#endif
1663		if (cdf_read_user_stream(&info, &h, &sat, &ssat, &sst,
1664		    &dir, "Catalog", &scn) == -1)
1665			warn("Cannot read catalog");
1666#ifdef CDF_DEBUG
1667		else
1668			cdf_dump_catalog(&h, &scn);
1669#endif
1670
1671		(void)close(info.i_fd);
1672	}
1673
1674	return 0;
1675}
1676#endif
1677