/* cdf.h revision 284194 */
/*-
 * Copyright (c) 2008 Christos Zoulas
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Parse Composite Document Files, the format used in Microsoft Office
 * document files before they switched to zipped XML.
 * Info from: http://sc.openoffice.org/compdocfileformat.pdf
 *
 * N.B. This is the "Composite Document File" format, and not the
 * "Compound Document Format", nor the "Channel Definition Format".
 */

3560786Sps#ifndef _H_CDF_
3660786Sps#define _H_CDF_
3760786Sps
/*
 * On WIN32 and DJGPP builds the names `timespec' and `tv_nsec' are
 * remapped onto `timeval' and `tv_usec'.  WIN32 additionally pulls in
 * <winsock2.h> before the remapping takes effect.
 */
#ifdef WIN32
#include <winsock2.h>
#endif
#if defined(WIN32) || defined(__DJGPP__)
#define timespec timeval
#define tv_nsec tv_usec
#endif

/* Sector identifier: a signed 32-bit value. */
typedef int32_t cdf_secid_t;

/* NOTE(review): by its name, an iteration cap -- confirm the users in cdf.c. */
#define CDF_LOOP_LIMIT					10000

/*
 * Distinguished sector-id values.  The negative ones are parenthesised so
 * they stay a single operand wherever the macro is expanded.
 */
#define CDF_SECID_NULL					0
#define CDF_SECID_FREE					(-1)
#define CDF_SECID_END_OF_CHAIN				(-2)
#define CDF_SECID_SECTOR_ALLOCATION_TABLE		(-3)
#define CDF_SECID_MASTER_SECTOR_ALLOCATION_TABLE	(-4)

5860786Spstypedef struct {
5960786Sps	uint64_t	h_magic;
6060786Sps#define CDF_MAGIC	0xE11AB1A1E011CFD0LL
6160786Sps	uint64_t	h_uuid[2];
6289022Sps	uint16_t	h_revision;
6360786Sps	uint16_t	h_version;
6460786Sps	uint16_t	h_byte_order;
6560786Sps	uint16_t	h_sec_size_p2;
6660786Sps	uint16_t	h_short_sec_size_p2;
6760786Sps	uint8_t		h_unused0[10];
6860786Sps	uint32_t	h_num_sectors_in_sat;
6960786Sps	uint32_t	h_secid_first_directory;
7060786Sps	uint8_t		h_unused1[4];
7160786Sps	uint32_t	h_min_size_standard_stream;
7260786Sps	cdf_secid_t	h_secid_first_sector_in_short_sat;
7360786Sps	uint32_t	h_num_sectors_in_short_sat;
74170259Sdelphij	cdf_secid_t	h_secid_first_sector_in_master_sat;
7560786Sps	uint32_t	h_num_sectors_in_master_sat;
7660786Sps	cdf_secid_t	h_master_sat[436/4];
7760786Sps} cdf_header_t;
7889022Sps
/*
 * Sector geometry helpers.  `h' is a pointer to a structure carrying the
 * h_sec_size_p2 / h_short_sec_size_p2 exponents (cdf_header_t).
 *
 * The shift is carried out in size_t, not int: the exponents are 16-bit
 * values taken from the header, and shifting a signed int by 31 or more
 * is undefined behaviour.  The exponent must still be smaller than the
 * width of size_t; callers are expected to have validated the header.
 */
#define CDF_SEC_SIZE(h) ((size_t)1 << (h)->h_sec_size_p2)
/* Sector `secid' starts (secid + 1) sector sizes into the file. */
#define CDF_SEC_POS(h, secid) (CDF_SEC_SIZE(h) + (secid) * CDF_SEC_SIZE(h))
#define CDF_SHORT_SEC_SIZE(h)	((size_t)1 << (h)->h_short_sec_size_p2)
/* Short sector `secid' starts secid short-sector sizes into its stream. */
#define CDF_SHORT_SEC_POS(h, secid) ((secid) * CDF_SHORT_SEC_SIZE(h))

/* Directory entry identifier: a signed 32-bit value. */
typedef int32_t cdf_dirid_t;
#define CDF_DIRID_NULL	(-1)

/*
 * Timestamp: a signed 64-bit value.
 * NOTE(review): CDF_BASE_YEAR / CDF_TIME_PREC look like the epoch year and
 * the number of ticks per second -- confirm in the conversion routines.
 */
typedef int64_t cdf_timestamp_t;
#define CDF_BASE_YEAR	1601
#define CDF_TIME_PREC	10000000

9160786Spstypedef struct {
9260786Sps	uint16_t	d_name[32];
9360786Sps	uint16_t	d_namelen;
9460786Sps	uint8_t		d_type;
9560786Sps#define CDF_DIR_TYPE_EMPTY		0
9660786Sps#define CDF_DIR_TYPE_USER_STORAGE	1
9760786Sps#define CDF_DIR_TYPE_USER_STREAM	2
9860786Sps#define CDF_DIR_TYPE_LOCKBYTES		3
9960786Sps#define CDF_DIR_TYPE_PROPERTY		4
10060786Sps#define CDF_DIR_TYPE_ROOT_STORAGE	5
10160786Sps	uint8_t		d_color;
10260786Sps#define CDF_DIR_COLOR_READ	0
10360786Sps#define CDF_DIR_COLOR_BLACK	1
10460786Sps	cdf_dirid_t	d_left_child;
10560786Sps	cdf_dirid_t	d_right_child;
10660786Sps	cdf_dirid_t	d_storage;
10760786Sps	uint64_t	d_storage_uuid[2];
10860786Sps	uint32_t	d_flags;
10960786Sps	cdf_timestamp_t d_created;
11060786Sps	cdf_timestamp_t d_modified;
11160786Sps	cdf_secid_t	d_stream_first_sector;
11260786Sps	uint32_t	d_size;
11360786Sps	uint32_t	d_unused0;
11460786Sps} cdf_directory_t;
11560786Sps
11660786Sps#define CDF_DIRECTORY_SIZE	128
11760786Sps
11860786Spstypedef struct {
119128348Stjr	cdf_secid_t *sat_tab;
120128348Stjr	size_t sat_len;
121128348Stjr} cdf_sat_t;
122128348Stjr
12360786Spstypedef struct {
12460786Sps	cdf_directory_t *dir_tab;
12560786Sps	size_t dir_len;
126128348Stjr} cdf_dir_t;
127128348Stjr
/*
 * An untyped data buffer plus two size fields.
 * NOTE(review): the units of sst_len and sst_dirlen are not visible in
 * this header -- confirm in cdf.c.
 */
typedef struct {
	void	*sst_tab;
	size_t	 sst_len;
	size_t	 sst_dirlen;
} cdf_stream_t;

/* Class identifier: 16 bytes split as 4 + 2*2 + 2 + 6. */
typedef struct {
	uint32_t	cl_dword;
	uint16_t	cl_word[2];
	uint8_t		cl_two[2];
	uint8_t		cl_six[6];
} cdf_classid_t;

14160786Spstypedef struct {
142128348Stjr	uint16_t	si_byte_order;
143128348Stjr	uint16_t	si_zero;
14460786Sps	uint16_t	si_os_version;
145128348Stjr	uint16_t	si_os;
14660786Sps	cdf_classid_t	si_class;
147128348Stjr	uint32_t	si_count;
14860786Sps} cdf_summary_info_header_t;
14960786Sps
15060786Sps#define CDF_SECTION_DECLARATION_OFFSET 0x1c
15160786Sps
15260786Spstypedef struct {
15360786Sps	cdf_classid_t	sd_class;
15460786Sps	uint32_t	sd_offset;
15560786Sps} cdf_section_declaration_t;
15660786Sps
/*
 * Section header: two 32-bit fields.
 * NOTE(review): presumably the section length and the property count --
 * confirm in cdf_read_property_info().
 */
typedef struct {
	uint32_t	sh_len;
	uint32_t	sh_properties;
} cdf_section_header_t;

16260786Spstypedef struct {
16360786Sps	uint32_t	pi_id;
16460786Sps	uint32_t	pi_type;
16560786Sps	union {
16660786Sps		uint16_t	_pi_u16;
16760786Sps		int16_t		_pi_s16;
16860786Sps		uint32_t	_pi_u32;
16960786Sps		int32_t		_pi_s32;
170161478Sdelphij		uint64_t	_pi_u64;
17160786Sps		int64_t		_pi_s64;
17260786Sps		cdf_timestamp_t _pi_tp;
17360786Sps		float		_pi_f;
17460786Sps		double		_pi_d;
17560786Sps		struct {
17660786Sps			uint32_t s_len;
17760786Sps			const char *s_buf;
17860786Sps		} _pi_str;
17960786Sps	} pi_val;
18060786Sps#define pi_u64	pi_val._pi_u64
18160786Sps#define pi_s64	pi_val._pi_s64
18260786Sps#define pi_u32	pi_val._pi_u32
18360786Sps#define pi_s32	pi_val._pi_s32
18460786Sps#define pi_u16	pi_val._pi_u16
18560786Sps#define pi_s16	pi_val._pi_s16
18660786Sps#define pi_f	pi_val._pi_f
18760786Sps#define pi_d	pi_val._pi_d
18860786Sps#define pi_tp	pi_val._pi_tp
18960786Sps#define pi_str	pi_val._pi_str
19060786Sps} cdf_property_info_t;
19160786Sps
/*
 * Round `val' up to a multiple of `by'.  The mask arithmetic only gives
 * that result when `by' is a power of two.  Both arguments are evaluated
 * more than once, so avoid side effects in them.
 */
#define CDF_ROUND(val, by)     (((val) + (by) - 1) & ~((by) - 1))

/* Variant type definitions */
#define CDF_EMPTY		0x0000
#define CDF_NULL		0x0001
#define CDF_SIGNED16		0x0002
#define CDF_SIGNED32		0x0003
#define CDF_FLOAT		0x0004
#define CDF_DOUBLE		0x0005
#define CDF_CY			0x0006
#define CDF_DATE		0x0007
#define CDF_BSTR		0x0008
#define CDF_DISPATCH		0x0009
#define CDF_ERROR		0x000a
#define CDF_BOOL		0x000b
#define CDF_VARIANT		0x000c
#define CDF_UNKNOWN		0x000d
#define CDF_DECIMAL		0x000e
/* 0x000f is not assigned in this list. */
#define CDF_SIGNED8		0x0010
#define CDF_UNSIGNED8		0x0011
#define CDF_UNSIGNED16		0x0012
#define CDF_UNSIGNED32		0x0013
#define CDF_SIGNED64		0x0014
#define CDF_UNSIGNED64		0x0015
#define CDF_INT			0x0016
#define CDF_UINT		0x0017
#define CDF_VOID		0x0018
#define CDF_HRESULT		0x0019
#define CDF_PTR			0x001a
#define CDF_SAFEARRAY		0x001b
#define CDF_CARRAY		0x001c
#define CDF_USERDEFINED		0x001d
#define CDF_LENGTH32_STRING	0x001e
#define CDF_LENGTH32_WSTRING	0x001f
#define CDF_FILETIME		0x0040
#define CDF_BLOB		0x0041
#define CDF_STREAM		0x0042
#define CDF_STORAGE		0x0043
#define CDF_STREAMED_OBJECT	0x0044
#define CDF_STORED_OBJECT	0x0045
#define CDF_BLOB_OBJECT		0x0046
#define CDF_CLIPBOARD		0x0047
#define CDF_CLSID		0x0048
/* Single-bit values above the 12 bits covered by CDF_TYPEMASK. */
#define CDF_VECTOR		0x1000
#define CDF_ARRAY		0x2000
#define CDF_BYREF		0x4000
#define CDF_RESERVED		0x8000
#define CDF_ILLEGAL		0xffff
/* CDF_ILLEGALMASKED == (CDF_ILLEGAL & CDF_TYPEMASK). */
#define CDF_ILLEGALMASKED	0x0fff
#define CDF_TYPEMASK		0x0fff

/* Property identifiers; values 0x01 through 0x13 are consecutive. */
#define CDF_PROPERTY_CODE_PAGE			0x00000001
#define CDF_PROPERTY_TITLE			0x00000002
#define CDF_PROPERTY_SUBJECT			0x00000003
#define CDF_PROPERTY_AUTHOR			0x00000004
#define CDF_PROPERTY_KEYWORDS			0x00000005
#define CDF_PROPERTY_COMMENTS			0x00000006
#define CDF_PROPERTY_TEMPLATE			0x00000007
#define CDF_PROPERTY_LAST_SAVED_BY		0x00000008
#define CDF_PROPERTY_REVISION_NUMBER		0x00000009
#define CDF_PROPERTY_TOTAL_EDITING_TIME		0x0000000a
#define CDF_PROPERTY_LAST_PRINTED		0x0000000b
#define CDF_PROPERTY_CREATE_TIME		0x0000000c
#define CDF_PROPERTY_LAST_SAVED_TIME		0x0000000d
#define CDF_PROPERTY_NUMBER_OF_PAGES		0x0000000e
#define CDF_PROPERTY_NUMBER_OF_WORDS		0x0000000f
#define CDF_PROPERTY_NUMBER_OF_CHARACTERS	0x00000010
#define CDF_PROPERTY_THUMBNAIL			0x00000011
#define CDF_PROPERTY_NAME_OF_APPLICATION	0x00000012
#define CDF_PROPERTY_SECURITY			0x00000013
/* Only the top bit set; the constant has an unsigned type. */
#define CDF_PROPERTY_LOCALE_ID			0x80000000

/*
 * Description of the input: a descriptor, a read-only byte buffer and a
 * length.
 * NOTE(review): presumably i_len is the size of i_buf and i_fd is used
 * when no buffer is supplied -- confirm in cdf.c.
 */
typedef struct {
	int			 i_fd;
	const unsigned char	*i_buf;
	size_t			 i_len;
} cdf_info_t;


/*
 * One catalog entry.
 * NOTE(review): ce_namlen presumably bounds the used part of ce_name --
 * confirm in cdf_unpack_catalog().
 */
typedef struct {
	uint16_t ce_namlen;
	uint32_t ce_num;
	uint64_t ce_timestamp;
	uint16_t ce_name[256];
} cdf_catalog_entry_t;

/*
 * A catalog: a count followed by a variable number of entries.
 *
 * cat_e is a C99 flexible array member rather than the GNU zero-length
 * array extension `[0]'.  Allocate an instance with room for n entries as
 * sizeof(cdf_catalog_t) + n * sizeof(cdf_catalog_entry_t).
 * NOTE(review): cat_num is presumably the number of valid cat_e entries.
 */
typedef struct {
	size_t cat_num;
	cdf_catalog_entry_t cat_e[];
} cdf_catalog_t;

283161478Sdelphijstruct timespec;
28460786Spsint cdf_timestamp_to_timespec(struct timespec *, cdf_timestamp_t);
285161478Sdelphijint cdf_timespec_to_timestamp(cdf_timestamp_t *, const struct timespec *);
28660786Spsint cdf_read_header(const cdf_info_t *, cdf_header_t *);
287161478Sdelphijvoid cdf_swap_header(cdf_header_t *);
288161478Sdelphijvoid cdf_unpack_header(cdf_header_t *, char *);
289161478Sdelphijvoid cdf_swap_dir(cdf_directory_t *);
29060786Spsvoid cdf_unpack_dir(cdf_directory_t *, char *);
291161478Sdelphijvoid cdf_swap_class(cdf_classid_t *);
292161478Sdelphijssize_t cdf_read_sector(const cdf_info_t *, void *, size_t, size_t,
293161478Sdelphij    const cdf_header_t *, cdf_secid_t);
294161478Sdelphijssize_t cdf_read_short_sector(const cdf_stream_t *, void *, size_t, size_t,
295161478Sdelphij    const cdf_header_t *, cdf_secid_t);
296161478Sdelphijint cdf_read_sat(const cdf_info_t *, cdf_header_t *, cdf_sat_t *);
297161478Sdelphijsize_t cdf_count_chain(const cdf_sat_t *, cdf_secid_t, size_t);
29860786Spsint cdf_read_long_sector_chain(const cdf_info_t *, const cdf_header_t *,
29960786Sps    const cdf_sat_t *, cdf_secid_t, size_t, cdf_stream_t *);
30060786Spsint cdf_read_short_sector_chain(const cdf_header_t *, const cdf_sat_t *,
30160786Sps    const cdf_stream_t *, cdf_secid_t, size_t, cdf_stream_t *);
30260786Spsint cdf_read_sector_chain(const cdf_info_t *, const cdf_header_t *,
30360786Sps    const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *, cdf_secid_t,
30460786Sps    size_t, cdf_stream_t *);
305128348Stjrint cdf_read_dir(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *,
30660786Sps    cdf_dir_t *);
307221715Sdelphijint cdf_read_ssat(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *,
308221715Sdelphij    cdf_sat_t *);
309221715Sdelphijint cdf_read_short_stream(const cdf_info_t *, const cdf_header_t *,
31060786Sps    const cdf_sat_t *, const cdf_dir_t *, cdf_stream_t *,
311128348Stjr    const cdf_directory_t **);
31289022Spsint cdf_read_property_info(const cdf_stream_t *, const cdf_header_t *, uint32_t,
31389022Sps    cdf_property_info_t **, size_t *, size_t *);
31489022Spsint cdf_read_user_stream(const cdf_info_t *, const cdf_header_t *,
315128348Stjr    const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *,
31689022Sps    const cdf_dir_t *, const char *, cdf_stream_t *);
31760786Sps#define cdf_read_catalog(info, header, sat, ssat, stream, dir, scn) \
31860786Sps    cdf_read_user_stream(info, header, sat, ssat, stream, dir, "Catalog", \
319128348Stjr    scn)
320128348Stjr#define cdf_read_encrypted_package(info, header, sat, ssat, stream, dir, scn) \
321128348Stjr    cdf_read_user_stream(info, header, sat, ssat, stream, dir, \
32260786Sps    "EncryptedPackage", scn)
32360786Spsint cdf_read_summary_info(const cdf_info_t *, const cdf_header_t *,
32460786Sps    const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *,
32560786Sps    const cdf_dir_t *, cdf_stream_t *);
32660786Spsint cdf_unpack_summary_info(const cdf_stream_t *, const cdf_header_t *,
32760786Sps    cdf_summary_info_header_t *, cdf_property_info_t **, size_t *);
328128348Stjrint cdf_unpack_catalog(const cdf_header_t *, const cdf_stream_t *,
32960786Sps    cdf_catalog_t **);
33060786Spsint cdf_print_classid(char *, size_t, const cdf_classid_t *);
331128348Stjrint cdf_print_property_name(char *, size_t, uint32_t);
33260786Spsint cdf_print_elapsed_time(char *, size_t, cdf_timestamp_t);
33360786Spsuint16_t cdf_tole2(uint16_t);
334128348Stjruint32_t cdf_tole4(uint32_t);
33589022Spsuint64_t cdf_tole8(uint64_t);
33689022Spschar *cdf_ctime(const time_t *, char *);
33789022Spschar *cdf_u16tos8(char *, size_t, const uint16_t *);
33889022Sps
/* Dump routines; declared only when CDF_DEBUG is defined. */
#ifdef CDF_DEBUG
void cdf_dump_header(const cdf_header_t *);
void cdf_dump_sat(const char *, const cdf_sat_t *, size_t);
void cdf_dump(void *, size_t);
void cdf_dump_stream(const cdf_header_t *, const cdf_stream_t *);
void cdf_dump_dir(const cdf_info_t *, const cdf_header_t *,
    const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *,
    const cdf_dir_t *);
void cdf_dump_property_info(const cdf_property_info_t *, size_t);
void cdf_dump_summary_info(const cdf_header_t *, const cdf_stream_t *);
void cdf_dump_catalog(const cdf_header_t *, const cdf_stream_t *);
#endif /* CDF_DEBUG */
35060786Sps
351128348Stjr
352128348Stjr#endif /* _H_CDF_ */
35360786Sps