test_archive_string_conversion.c revision 232153
1/*-
2 * Copyright (c) 2011 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25#include "test.h"
26__FBSDID("$FreeBSD$");
27
28#include <locale.h>
29
30#define __LIBARCHIVE_TEST
31#include "archive_string.h"
32
33/*
34Execute the following to rebuild the data for this program:
35   tail -n +36 test_archive_string_conversion.c | /bin/sh
36#
37# This requires http://unicode.org/Public/UNIDATA/NormalizationTest.txt
38#
39if="NormalizationTest.txt"
40if [ ! -f ${if} ]; then
41  echo "Not found: \"${if}\""
42  exit 0
43fi
44of=test_archive_string_conversion.txt.Z
45echo "\$FreeBSD\$" > ${of}.uu
46awk -F ';'  '$0 ~/^[0-9A-F]+/ {printf "%s;%s\n", $2, $3}' ${if} | compress | uuencode ${of} >> ${of}.uu
47exit 1
48*/
49
/*
 * Encode the code point `uc` as UTF-8 at `p`.
 *
 * Writes between one and four bytes and no terminator; the return value
 * is the number of bytes written.  Any value above 0xffff is emitted in
 * the four-byte form; no upper range check is performed.
 */
static int
unicode_to_utf8(char *p, uint32_t uc)
{
	uint32_t lead, mask;
	int len, i;

	/* Pick the sequence length together with its lead-byte layout. */
	if (uc <= 0x7f) {
		len = 1;
		lead = 0x00;
		mask = 0x7f;
	} else if (uc <= 0x7ff) {
		len = 2;
		lead = 0xc0;
		mask = 0x1f;
	} else if (uc <= 0xffff) {
		len = 3;
		lead = 0xe0;
		mask = 0x0f;
	} else {
		len = 4;
		lead = 0xf0;
		mask = 0x07;
	}

	/* Fill the continuation bytes from the tail, six bits at a time. */
	for (i = len - 1; i > 0; i--) {
		p[i] = 0x80 | (uc & 0x3f);
		uc >>= 6;
	}
	/* Whatever bits remain belong to the lead byte. */
	p[0] = lead | (uc & mask);

	return (len);
}
73
/*
 * Store the 16-bit value `u` at `pp` as two bytes, most significant
 * byte first.  `pp` needs no particular alignment.
 */
static void
archive_be16enc(void *pp, uint16_t u)
{
	unsigned char *dst = (unsigned char *)pp;
	unsigned char hi = (u >> 8) & 0xff;
	unsigned char lo = u & 0xff;

	dst[0] = hi;
	dst[1] = lo;
}
82
/*
 * Encode the code point `uc` as UTF-16BE at `p`.
 *
 * Values up to 0xffff are written as one 16-bit unit; larger values are
 * written as a high/low surrogate pair.  No terminator is written.
 * Returns the number of bytes stored (2 or 4).
 */
static int
unicode_to_utf16be(char *p, uint32_t uc)
{
	uint32_t offset;

	if (uc <= 0xffff) {
		archive_be16enc(p, uc);
		return (2);
	}

	/* Does not fit in 16 bits: split into a surrogate pair. */
	offset = uc - 0x10000;
	archive_be16enc(p, ((offset >> 10) & 0x3ff) + 0xD800);
	archive_be16enc(p + 2, (offset & 0x3ff) + 0xDC00);
	return (4);
}
100
/*
 * Store the 16-bit value `u` at `pp` as two bytes, least significant
 * byte first.  `pp` needs no particular alignment.
 */
static void
archive_le16enc(void *pp, uint16_t u)
{
	unsigned char *dst = (unsigned char *)pp;
	unsigned char lo = u & 0xff;
	unsigned char hi = (u >> 8) & 0xff;

	dst[0] = lo;
	dst[1] = hi;
}
109
/*
 * Encode the code point `uc` as UTF-16LE at `p`.
 *
 * Values up to 0xffff are written as one 16-bit unit; larger values are
 * written as a high/low surrogate pair.  No terminator is written.
 * Returns the number of bytes stored (2 or 4).
 */
static size_t
unicode_to_utf16le(char *p, uint32_t uc)
{
	uint32_t offset;

	if (uc <= 0xffff) {
		archive_le16enc(p, uc);
		return (2);
	}

	/* Does not fit in 16 bits: split into a surrogate pair. */
	offset = uc - 0x10000;
	archive_le16enc(p, ((offset >> 10) & 0x3ff) + 0xD800);
	archive_le16enc(p + 2, (offset & 0x3ff) + 0xDC00);
	return (4);
}
127
/* Width of wchar_t on this platform, in bytes. */
static int
wc_size(void)
{
	const int bytes = sizeof(wchar_t);

	return (bytes);
}
133
/*
 * Store the code point `uc` at `wp` as wide character(s).
 *
 * A single wchar_t is written when wchar_t is four bytes wide or when
 * `uc` is at most 0xffff; otherwise `uc` is written as a surrogate pair.
 * No terminator is written.  Returns the number of wchar_t units stored
 * (1 or 2).
 */
static int
unicode_to_wc(wchar_t *wp, uint32_t uc)
{
	uint32_t offset;

	if (wc_size() == 4 || uc <= 0xffff) {
		wp[0] = (wchar_t)uc;
		return (1);
	}

	/* Narrow wchar_t and a value above 0xffff: use a surrogate pair. */
	offset = uc - 0x10000;
	wp[0] = (wchar_t)(((offset >> 10) & 0x3ff) + 0xD800);
	wp[1] = (wchar_t)((offset & 0x3ff) + 0xDC00);
	return (2);
}
153
/*
 * Parse a Unicode pattern and emit the code points it names in four
 * encodings at once.
 *
 * The pattern holds code points written with the digits 0-9 and A-F;
 * every other character (and the terminating NUL) ends the code point
 * accumulated so far.  The results are stored as follows:
 *   out    UTF-8, terminated by one NUL byte
 *   wout   wide characters, terminated by L'\0'
 *   u16be  UTF-16BE, terminated by two NUL bytes
 *   u16le  UTF-16LE, terminated by two NUL bytes
 * No buffer sizes are passed in, so the caller has to provide buffers
 * large enough for the pattern.
 *
 * When exclude_mac_nfd is non-zero, -1 is returned as soon as the
 * pattern names a code point that Mac OS does not convert to NFD; the
 * output buffers may then hold a partial, unterminated result.  These
 * code points are:
 *   - U+2000 - U+2FFF, U+F900 - U+FAFF and U+2F800 - U+2FAFF, see
 *     http://developer.apple.com/library/mac/#qa/qa2001/qa1173.html
 *   - U+1109A, U+1109C and U+110AB, for which the reason is
 *     undocumented:
 *         NFC        NFD
 *         1109A  ==> 11099 110BA
 *         1109C  ==> 1109B 110BA
 *         110AB  ==> 110A5 110BA
 *
 * Returns 0 once the whole pattern has been converted.
 */
static int
scan_unicode_pattern(char *out, wchar_t *wout, char *u16be, char *u16le,
    const char *pattern, int exclude_mac_nfd)
{
	const char *cur;
	char *u8 = out;
	wchar_t *wc = wout;
	char *be = u16be;
	char *le = u16le;
	unsigned uc = 0;

	for (cur = pattern; ; cur++) {
		const char c = *cur;

		/* Hex digits extend the code point being accumulated. */
		if (c >= '0' && c <= '9') {
			uc = (uc << 4) + (c - '0');
			continue;
		}
		if (c >= 'A' && c <= 'F') {
			uc = (uc << 4) + (c - 'A' + 0x0a);
			continue;
		}

		/* A delimiter or the end of the pattern: `uc` is complete. */
		if (exclude_mac_nfd) {
			int in_excluded_range, is_undocumented;

			in_excluded_range =
			    (uc >= 0x2000 && uc <= 0x2FFF) ||
			    (uc >= 0xF900 && uc <= 0xFAFF) ||
			    (uc >= 0x2F800 && uc <= 0x2FAFF);
			is_undocumented =
			    uc == 0x1109A || uc == 0x1109C || uc == 0x110AB;
			if (in_excluded_range || is_undocumented)
				return (-1);
		}

		be += unicode_to_utf16be(be, uc);
		le += unicode_to_utf16le(le, uc);
		wc += unicode_to_wc(wc, uc);
		u8 += unicode_to_utf8(u8, uc);

		if (c == '\0')
			break;
		uc = 0;
	}

	/* Terminate every output; the UTF-16 forms take two zero bytes. */
	be[0] = 0;
	be[1] = 0;
	le[0] = 0;
	le[1] = 0;
	*wc = L'\0';
	*u8 = '\0';

	return (0);
}
216
/*
 * Returns 1 when built for native Windows (_WIN32 defined and
 * __CYGWIN__ not defined), 0 on every other platform.  The caller uses
 * this to decide whether wide-character tests can run without a UTF-8
 * locale.
 */
static int
is_wc_unicode(void)
{
	int unicode_wchar = 0;

#if defined(_WIN32) && !defined(__CYGWIN__)
	unicode_wchar = 1;
#endif
	return (unicode_wchar);
}
226
227/*
228 * A conversion test that we correctly normalize UTF-8 and UTF-16BE characters.
229 * On Mac OS, the characters to be Form D.
230 * On other platforms, the characters to be Form C.
231 */
232static void
233test_archive_string_normalization(void)
234{
235	struct archive *a, *a2;
236	struct archive_entry *ae;
237	struct archive_string utf8;
238	struct archive_mstring mstr;
239	struct archive_string_conv *f_sconv8, *t_sconv8;
240	struct archive_string_conv *f_sconv16be, *f_sconv16le;
241	FILE *fp;
242	char buff[512];
243	static const char reffile[] = "test_archive_string_conversion.txt.Z";
244	ssize_t size;
245	int line = 0;
246	int locale_is_utf8, wc_is_unicode;
247
248	locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8"));
249	wc_is_unicode = is_wc_unicode();
250	/* If it doesn't exist, just warn and return. */
251	if (!locale_is_utf8 && !wc_is_unicode) {
252		skipping("invalid encoding tests require a suitable locale;"
253		    " en_US.UTF-8 not available on this system");
254		return;
255	}
256
257	archive_string_init(&utf8);
258	memset(&mstr, 0, sizeof(mstr));
259
260	/*
261	 * Extract a test pattern file.
262	 */
263	extract_reference_file(reffile);
264	assert((a = archive_read_new()) != NULL);
265	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
266	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a));
267        assertEqualIntA(a, ARCHIVE_OK,
268            archive_read_open_filename(a, reffile, 512));
269
270	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
271	assert((fp = fopen("testdata.txt", "w")) != NULL);
272	while ((size = archive_read_data(a, buff, 512)) > 0)
273		fwrite(buff, 1, size, fp);
274	fclose(fp);
275
276	/* Open a test pattern file. */
277	assert((fp = fopen("testdata.txt", "r")) != NULL);
278
279	/*
280	 * Create string conversion objects.
281	 */
282	assertA(NULL != (f_sconv8 =
283	    archive_string_conversion_from_charset(a, "UTF-8", 0)));
284	assertA(NULL != (f_sconv16be =
285	    archive_string_conversion_from_charset(a, "UTF-16BE", 0)));
286	assertA(NULL != (f_sconv16le =
287	    archive_string_conversion_from_charset(a, "UTF-16LE", 0)));
288	assert((a2 = archive_write_new()) != NULL);
289	assertA(NULL != (t_sconv8 =
290	    archive_string_conversion_to_charset(a2, "UTF-8", 0)));
291	if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL ||
292	    t_sconv8 == NULL || fp == NULL) {
293		/* We cannot continue this test. */
294		if (fp != NULL)
295			fclose(fp);
296		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
297		return;
298	}
299
300	/*
301	 * Read test data.
302	 *  Test data format:
303	 *     <NFC Unicode pattern> ';' <NFD Unicode pattern> '\n'
304	 *  Unicode pattern format:
305	 *     [0-9A-F]{4,5}([ ][0-9A-F]{4,5}){0,}
306	 */
307	while (fgets(buff, sizeof(buff), fp) != NULL) {
308		char nfc[80], nfd[80];
309		char utf8_nfc[80], utf8_nfd[80];
310		char utf16be_nfc[80], utf16be_nfd[80];
311		char utf16le_nfc[80], utf16le_nfd[80];
312		wchar_t wc_nfc[40], wc_nfd[40];
313		char *e, *p;
314
315		line++;
316		if (buff[0] == '#')
317			continue;
318		p = strchr(buff, ';');
319		if (p == NULL)
320			continue;
321		*p++ = '\0';
322		/* Copy an NFC pattern */
323		strncpy(nfc, buff, sizeof(nfc)-1);
324		nfc[sizeof(nfc)-1] = '\0';
325		e = p;
326		p = strchr(p, '\n');
327		if (p == NULL)
328			continue;
329		*p = '\0';
330		/* Copy an NFD pattern */
331		strncpy(nfd, e, sizeof(nfd)-1);
332		nfd[sizeof(nfd)-1] = '\0';
333
334		/*
335		 * Convert an NFC pattern to UTF-8 bytes.
336		 */
337#if defined(__APPLE__)
338		if (scan_unicode_pattern(utf8_nfc, wc_nfc, utf16be_nfc, utf16le_nfc,
339		    nfc, 1) != 0)
340			continue;
341#else
342		scan_unicode_pattern(utf8_nfc, wc_nfc, utf16be_nfc, utf16le_nfc,
343		    nfc, 0);
344#endif
345
346		/*
347		 * Convert an NFD pattern to UTF-8 bytes.
348		 */
349		scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd, utf16le_nfd,
350		    nfd, 0);
351
352		if (locale_is_utf8) {
353#if defined(__APPLE__)
354			/*
355			 * Normalize an NFC string for import.
356			 */
357			assertEqualInt(0, archive_strcpy_in_locale(
358			    &utf8, utf8_nfc, f_sconv8));
359			failure("NFC(%s) should be converted to NFD(%s):%d",
360			    nfc, nfd, line);
361			assertEqualUTF8String(utf8_nfd, utf8.s);
362
363			/*
364			 * Normalize an NFD string for import.
365			 */
366			assertEqualInt(0, archive_strcpy_in_locale(
367			    &utf8, utf8_nfd, f_sconv8));
368			failure("NFD(%s) should not be any changed:%d",
369			    nfd, line);
370			assertEqualUTF8String(utf8_nfd, utf8.s);
371
372			/*
373			 * Copy an NFD string for export.
374			 */
375			assertEqualInt(0, archive_strcpy_in_locale(
376			    &utf8, utf8_nfd, t_sconv8));
377			failure("NFD(%s) should not be any changed:%d",
378			    nfd, line);
379			assertEqualUTF8String(utf8_nfd, utf8.s);
380
381			/*
382			 * Normalize an NFC string in UTF-16BE for import.
383			 */
384			assertEqualInt(0, archive_strncpy_in_locale(
385			    &utf8, utf16be_nfc, 100000, f_sconv16be));
386			failure("NFC(%s) should be converted to NFD(%s):%d",
387			    nfc, nfd, line);
388			assertEqualUTF8String(utf8_nfd, utf8.s);
389
390			/*
391			 * Normalize an NFC string in UTF-16LE for import.
392			 */
393			assertEqualInt(0, archive_strncpy_in_locale(
394			    &utf8, utf16le_nfc, 100000, f_sconv16le));
395			failure("NFC(%s) should be converted to NFD(%s):%d",
396			    nfc, nfd, line);
397			assertEqualUTF8String(utf8_nfd, utf8.s);
398#else
399			/*
400			 * Normalize an NFD string for import.
401			 */
402			assertEqualInt(0, archive_strcpy_in_locale(
403			    &utf8, utf8_nfd, f_sconv8));
404			failure("NFD(%s) should be converted to NFC(%s):%d",
405			    nfd, nfc, line);
406			assertEqualUTF8String(utf8_nfc, utf8.s);
407
408			/*
409			 * Normalize an NFC string for import.
410			 */
411			assertEqualInt(0, archive_strcpy_in_locale(
412			    &utf8, utf8_nfc, f_sconv8));
413			failure("NFC(%s) should not be any changed:%d",
414			    nfc, line);
415			assertEqualUTF8String(utf8_nfc, utf8.s);
416
417			/*
418			 * Copy an NFC string for export.
419			 */
420			assertEqualInt(0, archive_strcpy_in_locale(
421			    &utf8, utf8_nfc, t_sconv8));
422			failure("NFC(%s) should not be any changed:%d",
423			    nfc, line);
424			assertEqualUTF8String(utf8_nfc, utf8.s);
425
426			/*
427			 * Normalize an NFD string in UTF-16BE for import.
428			 */
429			assertEqualInt(0, archive_strncpy_in_locale(
430			    &utf8, utf16be_nfd, 100000, f_sconv16be));
431			failure("NFD(%s) should be converted to NFC(%s):%d",
432			    nfd, nfc, line);
433			assertEqualUTF8String(utf8_nfc, utf8.s);
434
435			/*
436			 * Normalize an NFD string in UTF-16LE for import.
437			 */
438			assertEqualInt(0, archive_strncpy_in_locale(
439			    &utf8, utf16le_nfd, 100000, f_sconv16le));
440			failure("NFD(%s) should be converted to NFC(%s):%d",
441			    nfd, nfc, line);
442			assertEqualUTF8String(utf8_nfc, utf8.s);
443#endif
444		}
445
446		/*
447		 * Test for archive_mstring interface.
448		 * In specific, Windows platform UTF-16BE is directly
449		 * converted to/from wide-character to avoid the effect of
450		 * current locale since windows platform cannot make
451		 * locale UTF-8.
452		 */
453		if (locale_is_utf8 || wc_is_unicode) {
454			const wchar_t *wp;
455			const char *mp;
456			size_t mplen;
457
458#if defined(__APPLE__)
459			/*
460			 * Normalize an NFD string in UTF-8 for import.
461			 */
462			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
463			    &mstr, utf8_nfc, 100000, f_sconv8));
464			assertEqualInt(0,
465			    archive_mstring_get_wcs(a, &mstr, &wp));
466			failure("UTF-8 NFC(%s) should be converted "
467			    "to WCS NFD(%s):%d", nfc, nfd, line);
468			assertEqualWString(wc_nfd, wp);
469
470			/*
471			 * Normalize an NFD string in UTF-16BE for import.
472			 */
473			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
474			    &mstr, utf16be_nfc, 100000, f_sconv16be));
475			assertEqualInt(0,
476			    archive_mstring_get_wcs(a, &mstr, &wp));
477			failure("UTF-16BE NFC(%s) should be converted "
478			    "to WCS NFD(%s):%d", nfc, nfd, line);
479			assertEqualWString(wc_nfd, wp);
480
481			/*
482			 * Normalize an NFD string in UTF-16LE for import.
483			 */
484			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
485			    &mstr, utf16le_nfc, 100000, f_sconv16le));
486			assertEqualInt(0,
487			    archive_mstring_get_wcs(a, &mstr, &wp));
488			failure("UTF-16LE NFC(%s) should be converted "
489			    "to WCS NFD(%s):%d", nfc, nfd, line);
490			assertEqualWString(wc_nfd, wp);
491
492			/*
493			 * Copy an NFD wide-string for export.
494			 */
495			assertEqualInt(0, archive_mstring_copy_wcs(
496			    &mstr, wc_nfd));
497			assertEqualInt(0, archive_mstring_get_mbs_l(
498			    &mstr, &mp, &mplen, t_sconv8));
499			failure("WCS NFD(%s) should be UTF-8 NFD:%d"
500			    ,nfd, line);
501			assertEqualUTF8String(utf8_nfd, mp);
502#else
503			/*
504			 * Normalize an NFD string in UTF-8 for import.
505			 */
506			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
507			    &mstr, utf8_nfd, 100000, f_sconv8));
508			assertEqualInt(0,
509			    archive_mstring_get_wcs(a, &mstr, &wp));
510			failure("UTF-8 NFD(%s) should be converted "
511			    "to WCS NFC(%s):%d", nfd, nfc, line);
512			assertEqualWString(wc_nfc, wp);
513
514			/*
515			 * Normalize an NFD string in UTF-16BE for import.
516			 */
517			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
518			    &mstr, utf16be_nfd, 100000, f_sconv16be));
519			assertEqualInt(0,
520			    archive_mstring_get_wcs(a, &mstr, &wp));
521			failure("UTF-8 NFD(%s) should be converted "
522			    "to WCS NFC(%s):%d", nfd, nfc, line);
523			assertEqualWString(wc_nfc, wp);
524
525			/*
526			 * Normalize an NFD string in UTF-16LE for import.
527			 */
528			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
529			    &mstr, utf16le_nfd, 100000, f_sconv16le));
530			assertEqualInt(0,
531			    archive_mstring_get_wcs(a, &mstr, &wp));
532			failure("UTF-8 NFD(%s) should be converted "
533			    "to WCS NFC(%s):%d", nfd, nfc, line);
534			assertEqualWString(wc_nfc, wp);
535
536			/*
537			 * Copy an NFC wide-string for export.
538			 */
539			assertEqualInt(0, archive_mstring_copy_wcs(
540			    &mstr, wc_nfc));
541			assertEqualInt(0, archive_mstring_get_mbs_l(
542			    &mstr, &mp, &mplen, t_sconv8));
543			failure("WCS NFC(%s) should be UTF-8 NFC:%d"
544			    ,nfc, line);
545			assertEqualUTF8String(utf8_nfc, mp);
546#endif
547		}
548	}
549
550	archive_string_free(&utf8);
551	archive_mstring_clean(&mstr);
552	fclose(fp);
553	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
554	assertEqualInt(ARCHIVE_OK, archive_write_free(a2));
555}
556
557static void
558test_archive_string_canonicalization(void)
559{
560	struct archive *a;
561	struct archive_string_conv *sconv;
562
563	setlocale(LC_ALL, "en_US.UTF-8");
564
565	assert((a = archive_read_new()) != NULL);
566
567	assertA(NULL != (sconv =
568	    archive_string_conversion_to_charset(a, "UTF-8", 1)));
569	failure("Charset name should be UTF-8");
570	assertEqualString("UTF-8",
571	    archive_string_conversion_charset_name(sconv));
572
573	assertA(NULL != (sconv =
574	    archive_string_conversion_to_charset(a, "UTF8", 1)));
575	failure("Charset name should be UTF-8");
576	assertEqualString("UTF-8",
577	    archive_string_conversion_charset_name(sconv));
578
579	assertA(NULL != (sconv =
580	    archive_string_conversion_to_charset(a, "utf8", 1)));
581	failure("Charset name should be UTF-8");
582	assertEqualString("UTF-8",
583	    archive_string_conversion_charset_name(sconv));
584
585	assertA(NULL != (sconv =
586	    archive_string_conversion_to_charset(a, "UTF-16BE", 1)));
587	failure("Charset name should be UTF-16BE");
588	assertEqualString("UTF-16BE",
589	    archive_string_conversion_charset_name(sconv));
590
591	assertA(NULL != (sconv =
592	    archive_string_conversion_to_charset(a, "UTF16BE", 1)));
593	failure("Charset name should be UTF-16BE");
594	assertEqualString("UTF-16BE",
595	    archive_string_conversion_charset_name(sconv));
596
597	assertA(NULL != (sconv =
598	    archive_string_conversion_to_charset(a, "utf16be", 1)));
599	failure("Charset name should be UTF-16BE");
600	assertEqualString("UTF-16BE",
601	    archive_string_conversion_charset_name(sconv));
602
603	assertA(NULL != (sconv =
604	    archive_string_conversion_to_charset(a, "UTF-16LE", 1)));
605	failure("Charset name should be UTF-16LE");
606	assertEqualString("UTF-16LE",
607	    archive_string_conversion_charset_name(sconv));
608
609	assertA(NULL != (sconv =
610	    archive_string_conversion_to_charset(a, "UTF16LE", 1)));
611	failure("Charset name should be UTF-16LE");
612	assertEqualString("UTF-16LE",
613	    archive_string_conversion_charset_name(sconv));
614
615	assertA(NULL != (sconv =
616	    archive_string_conversion_to_charset(a, "utf16le", 1)));
617	failure("Charset name should be UTF-16LE");
618	assertEqualString("UTF-16LE",
619	    archive_string_conversion_charset_name(sconv));
620
621	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
622
623}
624
/*
 * Test entry point: runs the normalization checks first and the charset
 * name canonicalization checks second.
 */
DEFINE_TEST(test_archive_string_conversion)
{
	/* NFC/NFD handling for UTF-8, UTF-16BE and UTF-16LE input. */
	test_archive_string_normalization();

	/* Canonical spelling of charset names. */
	test_archive_string_canonicalization();
}
630