suffix.c revision 292588
1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       suffix.c
4/// \brief      Checks filename suffix and creates the destination filename
5//
6//  Author:     Lasse Collin
7//
8//  This file has been put into the public domain.
9//  You can do whatever you want with this file.
10//
11///////////////////////////////////////////////////////////////////////////////
12
13#include "private.h"
14
15#ifdef __DJGPP__
16#	include <fcntl.h>
17#endif
18
19// For case-insensitive filename suffix on case-insensitive systems
20#if defined(TUKLIB_DOSLIKE) || defined(__VMS)
21#	define strcmp strcasecmp
22#endif
23
24
/// Filename suffix installed by suffix_set(), or NULL while no custom
/// suffix has been set. When non-NULL it points to a copy created with
/// xstrdup(), which suffix_set() frees before installing a replacement.
static char *custom_suffix = NULL;
26
27
/// \brief      Check whether a character separates path components
///
/// The forward slash is always a separator. When TUKLIB_DOSLIKE is
/// defined, the backslash and the colon are accepted as well.
///
/// \param      c       Character to examine
///
/// \return     true if c is a directory separator, false otherwise.
static bool
is_dir_sep(char c)
{
	if (c == '/')
		return true;

#ifdef TUKLIB_DOSLIKE
	if (c == '\\' || c == ':')
		return true;
#endif

	return false;
}
38
39
/// \brief      Check whether a string contains any directory separator
///
/// The set of separators is '/' everywhere, plus '\\' and ':' when
/// TUKLIB_DOSLIKE is defined.
///
/// \param      str     NUL-terminated string to scan
///
/// \return     true if at least one separator occurs in str.
static bool
has_dir_sep(const char *str)
{
#ifdef TUKLIB_DOSLIKE
	static const char separators[] = "/\\:";
#else
	static const char separators[] = "/";
#endif

	// strcspn() stops at the first separator, or at the terminating
	// NUL when the string contains none.
	return str[strcspn(str, separators)] != '\0';
}
50
51
#ifdef __DJGPP__
/// \brief      Detect the trailing-dash suffix used with 8.3 short
///             filenames (SFN)
///
/// \param      str     Filename, possibly with a leading path
/// \param      len     strlen(str)
///
/// \return     true if str matches *.?- or *.??-, where the character
///             before the dot and the extension characters are not
///             directory separators. Otherwise false.
static bool
has_sfn_suffix(const char *str, size_t len)
{
	// The shortest possible match is four characters long ("a.b-")
	// and every match ends with a dash.
	if (len < 4 || str[len - 1] != '-')
		return false;

	// The character before the dash must be a real extension
	// character, not the dot itself or a path separator.
	const char last_ext = str[len - 2];
	if (last_ext == '.' || is_dir_sep(last_ext))
		return false;

	// *.?-
	if (str[len - 3] == '.')
		return !is_dir_sep(str[len - 4]);

	// *.??-
	if (len < 5 || is_dir_sep(str[len - 3]) || str[len - 4] != '.')
		return false;

	return !is_dir_sep(str[len - 5]);
}
#endif
75
76
/// \brief      Test whether src_name ends with the given suffix
///
/// The comparison uses strcmp(), which this file redefines to
/// strcasecmp on TUKLIB_DOSLIKE and VMS builds.
///
/// \param      suffix      Filename suffix to look for
/// \param      src_name    Input filename, possibly with a leading path
/// \param      src_len     strlen(src_name)
///
/// \return     Length of src_name without the suffix (always non-zero)
///             if the suffix matches. Zero if it does not match, or if
///             no filename character would remain in front of it.
static size_t
test_suffix(const char *suffix, const char *src_name, size_t src_len)
{
	const size_t suffix_len = strlen(suffix);

	// At least one character has to remain once the suffix is removed.
	if (src_len <= suffix_len)
		return 0;

	const size_t stem_len = src_len - suffix_len;

	// That remaining character must belong to the filename itself
	// rather than be the end of a directory part.
	if (is_dir_sep(src_name[stem_len - 1]))
		return 0;

	return strcmp(suffix, src_name + stem_len) == 0 ? stem_len : 0;
}
103
104
105/// \brief      Removes the filename suffix of the compressed file
106///
107/// \return     Name of the uncompressed file, or NULL if file has unknown
108///             suffix.
109static char *
110uncompressed_name(const char *src_name, const size_t src_len)
111{
112	static const struct {
113		const char *compressed;
114		const char *uncompressed;
115	} suffixes[] = {
116		{ ".xz",    "" },
117		{ ".txz",   ".tar" }, // .txz abbreviation for .txt.gz is rare.
118		{ ".lzma",  "" },
119#ifdef __DJGPP__
120		{ ".lzm",   "" },
121#endif
122		{ ".tlz",   ".tar" },
123		// { ".gz",    "" },
124		// { ".tgz",   ".tar" },
125	};
126
127	const char *new_suffix = "";
128	size_t new_len = 0;
129
130	if (opt_format == FORMAT_RAW) {
131		// Don't check for known suffixes when --format=raw was used.
132		if (custom_suffix == NULL) {
133			message_error(_("%s: With --format=raw, "
134					"--suffix=.SUF is required unless "
135					"writing to stdout"), src_name);
136			return NULL;
137		}
138	} else {
139		for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
140			new_len = test_suffix(suffixes[i].compressed,
141					src_name, src_len);
142			if (new_len != 0) {
143				new_suffix = suffixes[i].uncompressed;
144				break;
145			}
146		}
147
148#ifdef __DJGPP__
149		// Support also *.?- -> *.? and *.??- -> *.?? on DOS.
150		// This is done also when long filenames are available
151		// to keep it easy to decompress files created when
152		// long filename support wasn't available.
153		if (new_len == 0 && has_sfn_suffix(src_name, src_len)) {
154			new_suffix = "";
155			new_len = src_len - 1;
156		}
157#endif
158	}
159
160	if (new_len == 0 && custom_suffix != NULL)
161		new_len = test_suffix(custom_suffix, src_name, src_len);
162
163	if (new_len == 0) {
164		message_warning(_("%s: Filename has an unknown suffix, "
165				"skipping"), src_name);
166		return NULL;
167	}
168
169	const size_t new_suffix_len = strlen(new_suffix);
170	char *dest_name = xmalloc(new_len + new_suffix_len + 1);
171
172	memcpy(dest_name, src_name, new_len);
173	memcpy(dest_name + new_len, new_suffix, new_suffix_len);
174	dest_name[new_len + new_suffix_len] = '\0';
175
176	return dest_name;
177}
178
179
/// \brief      Warn that a file already carries a compressed suffix
///
/// compressed_name() needs this warning in several places, so it lives
/// in a helper of its own.
///
/// \param      src_name    Name of the file being skipped
/// \param      suffix      Suffix that was found on src_name
static void
msg_suffix(const char *src_name, const char *suffix)
{
	message_warning(_("%s: File already has `%s' suffix, skipping"),
			src_name, suffix);
}
189
190
191/// \brief      Appends suffix to src_name
192///
193/// In contrast to uncompressed_name(), we check only suffixes that are valid
194/// for the specified file format.
195static char *
196compressed_name(const char *src_name, size_t src_len)
197{
198	// The order of these must match the order in args.h.
199	static const char *const all_suffixes[][4] = {
200		{
201			".xz",
202			".txz",
203			NULL
204		}, {
205			".lzma",
206#ifdef __DJGPP__
207			".lzm",
208#endif
209			".tlz",
210			NULL
211/*
212		}, {
213			".gz",
214			".tgz",
215			NULL
216*/
217		}, {
218			// --format=raw requires specifying the suffix
219			// manually or using stdout.
220			NULL
221		}
222	};
223
224	// args.c ensures this.
225	assert(opt_format != FORMAT_AUTO);
226
227	const size_t format = opt_format - 1;
228	const char *const *suffixes = all_suffixes[format];
229
230	// Look for known filename suffixes and refuse to compress them.
231	for (size_t i = 0; suffixes[i] != NULL; ++i) {
232		if (test_suffix(suffixes[i], src_name, src_len) != 0) {
233			msg_suffix(src_name, suffixes[i]);
234			return NULL;
235		}
236	}
237
238#ifdef __DJGPP__
239	// Recognize also the special suffix that is used when long
240	// filename (LFN) support isn't available. This suffix is
241	// recognized on LFN systems too.
242	if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) {
243		msg_suffix(src_name, "-");
244		return NULL;
245	}
246#endif
247
248	if (custom_suffix != NULL) {
249		if (test_suffix(custom_suffix, src_name, src_len) != 0) {
250			msg_suffix(src_name, custom_suffix);
251			return NULL;
252		}
253	}
254
255	// TODO: Hmm, maybe it would be better to validate this in args.c,
256	// since the suffix handling when decoding is weird now.
257	if (opt_format == FORMAT_RAW && custom_suffix == NULL) {
258		message_error(_("%s: With --format=raw, "
259				"--suffix=.SUF is required unless "
260				"writing to stdout"), src_name);
261		return NULL;
262	}
263
264	const char *suffix = custom_suffix != NULL
265			? custom_suffix : suffixes[0];
266	size_t suffix_len = strlen(suffix);
267
268#ifdef __DJGPP__
269	if (!_use_lfn(src_name)) {
270		// Long filename (LFN) support isn't available and we are
271		// limited to 8.3 short filenames (SFN).
272		//
273		// Look for suffix separator from the filename, and make sure
274		// that it is in the filename, not in a directory name.
275		const char *sufsep = strrchr(src_name, '.');
276		if (sufsep == NULL || sufsep[1] == '\0'
277				|| has_dir_sep(sufsep)) {
278			// src_name has no filename extension.
279			//
280			// Examples:
281			// xz foo         -> foo.xz
282			// xz -F lzma foo -> foo.lzm
283			// xz -S x foo    -> foox
284			// xz -S x foo.   -> foo.x
285			// xz -S x.y foo  -> foox.y
286			// xz -S .x foo   -> foo.x
287			// xz -S .x foo.  -> foo.x
288			//
289			// Avoid double dots:
290			if (sufsep != NULL && sufsep[1] == '\0'
291					&& suffix[0] == '.')
292				--src_len;
293
294		} else if (custom_suffix == NULL
295				&& strcasecmp(sufsep, ".tar") == 0) {
296			// ".tar" is handled specially.
297			//
298			// Examples:
299			// xz foo.tar          -> foo.txz
300			// xz -F lzma foo.tar  -> foo.tlz
301			static const char *const tar_suffixes[] = {
302				".txz",
303				".tlz",
304				// ".tgz",
305			};
306			suffix = tar_suffixes[format];
307			suffix_len = 4;
308			src_len -= 4;
309
310		} else {
311			if (custom_suffix == NULL && opt_format == FORMAT_XZ) {
312				// Instead of the .xz suffix, use a single
313				// character at the end of the filename
314				// extension. This is to minimize name
315				// conflicts when compressing multiple files
316				// with the same basename. E.g. foo.txt and
317				// foo.exe become foo.tx- and foo.ex-. Dash
318				// is rare as the last character of the
319				// filename extension, so it seems to be
320				// quite safe choice and it stands out better
321				// in directory listings than e.g. x. For
322				// comparison, gzip uses z.
323				suffix = "-";
324				suffix_len = 1;
325			}
326
327			if (suffix[0] == '.') {
328				// The first character of the suffix is a dot.
329				// Throw away the original filename extension
330				// and replace it with the new suffix.
331				//
332				// Examples:
333				// xz -F lzma foo.txt  -> foo.lzm
334				// xz -S .x  foo.txt   -> foo.x
335				src_len = sufsep - src_name;
336
337			} else {
338				// The first character of the suffix is not
339				// a dot. Preserve the first 0-2 characters
340				// of the original filename extension.
341				//
342				// Examples:
343				// xz foo.txt         -> foo.tx-
344				// xz -S x  foo.c     -> foo.cx
345				// xz -S ab foo.c     -> foo.cab
346				// xz -S ab foo.txt   -> foo.tab
347				// xz -S abc foo.txt  -> foo.abc
348				//
349				// Truncate the suffix to three chars:
350				if (suffix_len > 3)
351					suffix_len = 3;
352
353				// If needed, overwrite 1-3 characters.
354				if (strlen(sufsep) > 4 - suffix_len)
355					src_len = sufsep - src_name
356							+ 4 - suffix_len;
357			}
358		}
359	}
360#endif
361
362	char *dest_name = xmalloc(src_len + suffix_len + 1);
363
364	memcpy(dest_name, src_name, src_len);
365	memcpy(dest_name + src_len, suffix, suffix_len);
366	dest_name[src_len + suffix_len] = '\0';
367
368	return dest_name;
369}
370
371
372extern char *
373suffix_get_dest_name(const char *src_name)
374{
375	assert(src_name != NULL);
376
377	// Length of the name is needed in all cases to locate the end of
378	// the string to compare the suffix, so calculate the length here.
379	const size_t src_len = strlen(src_name);
380
381	return opt_mode == MODE_COMPRESS
382			? compressed_name(src_name, src_len)
383			: uncompressed_name(src_name, src_len);
384}
385
386
387extern void
388suffix_set(const char *suffix)
389{
390	// Empty suffix and suffixes having a directory separator are
391	// rejected. Such suffixes would break things later.
392	if (suffix[0] == '\0' || has_dir_sep(suffix))
393		message_fatal(_("%s: Invalid filename suffix"), suffix);
394
395	// Replace the old custom_suffix (if any) with the new suffix.
396	free(custom_suffix);
397	custom_suffix = xstrdup(suffix);
398	return;
399}
400