1207753Smm///////////////////////////////////////////////////////////////////////////////
2207753Smm//
3207753Smm/// \file       suffix.c
4207753Smm/// \brief      Checks filename suffix and creates the destination filename
5207753Smm//
6207753Smm//  Author:     Lasse Collin
7207753Smm//
8207753Smm//  This file has been put into the public domain.
9207753Smm//  You can do whatever you want with this file.
10207753Smm//
11207753Smm///////////////////////////////////////////////////////////////////////////////
12207753Smm
13207753Smm#include "private.h"
14207753Smm
15292588Sdelphij#ifdef __DJGPP__
16292588Sdelphij#	include <fcntl.h>
17292588Sdelphij#endif
18292588Sdelphij
19207753Smm// For case-insensitive filename suffix on case-insensitive systems
20207753Smm#if defined(TUKLIB_DOSLIKE) || defined(__VMS)
21207753Smm#	define strcmp strcasecmp
22207753Smm#endif
23207753Smm
24207753Smm
/// Suffix installed by suffix_set(), stored as a copy made with xstrdup().
/// NULL until suffix_set() has been called; the functions in this file
/// treat NULL as "no custom suffix".
static char *custom_suffix = NULL;
26207753Smm
27207753Smm
/// \brief      Test if the char is a directory separator
///
/// A slash is always a separator. When TUKLIB_DOSLIKE is defined,
/// a backslash and a colon are accepted as separators too.
///
/// \param      c       Character to classify
///
/// \return     true if c is a directory separator, false otherwise.
static bool
is_dir_sep(char c)
{
	switch (c) {
	case '/':
#ifdef TUKLIB_DOSLIKE
	case '\\':
	case ':':
#endif
		return true;

	default:
		return false;
	}
}
38219001Smm
39219001Smm
/// \brief      Test if the string contains a directory separator
///
/// The set of separators is the same as in is_dir_sep(): a slash, plus
/// a backslash and a colon when TUKLIB_DOSLIKE is defined.
///
/// \param      str     Null-terminated string to scan
///
/// \return     true if at least one separator occurs in str.
static bool
has_dir_sep(const char *str)
{
#ifdef TUKLIB_DOSLIKE
	static const char separators[] = "/\\:";
#else
	static const char separators[] = "/";
#endif

	// strcspn() gives the length of the leading run that is free of
	// separators. If that run stops before the terminating null
	// character, it stopped at a separator.
	return str[strcspn(str, separators)] != '\0';
}
50219001Smm
51219001Smm
#ifdef __DJGPP__
/// \brief      Test for special suffix used for 8.3 short filenames (SFN)
///
/// \param      str     Filename, possibly with a leading path
/// \param      len     strlen(str)
///
/// \return     If str matches *.?- or *.??-, true is returned. Otherwise
///             false is returned. The character in front of the dot must
///             not be a directory separator, so the extension has to
///             belong to a non-empty filename.
static bool
has_sfn_suffix(const char *str, size_t len)
{
	// The shortest possible match is "x.y-" and the name must end
	// with a dash.
	if (len < 4 || str[len - 1] != '-')
		return false;

	// The character right before the dash has to be an ordinary
	// extension character.
	const char before_dash = str[len - 2];
	if (before_dash == '.' || is_dir_sep(before_dash))
		return false;

	// *.?-
	if (str[len - 3] == '.')
		return !is_dir_sep(str[len - 4]);

	// *.??-
	if (len < 5 || is_dir_sep(str[len - 3]) || str[len - 4] != '.')
		return false;

	return !is_dir_sep(str[len - 5]);
}
#endif
75292588Sdelphij
76292588Sdelphij
/// \brief      Checks if src_name ends with the given suffix
///
/// \param      suffix      Filename suffix to look for
/// \param      src_name    Input filename, possibly with a leading path
/// \param      src_len     strlen(src_name)
///
/// \return     If src_name has the suffix, the length of the part that
///             precedes the suffix (src_len - strlen(suffix)) is returned.
///             It is always a positive integer. Otherwise zero is returned.
static size_t
test_suffix(const char *suffix, const char *src_name, size_t src_len)
{
	const size_t suffix_len = strlen(suffix);

	// A name that consists of nothing but the suffix doesn't count:
	// there must be at least one more character in front of it.
	if (src_len <= suffix_len)
		return 0;

	const size_t stem_len = src_len - suffix_len;

	// src_name may contain a path, so that extra character must belong
	// to the filename itself and not be a directory separator.
	if (is_dir_sep(src_name[stem_len - 1]))
		return 0;

	// NOTE: On case-insensitive systems this file redefines strcmp
	// to strcasecmp, so the name strcmp is used here on purpose.
	return strcmp(suffix, src_name + stem_len) == 0 ? stem_len : 0;
}
103207753Smm
104207753Smm
105207753Smm/// \brief      Removes the filename suffix of the compressed file
106207753Smm///
107207753Smm/// \return     Name of the uncompressed file, or NULL if file has unknown
108207753Smm///             suffix.
109207753Smmstatic char *
110207753Smmuncompressed_name(const char *src_name, const size_t src_len)
111207753Smm{
112223935Smm	static const struct {
113223935Smm		const char *compressed;
114223935Smm		const char *uncompressed;
115223935Smm	} suffixes[] = {
116207753Smm		{ ".xz",    "" },
117207753Smm		{ ".txz",   ".tar" }, // .txz abbreviation for .txt.gz is rare.
118207753Smm		{ ".lzma",  "" },
119292588Sdelphij#ifdef __DJGPP__
120292588Sdelphij		{ ".lzm",   "" },
121292588Sdelphij#endif
122207753Smm		{ ".tlz",   ".tar" },
123207753Smm		// { ".gz",    "" },
124207753Smm		// { ".tgz",   ".tar" },
125207753Smm	};
126207753Smm
127207753Smm	const char *new_suffix = "";
128207753Smm	size_t new_len = 0;
129207753Smm
130207753Smm	if (opt_format == FORMAT_RAW) {
131207753Smm		// Don't check for known suffixes when --format=raw was used.
132207753Smm		if (custom_suffix == NULL) {
133207753Smm			message_error(_("%s: With --format=raw, "
134207753Smm					"--suffix=.SUF is required unless "
135207753Smm					"writing to stdout"), src_name);
136207753Smm			return NULL;
137207753Smm		}
138207753Smm	} else {
139207753Smm		for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
140207753Smm			new_len = test_suffix(suffixes[i].compressed,
141207753Smm					src_name, src_len);
142207753Smm			if (new_len != 0) {
143207753Smm				new_suffix = suffixes[i].uncompressed;
144207753Smm				break;
145207753Smm			}
146207753Smm		}
147292588Sdelphij
148292588Sdelphij#ifdef __DJGPP__
149292588Sdelphij		// Support also *.?- -> *.? and *.??- -> *.?? on DOS.
150292588Sdelphij		// This is done also when long filenames are available
151292588Sdelphij		// to keep it easy to decompress files created when
152292588Sdelphij		// long filename support wasn't available.
153292588Sdelphij		if (new_len == 0 && has_sfn_suffix(src_name, src_len)) {
154292588Sdelphij			new_suffix = "";
155292588Sdelphij			new_len = src_len - 1;
156292588Sdelphij		}
157292588Sdelphij#endif
158207753Smm	}
159207753Smm
160207753Smm	if (new_len == 0 && custom_suffix != NULL)
161207753Smm		new_len = test_suffix(custom_suffix, src_name, src_len);
162207753Smm
163207753Smm	if (new_len == 0) {
164207753Smm		message_warning(_("%s: Filename has an unknown suffix, "
165207753Smm				"skipping"), src_name);
166207753Smm		return NULL;
167207753Smm	}
168207753Smm
169207753Smm	const size_t new_suffix_len = strlen(new_suffix);
170207753Smm	char *dest_name = xmalloc(new_len + new_suffix_len + 1);
171207753Smm
172207753Smm	memcpy(dest_name, src_name, new_len);
173207753Smm	memcpy(dest_name + new_len, new_suffix, new_suffix_len);
174207753Smm	dest_name[new_len + new_suffix_len] = '\0';
175207753Smm
176207753Smm	return dest_name;
177207753Smm}
178207753Smm
179207753Smm
/// \brief      Warns that the file already has the given suffix
///
/// compressed_name() needs this warning in several places, so it lives
/// in a function of its own.
///
/// \param      src_name    Input filename shown in the message
/// \param      suffix      The suffix that was found in src_name
static void
msg_suffix(const char *src_name, const char *suffix)
{
	message_warning(_("%s: File already has `%s' suffix, skipping"),
			src_name, suffix);
}
189292588Sdelphij
190292588Sdelphij
191207753Smm/// \brief      Appends suffix to src_name
192207753Smm///
193207753Smm/// In contrast to uncompressed_name(), we check only suffixes that are valid
194207753Smm/// for the specified file format.
195207753Smmstatic char *
196292588Sdelphijcompressed_name(const char *src_name, size_t src_len)
197207753Smm{
198207753Smm	// The order of these must match the order in args.h.
199292588Sdelphij	static const char *const all_suffixes[][4] = {
200207753Smm		{
201223935Smm			".xz",
202223935Smm			".txz",
203223935Smm			NULL
204207753Smm		}, {
205223935Smm			".lzma",
206292588Sdelphij#ifdef __DJGPP__
207292588Sdelphij			".lzm",
208292588Sdelphij#endif
209223935Smm			".tlz",
210223935Smm			NULL
211207753Smm/*
212207753Smm		}, {
213223935Smm			".gz",
214223935Smm			".tgz",
215223935Smm			NULL
216207753Smm*/
217207753Smm		}, {
218207753Smm			// --format=raw requires specifying the suffix
219207753Smm			// manually or using stdout.
220223935Smm			NULL
221207753Smm		}
222207753Smm	};
223207753Smm
224207753Smm	// args.c ensures this.
225207753Smm	assert(opt_format != FORMAT_AUTO);
226207753Smm
227207753Smm	const size_t format = opt_format - 1;
228223935Smm	const char *const *suffixes = all_suffixes[format];
229207753Smm
230292588Sdelphij	// Look for known filename suffixes and refuse to compress them.
231223935Smm	for (size_t i = 0; suffixes[i] != NULL; ++i) {
232223935Smm		if (test_suffix(suffixes[i], src_name, src_len) != 0) {
233292588Sdelphij			msg_suffix(src_name, suffixes[i]);
234207753Smm			return NULL;
235207753Smm		}
236207753Smm	}
237207753Smm
238292588Sdelphij#ifdef __DJGPP__
239292588Sdelphij	// Recognize also the special suffix that is used when long
240292588Sdelphij	// filename (LFN) support isn't available. This suffix is
241292588Sdelphij	// recognized on LFN systems too.
242292588Sdelphij	if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) {
243292588Sdelphij		msg_suffix(src_name, "-");
244292588Sdelphij		return NULL;
245292588Sdelphij	}
246292588Sdelphij#endif
247292588Sdelphij
248223935Smm	if (custom_suffix != NULL) {
249223935Smm		if (test_suffix(custom_suffix, src_name, src_len) != 0) {
250292588Sdelphij			msg_suffix(src_name, custom_suffix);
251223935Smm			return NULL;
252223935Smm		}
253223935Smm	}
254223935Smm
255207753Smm	// TODO: Hmm, maybe it would be better to validate this in args.c,
256207753Smm	// since the suffix handling when decoding is weird now.
257207753Smm	if (opt_format == FORMAT_RAW && custom_suffix == NULL) {
258207753Smm		message_error(_("%s: With --format=raw, "
259207753Smm				"--suffix=.SUF is required unless "
260207753Smm				"writing to stdout"), src_name);
261207753Smm		return NULL;
262207753Smm	}
263207753Smm
264207753Smm	const char *suffix = custom_suffix != NULL
265223935Smm			? custom_suffix : suffixes[0];
266292588Sdelphij	size_t suffix_len = strlen(suffix);
267207753Smm
268292588Sdelphij#ifdef __DJGPP__
269292588Sdelphij	if (!_use_lfn(src_name)) {
270292588Sdelphij		// Long filename (LFN) support isn't available and we are
271292588Sdelphij		// limited to 8.3 short filenames (SFN).
272292588Sdelphij		//
273292588Sdelphij		// Look for suffix separator from the filename, and make sure
274292588Sdelphij		// that it is in the filename, not in a directory name.
275292588Sdelphij		const char *sufsep = strrchr(src_name, '.');
276292588Sdelphij		if (sufsep == NULL || sufsep[1] == '\0'
277292588Sdelphij				|| has_dir_sep(sufsep)) {
278292588Sdelphij			// src_name has no filename extension.
279292588Sdelphij			//
280292588Sdelphij			// Examples:
281292588Sdelphij			// xz foo         -> foo.xz
282292588Sdelphij			// xz -F lzma foo -> foo.lzm
283292588Sdelphij			// xz -S x foo    -> foox
284292588Sdelphij			// xz -S x foo.   -> foo.x
285292588Sdelphij			// xz -S x.y foo  -> foox.y
286292588Sdelphij			// xz -S .x foo   -> foo.x
287292588Sdelphij			// xz -S .x foo.  -> foo.x
288292588Sdelphij			//
289292588Sdelphij			// Avoid double dots:
290292588Sdelphij			if (sufsep != NULL && sufsep[1] == '\0'
291292588Sdelphij					&& suffix[0] == '.')
292292588Sdelphij				--src_len;
293292588Sdelphij
294292588Sdelphij		} else if (custom_suffix == NULL
295292588Sdelphij				&& strcasecmp(sufsep, ".tar") == 0) {
296292588Sdelphij			// ".tar" is handled specially.
297292588Sdelphij			//
298292588Sdelphij			// Examples:
299292588Sdelphij			// xz foo.tar          -> foo.txz
300292588Sdelphij			// xz -F lzma foo.tar  -> foo.tlz
301292588Sdelphij			static const char *const tar_suffixes[] = {
302292588Sdelphij				".txz",
303292588Sdelphij				".tlz",
304292588Sdelphij				// ".tgz",
305292588Sdelphij			};
306292588Sdelphij			suffix = tar_suffixes[format];
307292588Sdelphij			suffix_len = 4;
308292588Sdelphij			src_len -= 4;
309292588Sdelphij
310292588Sdelphij		} else {
311292588Sdelphij			if (custom_suffix == NULL && opt_format == FORMAT_XZ) {
312292588Sdelphij				// Instead of the .xz suffix, use a single
313292588Sdelphij				// character at the end of the filename
314292588Sdelphij				// extension. This is to minimize name
315292588Sdelphij				// conflicts when compressing multiple files
316292588Sdelphij				// with the same basename. E.g. foo.txt and
317292588Sdelphij				// foo.exe become foo.tx- and foo.ex-. Dash
318292588Sdelphij				// is rare as the last character of the
319292588Sdelphij				// filename extension, so it seems to be
320292588Sdelphij				// quite safe choice and it stands out better
321292588Sdelphij				// in directory listings than e.g. x. For
322292588Sdelphij				// comparison, gzip uses z.
323292588Sdelphij				suffix = "-";
324292588Sdelphij				suffix_len = 1;
325292588Sdelphij			}
326292588Sdelphij
327292588Sdelphij			if (suffix[0] == '.') {
328292588Sdelphij				// The first character of the suffix is a dot.
329292588Sdelphij				// Throw away the original filename extension
330292588Sdelphij				// and replace it with the new suffix.
331292588Sdelphij				//
332292588Sdelphij				// Examples:
333292588Sdelphij				// xz -F lzma foo.txt  -> foo.lzm
334292588Sdelphij				// xz -S .x  foo.txt   -> foo.x
335292588Sdelphij				src_len = sufsep - src_name;
336292588Sdelphij
337292588Sdelphij			} else {
338292588Sdelphij				// The first character of the suffix is not
339292588Sdelphij				// a dot. Preserve the first 0-2 characters
340292588Sdelphij				// of the original filename extension.
341292588Sdelphij				//
342292588Sdelphij				// Examples:
343292588Sdelphij				// xz foo.txt         -> foo.tx-
344292588Sdelphij				// xz -S x  foo.c     -> foo.cx
345292588Sdelphij				// xz -S ab foo.c     -> foo.cab
346292588Sdelphij				// xz -S ab foo.txt   -> foo.tab
347292588Sdelphij				// xz -S abc foo.txt  -> foo.abc
348292588Sdelphij				//
349292588Sdelphij				// Truncate the suffix to three chars:
350292588Sdelphij				if (suffix_len > 3)
351292588Sdelphij					suffix_len = 3;
352292588Sdelphij
353292588Sdelphij				// If needed, overwrite 1-3 characters.
354292588Sdelphij				if (strlen(sufsep) > 4 - suffix_len)
355292588Sdelphij					src_len = sufsep - src_name
356292588Sdelphij							+ 4 - suffix_len;
357292588Sdelphij			}
358292588Sdelphij		}
359292588Sdelphij	}
360292588Sdelphij#endif
361292588Sdelphij
362207753Smm	char *dest_name = xmalloc(src_len + suffix_len + 1);
363207753Smm
364207753Smm	memcpy(dest_name, src_name, src_len);
365207753Smm	memcpy(dest_name + src_len, suffix, suffix_len);
366207753Smm	dest_name[src_len + suffix_len] = '\0';
367207753Smm
368207753Smm	return dest_name;
369207753Smm}
370207753Smm
371207753Smm
372207753Smmextern char *
373207753Smmsuffix_get_dest_name(const char *src_name)
374207753Smm{
375207753Smm	assert(src_name != NULL);
376207753Smm
377207753Smm	// Length of the name is needed in all cases to locate the end of
378207753Smm	// the string to compare the suffix, so calculate the length here.
379207753Smm	const size_t src_len = strlen(src_name);
380207753Smm
381207753Smm	return opt_mode == MODE_COMPRESS
382207753Smm			? compressed_name(src_name, src_len)
383207753Smm			: uncompressed_name(src_name, src_len);
384207753Smm}
385207753Smm
386207753Smm
387207753Smmextern void
388207753Smmsuffix_set(const char *suffix)
389207753Smm{
390219001Smm	// Empty suffix and suffixes having a directory separator are
391219001Smm	// rejected. Such suffixes would break things later.
392219001Smm	if (suffix[0] == '\0' || has_dir_sep(suffix))
393274261Sdelphij		message_fatal(_("%s: Invalid filename suffix"), suffix);
394207753Smm
395207753Smm	// Replace the old custom_suffix (if any) with the new suffix.
396207753Smm	free(custom_suffix);
397207753Smm	custom_suffix = xstrdup(suffix);
398207753Smm	return;
399207753Smm}
400