1207753Smm///////////////////////////////////////////////////////////////////////////////
2207753Smm//
3207753Smm/// \file       suffix.c
4207753Smm/// \brief      Checks filename suffix and creates the destination filename
5207753Smm//
6207753Smm//  Author:     Lasse Collin
7207753Smm//
8207753Smm//  This file has been put into the public domain.
9207753Smm//  You can do whatever you want with this file.
10207753Smm//
11207753Smm///////////////////////////////////////////////////////////////////////////////
12207753Smm
13207753Smm#include "private.h"
14207753Smm
15278433Srpaulo#ifdef __DJGPP__
16278433Srpaulo#	include <fcntl.h>
17278433Srpaulo#endif
18278433Srpaulo
// Filename suffixes have to be compared without regard to case on
// case-insensitive systems, so on those every strcmp() in this file
// is turned into strcasecmp().
#if defined(__VMS) || defined(TUKLIB_DOSLIKE)
#	define strcmp strcasecmp
#endif
23207753Smm
24207753Smm
/// Suffix installed by suffix_set(), or NULL when none has been set.
/// Objects with static storage duration start out as null pointers.
static char *custom_suffix;
26207753Smm
27207753Smm
/// \brief      Tell whether a character separates path components
///
/// \param      c       Character to classify
///
/// \return     true for '/'. When TUKLIB_DOSLIKE is defined, also true
///             for '\\' and ':'. false for everything else.
static bool
is_dir_sep(char c)
{
	switch (c) {
	case '/':
#ifdef TUKLIB_DOSLIKE
	case '\\':
	case ':':
#endif
		return true;

	default:
		return false;
	}
}
38219001Smm
39219001Smm
/// \brief      Tell whether a string contains any directory separator
///
/// \param      str     Null-terminated string to scan
///
/// \return     true if at least one character of str is a directory
///             separator ('/', plus '\\' and ':' when TUKLIB_DOSLIKE
///             is defined), false otherwise.
static bool
has_dir_sep(const char *str)
{
#ifdef TUKLIB_DOSLIKE
	static const char separators[] = "/\\:";
#else
	static const char separators[] = "/";
#endif

	return strpbrk(str, separators) != NULL;
}
50219001Smm
51219001Smm
#ifdef __DJGPP__
/// \brief      Detect the trailing-dash suffix used with 8.3 short
///             filenames (SFN)
///
/// \param      str     Filename, possibly including a path
/// \param      len     strlen(str)
///
/// \return     true if str matches *.?- or *.??-, that is, a dot followed
///             by one or two extension characters and a final dash, with
///             a non-separator character in front of the dot. Otherwise
///             false.
static bool
has_sfn_suffix(const char *str, size_t len)
{
	// The shortest name that can match is of the form "a.b-".
	if (len < 4 || str[len - 1] != '-')
		return false;

	// The character right before the dash must be a plain extension
	// character: neither a dot nor a directory separator.
	const char before_dash = str[len - 2];
	if (before_dash == '.' || is_dir_sep(before_dash))
		return false;

	// *.?-
	if (str[len - 3] == '.')
		return !is_dir_sep(str[len - 4]);

	// *.??-
	if (len < 5 || is_dir_sep(str[len - 3]) || str[len - 4] != '.')
		return false;

	return !is_dir_sep(str[len - 5]);
}
#endif
75278433Srpaulo
76278433Srpaulo
/// \brief      Check whether a filename ends with the given suffix
///
/// \param      suffix      Filename suffix to look for
/// \param      src_name    Input filename, possibly including a path
/// \param      src_len     strlen(src_name)
///
/// \return     Length of src_name without the suffix, that is,
///             src_len - strlen(suffix), when src_name ends with suffix
///             and the suffix is preceded by at least one character of
///             the filename itself. This is always greater than zero.
///             Zero is returned when there is no match.
static size_t
test_suffix(const char *suffix, const char *src_name, size_t src_len)
{
	const size_t suffix_len = strlen(suffix);

	// Something has to remain once the suffix is taken away.
	if (src_len <= suffix_len)
		return 0;

	const size_t stem_len = src_len - suffix_len;

	// src_name may carry a path. If the character in front of the
	// would-be suffix is a directory separator, the suffix would be
	// the entire filename, which doesn't count as a match.
	if (is_dir_sep(src_name[stem_len - 1]))
		return 0;

	return strcmp(suffix, src_name + stem_len) == 0 ? stem_len : 0;
}
103207753Smm
104207753Smm
105207753Smm/// \brief      Removes the filename suffix of the compressed file
106207753Smm///
107207753Smm/// \return     Name of the uncompressed file, or NULL if file has unknown
108207753Smm///             suffix.
109207753Smmstatic char *
110207753Smmuncompressed_name(const char *src_name, const size_t src_len)
111207753Smm{
112223935Smm	static const struct {
113223935Smm		const char *compressed;
114223935Smm		const char *uncompressed;
115223935Smm	} suffixes[] = {
116207753Smm		{ ".xz",    "" },
117207753Smm		{ ".txz",   ".tar" }, // .txz abbreviation for .txt.gz is rare.
118207753Smm		{ ".lzma",  "" },
119278433Srpaulo#ifdef __DJGPP__
120278433Srpaulo		{ ".lzm",   "" },
121278433Srpaulo#endif
122207753Smm		{ ".tlz",   ".tar" },
123207753Smm		// { ".gz",    "" },
124207753Smm		// { ".tgz",   ".tar" },
125207753Smm	};
126207753Smm
127207753Smm	const char *new_suffix = "";
128207753Smm	size_t new_len = 0;
129207753Smm
130207753Smm	if (opt_format == FORMAT_RAW) {
131207753Smm		// Don't check for known suffixes when --format=raw was used.
132207753Smm		if (custom_suffix == NULL) {
133207753Smm			message_error(_("%s: With --format=raw, "
134207753Smm					"--suffix=.SUF is required unless "
135207753Smm					"writing to stdout"), src_name);
136207753Smm			return NULL;
137207753Smm		}
138207753Smm	} else {
139207753Smm		for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
140207753Smm			new_len = test_suffix(suffixes[i].compressed,
141207753Smm					src_name, src_len);
142207753Smm			if (new_len != 0) {
143207753Smm				new_suffix = suffixes[i].uncompressed;
144207753Smm				break;
145207753Smm			}
146207753Smm		}
147278433Srpaulo
148278433Srpaulo#ifdef __DJGPP__
149278433Srpaulo		// Support also *.?- -> *.? and *.??- -> *.?? on DOS.
150278433Srpaulo		// This is done also when long filenames are available
151278433Srpaulo		// to keep it easy to decompress files created when
152278433Srpaulo		// long filename support wasn't available.
153278433Srpaulo		if (new_len == 0 && has_sfn_suffix(src_name, src_len)) {
154278433Srpaulo			new_suffix = "";
155278433Srpaulo			new_len = src_len - 1;
156278433Srpaulo		}
157278433Srpaulo#endif
158207753Smm	}
159207753Smm
160207753Smm	if (new_len == 0 && custom_suffix != NULL)
161207753Smm		new_len = test_suffix(custom_suffix, src_name, src_len);
162207753Smm
163207753Smm	if (new_len == 0) {
164207753Smm		message_warning(_("%s: Filename has an unknown suffix, "
165207753Smm				"skipping"), src_name);
166207753Smm		return NULL;
167207753Smm	}
168207753Smm
169207753Smm	const size_t new_suffix_len = strlen(new_suffix);
170207753Smm	char *dest_name = xmalloc(new_len + new_suffix_len + 1);
171207753Smm
172207753Smm	memcpy(dest_name, src_name, new_len);
173207753Smm	memcpy(dest_name + new_len, new_suffix, new_suffix_len);
174207753Smm	dest_name[new_len + new_suffix_len] = '\0';
175207753Smm
176207753Smm	return dest_name;
177207753Smm}
178207753Smm
179207753Smm
/// \brief      Warn that a file is skipped because it already has a suffix
///
/// compressed_name() needs this message in more than one place, so it
/// lives in a function of its own.
///
/// \param      src_name    Name of the file that is being skipped
/// \param      suffix      Suffix that was found at the end of src_name
static void
msg_suffix(const char *src_name, const char *suffix)
{
	message_warning(_("%s: File already has `%s' suffix, skipping"),
			src_name, suffix);
}
189278433Srpaulo
190278433Srpaulo
191207753Smm/// \brief      Appends suffix to src_name
192207753Smm///
193207753Smm/// In contrast to uncompressed_name(), we check only suffixes that are valid
194207753Smm/// for the specified file format.
195207753Smmstatic char *
196278433Srpaulocompressed_name(const char *src_name, size_t src_len)
197207753Smm{
198207753Smm	// The order of these must match the order in args.h.
199278433Srpaulo	static const char *const all_suffixes[][4] = {
200207753Smm		{
201223935Smm			".xz",
202223935Smm			".txz",
203223935Smm			NULL
204207753Smm		}, {
205223935Smm			".lzma",
206278433Srpaulo#ifdef __DJGPP__
207278433Srpaulo			".lzm",
208278433Srpaulo#endif
209223935Smm			".tlz",
210223935Smm			NULL
211207753Smm/*
212207753Smm		}, {
213223935Smm			".gz",
214223935Smm			".tgz",
215223935Smm			NULL
216207753Smm*/
217207753Smm		}, {
218207753Smm			// --format=raw requires specifying the suffix
219207753Smm			// manually or using stdout.
220223935Smm			NULL
221207753Smm		}
222207753Smm	};
223207753Smm
224207753Smm	// args.c ensures this.
225207753Smm	assert(opt_format != FORMAT_AUTO);
226207753Smm
227207753Smm	const size_t format = opt_format - 1;
228223935Smm	const char *const *suffixes = all_suffixes[format];
229207753Smm
230278433Srpaulo	// Look for known filename suffixes and refuse to compress them.
231223935Smm	for (size_t i = 0; suffixes[i] != NULL; ++i) {
232223935Smm		if (test_suffix(suffixes[i], src_name, src_len) != 0) {
233278433Srpaulo			msg_suffix(src_name, suffixes[i]);
234207753Smm			return NULL;
235207753Smm		}
236207753Smm	}
237207753Smm
238278433Srpaulo#ifdef __DJGPP__
239278433Srpaulo	// Recognize also the special suffix that is used when long
240278433Srpaulo	// filename (LFN) support isn't available. This suffix is
241278433Srpaulo	// recognized on LFN systems too.
242278433Srpaulo	if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) {
243278433Srpaulo		msg_suffix(src_name, "-");
244278433Srpaulo		return NULL;
245278433Srpaulo	}
246278433Srpaulo#endif
247278433Srpaulo
248223935Smm	if (custom_suffix != NULL) {
249223935Smm		if (test_suffix(custom_suffix, src_name, src_len) != 0) {
250278433Srpaulo			msg_suffix(src_name, custom_suffix);
251223935Smm			return NULL;
252223935Smm		}
253223935Smm	}
254223935Smm
255207753Smm	// TODO: Hmm, maybe it would be better to validate this in args.c,
256207753Smm	// since the suffix handling when decoding is weird now.
257207753Smm	if (opt_format == FORMAT_RAW && custom_suffix == NULL) {
258207753Smm		message_error(_("%s: With --format=raw, "
259207753Smm				"--suffix=.SUF is required unless "
260207753Smm				"writing to stdout"), src_name);
261207753Smm		return NULL;
262207753Smm	}
263207753Smm
264207753Smm	const char *suffix = custom_suffix != NULL
265223935Smm			? custom_suffix : suffixes[0];
266278433Srpaulo	size_t suffix_len = strlen(suffix);
267207753Smm
268278433Srpaulo#ifdef __DJGPP__
269278433Srpaulo	if (!_use_lfn(src_name)) {
270278433Srpaulo		// Long filename (LFN) support isn't available and we are
271278433Srpaulo		// limited to 8.3 short filenames (SFN).
272278433Srpaulo		//
273278433Srpaulo		// Look for suffix separator from the filename, and make sure
274278433Srpaulo		// that it is in the filename, not in a directory name.
275278433Srpaulo		const char *sufsep = strrchr(src_name, '.');
276278433Srpaulo		if (sufsep == NULL || sufsep[1] == '\0'
277278433Srpaulo				|| has_dir_sep(sufsep)) {
278278433Srpaulo			// src_name has no filename extension.
279278433Srpaulo			//
280278433Srpaulo			// Examples:
281278433Srpaulo			// xz foo         -> foo.xz
282278433Srpaulo			// xz -F lzma foo -> foo.lzm
283278433Srpaulo			// xz -S x foo    -> foox
284278433Srpaulo			// xz -S x foo.   -> foo.x
285278433Srpaulo			// xz -S x.y foo  -> foox.y
286278433Srpaulo			// xz -S .x foo   -> foo.x
287278433Srpaulo			// xz -S .x foo.  -> foo.x
288278433Srpaulo			//
289278433Srpaulo			// Avoid double dots:
290278433Srpaulo			if (sufsep != NULL && sufsep[1] == '\0'
291278433Srpaulo					&& suffix[0] == '.')
292278433Srpaulo				--src_len;
293278433Srpaulo
294278433Srpaulo		} else if (custom_suffix == NULL
295278433Srpaulo				&& strcasecmp(sufsep, ".tar") == 0) {
296278433Srpaulo			// ".tar" is handled specially.
297278433Srpaulo			//
298278433Srpaulo			// Examples:
299278433Srpaulo			// xz foo.tar          -> foo.txz
300278433Srpaulo			// xz -F lzma foo.tar  -> foo.tlz
301278433Srpaulo			static const char *const tar_suffixes[] = {
302278433Srpaulo				".txz",
303278433Srpaulo				".tlz",
304278433Srpaulo				// ".tgz",
305278433Srpaulo			};
306278433Srpaulo			suffix = tar_suffixes[format];
307278433Srpaulo			suffix_len = 4;
308278433Srpaulo			src_len -= 4;
309278433Srpaulo
310278433Srpaulo		} else {
311278433Srpaulo			if (custom_suffix == NULL && opt_format == FORMAT_XZ) {
312278433Srpaulo				// Instead of the .xz suffix, use a single
313278433Srpaulo				// character at the end of the filename
314278433Srpaulo				// extension. This is to minimize name
315278433Srpaulo				// conflicts when compressing multiple files
316278433Srpaulo				// with the same basename. E.g. foo.txt and
317278433Srpaulo				// foo.exe become foo.tx- and foo.ex-. Dash
318278433Srpaulo				// is rare as the last character of the
319278433Srpaulo				// filename extension, so it seems to be
320278433Srpaulo				// quite safe choice and it stands out better
321278433Srpaulo				// in directory listings than e.g. x. For
322278433Srpaulo				// comparison, gzip uses z.
323278433Srpaulo				suffix = "-";
324278433Srpaulo				suffix_len = 1;
325278433Srpaulo			}
326278433Srpaulo
327278433Srpaulo			if (suffix[0] == '.') {
328278433Srpaulo				// The first character of the suffix is a dot.
329278433Srpaulo				// Throw away the original filename extension
330278433Srpaulo				// and replace it with the new suffix.
331278433Srpaulo				//
332278433Srpaulo				// Examples:
333278433Srpaulo				// xz -F lzma foo.txt  -> foo.lzm
334278433Srpaulo				// xz -S .x  foo.txt   -> foo.x
335278433Srpaulo				src_len = sufsep - src_name;
336278433Srpaulo
337278433Srpaulo			} else {
338278433Srpaulo				// The first character of the suffix is not
339278433Srpaulo				// a dot. Preserve the first 0-2 characters
340278433Srpaulo				// of the original filename extension.
341278433Srpaulo				//
342278433Srpaulo				// Examples:
343278433Srpaulo				// xz foo.txt         -> foo.tx-
344278433Srpaulo				// xz -S x  foo.c     -> foo.cx
345278433Srpaulo				// xz -S ab foo.c     -> foo.cab
346278433Srpaulo				// xz -S ab foo.txt   -> foo.tab
347278433Srpaulo				// xz -S abc foo.txt  -> foo.abc
348278433Srpaulo				//
349278433Srpaulo				// Truncate the suffix to three chars:
350278433Srpaulo				if (suffix_len > 3)
351278433Srpaulo					suffix_len = 3;
352278433Srpaulo
353278433Srpaulo				// If needed, overwrite 1-3 characters.
354278433Srpaulo				if (strlen(sufsep) > 4 - suffix_len)
355278433Srpaulo					src_len = sufsep - src_name
356278433Srpaulo							+ 4 - suffix_len;
357278433Srpaulo			}
358278433Srpaulo		}
359278433Srpaulo	}
360278433Srpaulo#endif
361278433Srpaulo
362207753Smm	char *dest_name = xmalloc(src_len + suffix_len + 1);
363207753Smm
364207753Smm	memcpy(dest_name, src_name, src_len);
365207753Smm	memcpy(dest_name + src_len, suffix, suffix_len);
366207753Smm	dest_name[src_len + suffix_len] = '\0';
367207753Smm
368207753Smm	return dest_name;
369207753Smm}
370207753Smm
371207753Smm
372207753Smmextern char *
373207753Smmsuffix_get_dest_name(const char *src_name)
374207753Smm{
375207753Smm	assert(src_name != NULL);
376207753Smm
377207753Smm	// Length of the name is needed in all cases to locate the end of
378207753Smm	// the string to compare the suffix, so calculate the length here.
379207753Smm	const size_t src_len = strlen(src_name);
380207753Smm
381207753Smm	return opt_mode == MODE_COMPRESS
382207753Smm			? compressed_name(src_name, src_len)
383207753Smm			: uncompressed_name(src_name, src_len);
384207753Smm}
385207753Smm
386207753Smm
387207753Smmextern void
388207753Smmsuffix_set(const char *suffix)
389207753Smm{
390219001Smm	// Empty suffix and suffixes having a directory separator are
391219001Smm	// rejected. Such suffixes would break things later.
392219001Smm	if (suffix[0] == '\0' || has_dir_sep(suffix))
393273498Sdelphij		message_fatal(_("%s: Invalid filename suffix"), suffix);
394207753Smm
395207753Smm	// Replace the old custom_suffix (if any) with the new suffix.
396207753Smm	free(custom_suffix);
397207753Smm	custom_suffix = xstrdup(suffix);
398207753Smm	return;
399207753Smm}
400