1/* Shell format strings. 2 Copyright (C) 2003-2004, 2006-2007 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2003. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#ifdef HAVE_CONFIG_H 19# include <config.h> 20#endif 21 22#include <stdbool.h> 23#include <stdlib.h> 24#include <string.h> 25 26#include "format.h" 27#include "c-ctype.h" 28#include "xalloc.h" 29#include "format-invalid.h" 30#include "gettext.h" 31 32#define _(str) gettext (str) 33 34/* Shell format strings are simply strings subjects to variable substitution. 35 A variable substitution starts with '$' and is finished by either 36 - a nonempty sequence of alphanumeric ASCII characters, the first being 37 not a digit, or 38 - an opening brace '{', a nonempty sequence of alphanumeric ASCII 39 characters, the first being not a digit, and a closing brace '}'. 40 We don't support variable references like $1, $$ or $? since they make 41 no sense when 'envsubst' is invoked. 42 We don't support non-ASCII variable names, to avoid dependencies w.r.t. the 43 current encoding: While "${\xe0}" looks like a variable access in ISO-8859-1 44 encoding, it doesn't look like one in the BIG5, BIG5-HKSCS, GBK, GB18030, 45 SHIFT_JIS, JOHAB encodings, because \xe0\x7d is a single character in these 46 encodings. 47 We don't support the POSIX syntax for default or alternate values: 48 ${variable-default} ${variable:-default} 49 ${variable=default} ${variable:=default} 50 ${variable+replacement} ${variable:+replacement} 51 ${variable?ignored} ${variable:?ignored} 52 because the translator might be tempted to change the default value; if 53 we allow it we have a security problem; if we don't allow it the translator 54 will be surprised. 55 */ 56 57struct named_arg 58{ 59 char *name; 60}; 61 62struct spec 63{ 64 unsigned int directives; 65 unsigned int named_arg_count; 66 unsigned int allocated; 67 struct named_arg *named; 68}; 69 70 71static int 72named_arg_compare (const void *p1, const void *p2) 73{ 74 return strcmp (((const struct named_arg *) p1)->name, 75 ((const struct named_arg *) p2)->name); 76} 77 78#define INVALID_NON_ASCII_VARIABLE() \ 79 xstrdup (_("The string refers to a shell variable with a non-ASCII name.")) 80#define INVALID_SHELL_SYNTAX() \ 81 xstrdup (_("The string refers to a shell variable with complex shell brace syntax. This syntax is unsupported here due to security reasons.")) 82#define INVALID_CONTEXT_DEPENDENT_VARIABLE() \ 83 xstrdup (_("The string refers to a shell variable whose value may be different inside shell functions.")) 84#define INVALID_EMPTY_VARIABLE() \ 85 xstrdup (_("The string refers to a shell variable with an empty name.")) 86 87static void * 88format_parse (const char *format, bool translated, char *fdi, 89 char **invalid_reason) 90{ 91 const char *const format_start = format; 92 struct spec spec; 93 struct spec *result; 94 95 spec.directives = 0; 96 spec.named_arg_count = 0; 97 spec.allocated = 0; 98 spec.named = NULL; 99 100 for (; *format != '\0';) 101 if (*format++ == '$') 102 { 103 /* A variable substitution. */ 104 char *name; 105 106 FDI_SET (format - 1, FMTDIR_START); 107 spec.directives++; 108 109 if (*format == '{') 110 { 111 const char *name_start; 112 const char *name_end; 113 size_t n; 114 115 name_start = ++format; 116 for (; *format != '\0'; format++) 117 { 118 if (*format == '}') 119 break; 120 if (!c_isascii (*format)) 121 { 122 *invalid_reason = INVALID_NON_ASCII_VARIABLE (); 123 FDI_SET (format, FMTDIR_ERROR); 124 goto bad_format; 125 } 126 if (format > name_start 127 && (*format == '-' || *format == '=' || *format == '+' 128 || *format == '?' || *format == ':')) 129 { 130 *invalid_reason = INVALID_SHELL_SYNTAX (); 131 FDI_SET (format, FMTDIR_ERROR); 132 goto bad_format; 133 } 134 if (!(c_isalnum (*format) || *format == '_') 135 || (format == name_start && c_isdigit (*format))) 136 { 137 *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE (); 138 FDI_SET (format, FMTDIR_ERROR); 139 goto bad_format; 140 } 141 } 142 if (*format == '\0') 143 { 144 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 145 FDI_SET (format - 1, FMTDIR_ERROR); 146 goto bad_format; 147 } 148 name_end = format++; 149 150 n = name_end - name_start; 151 if (n == 0) 152 { 153 *invalid_reason = INVALID_EMPTY_VARIABLE (); 154 FDI_SET (format - 1, FMTDIR_ERROR); 155 goto bad_format; 156 } 157 name = XNMALLOC (n + 1, char); 158 memcpy (name, name_start, n); 159 name[n] = '\0'; 160 } 161 else if (c_isalpha (*format) || *format == '_') 162 { 163 const char *name_start; 164 const char *name_end; 165 size_t n; 166 167 name_start = format; 168 do 169 format++; 170 while (*format != '\0' && (c_isalnum (*format) || *format == '_')); 171 name_end = format; 172 173 n = name_end - name_start; 174 name = XNMALLOC (n + 1, char); 175 memcpy (name, name_start, n); 176 name[n] = '\0'; 177 } 178 else if (*format != '\0') 179 { 180 if (!c_isascii (*format)) 181 { 182 *invalid_reason = INVALID_NON_ASCII_VARIABLE (); 183 FDI_SET (format, FMTDIR_ERROR); 184 goto bad_format; 185 } 186 else 187 { 188 *invalid_reason = INVALID_CONTEXT_DEPENDENT_VARIABLE (); 189 FDI_SET (format, FMTDIR_ERROR); 190 goto bad_format; 191 } 192 } 193 else 194 { 195 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 196 FDI_SET (format - 1, FMTDIR_ERROR); 197 goto bad_format; 198 } 199 200 /* Named argument. */ 201 if (spec.allocated == spec.named_arg_count) 202 { 203 spec.allocated = 2 * spec.allocated + 1; 204 spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg)); 205 } 206 spec.named[spec.named_arg_count].name = name; 207 spec.named_arg_count++; 208 209 FDI_SET (format - 1, FMTDIR_END); 210 } 211 212 /* Sort the named argument array, and eliminate duplicates. */ 213 if (spec.named_arg_count > 1) 214 { 215 unsigned int i, j; 216 217 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg), 218 named_arg_compare); 219 220 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 221 for (i = j = 0; i < spec.named_arg_count; i++) 222 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0) 223 free (spec.named[i].name); 224 else 225 { 226 if (j < i) 227 spec.named[j].name = spec.named[i].name; 228 j++; 229 } 230 spec.named_arg_count = j; 231 } 232 233 result = XMALLOC (struct spec); 234 *result = spec; 235 return result; 236 237 bad_format: 238 if (spec.named != NULL) 239 { 240 unsigned int i; 241 for (i = 0; i < spec.named_arg_count; i++) 242 free (spec.named[i].name); 243 free (spec.named); 244 } 245 return NULL; 246} 247 248static void 249format_free (void *descr) 250{ 251 struct spec *spec = (struct spec *) descr; 252 253 if (spec->named != NULL) 254 { 255 unsigned int i; 256 for (i = 0; i < spec->named_arg_count; i++) 257 free (spec->named[i].name); 258 free (spec->named); 259 } 260 free (spec); 261} 262 263static int 264format_get_number_of_directives (void *descr) 265{ 266 struct spec *spec = (struct spec *) descr; 267 268 return spec->directives; 269} 270 271static bool 272format_check (void *msgid_descr, void *msgstr_descr, bool equality, 273 formatstring_error_logger_t error_logger, 274 const char *pretty_msgstr) 275{ 276 struct spec *spec1 = (struct spec *) msgid_descr; 277 struct spec *spec2 = (struct spec *) msgstr_descr; 278 bool err = false; 279 280 if (spec1->named_arg_count + spec2->named_arg_count > 0) 281 { 282 unsigned int i, j; 283 unsigned int n1 = spec1->named_arg_count; 284 unsigned int n2 = spec2->named_arg_count; 285 286 /* Check the argument names are the same. 287 Both arrays are sorted. We search for the first difference. */ 288 for (i = 0, j = 0; i < n1 || j < n2; ) 289 { 290 int cmp = (i >= n1 ? 1 : 291 j >= n2 ? -1 : 292 strcmp (spec1->named[i].name, spec2->named[j].name)); 293 294 if (cmp > 0) 295 { 296 if (error_logger) 297 error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in 'msgid'"), 298 spec2->named[j].name, pretty_msgstr); 299 err = true; 300 break; 301 } 302 else if (cmp < 0) 303 { 304 if (equality) 305 { 306 if (error_logger) 307 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"), 308 spec1->named[i].name, pretty_msgstr); 309 err = true; 310 break; 311 } 312 else 313 i++; 314 } 315 else 316 j++, i++; 317 } 318 } 319 320 return err; 321} 322 323 324struct formatstring_parser formatstring_sh = 325{ 326 format_parse, 327 format_free, 328 format_get_number_of_directives, 329 NULL, 330 format_check 331}; 332 333 334#ifdef TEST 335 336/* Test program: Print the argument list specification returned by 337 format_parse for strings read from standard input. */ 338 339#include <stdio.h> 340 341static void 342format_print (void *descr) 343{ 344 struct spec *spec = (struct spec *) descr; 345 unsigned int i; 346 347 if (spec == NULL) 348 { 349 printf ("INVALID"); 350 return; 351 } 352 353 printf ("{"); 354 for (i = 0; i < spec->named_arg_count; i++) 355 { 356 if (i > 0) 357 printf (", "); 358 printf ("'%s'", spec->named[i].name); 359 } 360 printf ("}"); 361} 362 363int 364main () 365{ 366 for (;;) 367 { 368 char *line = NULL; 369 size_t line_size = 0; 370 int line_len; 371 char *invalid_reason; 372 void *descr; 373 374 line_len = getline (&line, &line_size, stdin); 375 if (line_len < 0) 376 break; 377 if (line_len > 0 && line[line_len - 1] == '\n') 378 line[--line_len] = '\0'; 379 380 invalid_reason = NULL; 381 descr = format_parse (line, false, NULL, &invalid_reason); 382 383 format_print (descr); 384 printf ("\n"); 385 if (descr == NULL) 386 printf ("%s\n", invalid_reason); 387 388 free (invalid_reason); 389 free (line); 390 } 391 392 return 0; 393} 394 395/* 396 * For Emacs M-x compile 397 * Local Variables: 398 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-sh.c ../gnulib-lib/libgettextlib.la" 399 * End: 400 */ 401 402#endif /* TEST */ 403