1/* Reading binary .mo files. 2 Copyright (C) 1995-1998, 2000-2007 Free Software Foundation, Inc. 3 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#ifdef HAVE_CONFIG_H 19# include <config.h> 20#endif 21 22/* Specification. */ 23#include "read-mo.h" 24 25#include <errno.h> 26#include <stdbool.h> 27#include <stdio.h> 28#include <stddef.h> 29#include <stdlib.h> 30#include <string.h> 31 32/* This include file describes the main part of binary .mo format. */ 33#include "gmo.h" 34 35#include "error.h" 36#include "xalloc.h" 37#include "binary-io.h" 38#include "message.h" 39#include "format.h" 40#include "gettext.h" 41 42#define _(str) gettext (str) 43 44 45enum mo_endianness 46{ 47 MO_LITTLE_ENDIAN, 48 MO_BIG_ENDIAN 49}; 50 51/* We read the file completely into memory. This is more efficient than 52 lots of lseek(). This struct represents the .mo file in memory. */ 53struct binary_mo_file 54{ 55 const char *filename; 56 char *data; 57 size_t size; 58 enum mo_endianness endian; 59}; 60 61 62/* Read the contents of the given input stream. */ 63static void 64read_binary_mo_file (struct binary_mo_file *bfp, 65 FILE *fp, const char *filename) 66{ 67 char *buf = NULL; 68 size_t alloc = 0; 69 size_t size = 0; 70 size_t count; 71 72 while (!feof (fp)) 73 { 74 const size_t increment = 4096; 75 if (size + increment > alloc) 76 { 77 alloc = alloc + alloc / 2; 78 if (alloc < size + increment) 79 alloc = size + increment; 80 buf = (char *) xrealloc (buf, alloc); 81 } 82 count = fread (buf + size, 1, increment, fp); 83 if (count == 0) 84 { 85 if (ferror (fp)) 86 error (EXIT_FAILURE, errno, _("error while reading \"%s\""), 87 filename); 88 } 89 else 90 size += count; 91 } 92 buf = (char *) xrealloc (buf, size); 93 bfp->filename = filename; 94 bfp->data = buf; 95 bfp->size = size; 96} 97 98/* Get a 32-bit number from the file, at the given file position. */ 99static nls_uint32 100get_uint32 (const struct binary_mo_file *bfp, size_t offset) 101{ 102 nls_uint32 b0, b1, b2, b3; 103 104 if (offset + 4 > bfp->size) 105 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); 106 107 b0 = *(unsigned char *) (bfp->data + offset + 0); 108 b1 = *(unsigned char *) (bfp->data + offset + 1); 109 b2 = *(unsigned char *) (bfp->data + offset + 2); 110 b3 = *(unsigned char *) (bfp->data + offset + 3); 111 if (bfp->endian == MO_LITTLE_ENDIAN) 112 return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); 113 else 114 return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3; 115} 116 117/* Get a static string from the file, at the given file position. */ 118static char * 119get_string (const struct binary_mo_file *bfp, size_t offset, size_t *lengthp) 120{ 121 /* See 'struct string_desc'. */ 122 nls_uint32 s_length = get_uint32 (bfp, offset); 123 nls_uint32 s_offset = get_uint32 (bfp, offset + 4); 124 125 if (s_offset + s_length + 1 > bfp->size) 126 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); 127 if (bfp->data[s_offset + s_length] != '\0') 128 error (EXIT_FAILURE, 0, 129 _("file \"%s\" contains a not NUL terminated string"), 130 bfp->filename); 131 132 *lengthp = s_length + 1; 133 return bfp->data + s_offset; 134} 135 136/* Get a system dependent string from the file, at the given file position. */ 137static char * 138get_sysdep_string (const struct binary_mo_file *bfp, size_t offset, 139 const struct mo_file_header *header, size_t *lengthp) 140{ 141 /* See 'struct sysdep_string'. */ 142 size_t length; 143 char *string; 144 size_t i; 145 char *p; 146 nls_uint32 s_offset; 147 148 /* Compute the length. */ 149 length = 0; 150 for (i = 4; ; i += 8) 151 { 152 nls_uint32 segsize = get_uint32 (bfp, offset + i); 153 nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4); 154 nls_uint32 sysdep_segment_offset; 155 nls_uint32 ss_length; 156 nls_uint32 ss_offset; 157 size_t n; 158 159 length += segsize; 160 161 if (sysdepref == SEGMENTS_END) 162 break; 163 if (sysdepref >= header->n_sysdep_segments) 164 /* Invalid. */ 165 error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"), 166 bfp->filename); 167 /* See 'struct sysdep_segment'. */ 168 sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8; 169 ss_length = get_uint32 (bfp, sysdep_segment_offset); 170 ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4); 171 if (ss_offset + ss_length > bfp->size) 172 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); 173 if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0')) 174 { 175 char location[30]; 176 sprintf (location, "sysdep_segment[%u]", (unsigned int) sysdepref); 177 error (EXIT_FAILURE, 0, 178 _("file \"%s\" contains a not NUL terminated string, at %s"), 179 bfp->filename, location); 180 } 181 n = strlen (bfp->data + ss_offset); 182 length += (n > 1 ? 1 + n + 1 : n); 183 } 184 185 /* Allocate and fill the string. */ 186 string = XNMALLOC (length, char); 187 p = string; 188 s_offset = get_uint32 (bfp, offset); 189 for (i = 4; ; i += 8) 190 { 191 nls_uint32 segsize = get_uint32 (bfp, offset + i); 192 nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4); 193 nls_uint32 sysdep_segment_offset; 194 nls_uint32 ss_length; 195 nls_uint32 ss_offset; 196 size_t n; 197 198 if (s_offset + segsize > bfp->size) 199 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); 200 memcpy (p, bfp->data + s_offset, segsize); 201 p += segsize; 202 s_offset += segsize; 203 204 if (sysdepref == SEGMENTS_END) 205 break; 206 if (sysdepref >= header->n_sysdep_segments) 207 abort (); 208 /* See 'struct sysdep_segment'. */ 209 sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8; 210 ss_length = get_uint32 (bfp, sysdep_segment_offset); 211 ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4); 212 if (ss_offset + ss_length > bfp->size) 213 abort (); 214 if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0')) 215 abort (); 216 n = strlen (bfp->data + ss_offset); 217 if (n > 1) 218 *p++ = '<'; 219 memcpy (p, bfp->data + ss_offset, n); 220 p += n; 221 if (n > 1) 222 *p++ = '>'; 223 } 224 225 if (p != string + length) 226 abort (); 227 228 *lengthp = length; 229 return string; 230} 231 232/* Reads an existing .mo file and adds the messages to mlp. */ 233void 234read_mo_file (message_list_ty *mlp, const char *filename) 235{ 236 FILE *fp; 237 struct binary_mo_file bf; 238 struct mo_file_header header; 239 unsigned int i; 240 static lex_pos_ty pos = { __FILE__, __LINE__ }; 241 242 if (strcmp (filename, "-") == 0 || strcmp (filename, "/dev/stdin") == 0) 243 { 244 fp = stdin; 245 SET_BINARY (fileno (fp)); 246 } 247 else 248 { 249 fp = fopen (filename, "rb"); 250 if (fp == NULL) 251 error (EXIT_FAILURE, errno, 252 _("error while opening \"%s\" for reading"), filename); 253 } 254 255 /* Read the file contents into memory. */ 256 read_binary_mo_file (&bf, fp, filename); 257 258 /* Get a 32-bit number from the file header. */ 259# define GET_HEADER_FIELD(field) \ 260 get_uint32 (&bf, offsetof (struct mo_file_header, field)) 261 262 /* We must grope the file to determine which endian it is. 263 Perversity of the universe tends towards maximum, so it will 264 probably not match the currently executing architecture. */ 265 bf.endian = MO_BIG_ENDIAN; 266 header.magic = GET_HEADER_FIELD (magic); 267 if (header.magic != _MAGIC) 268 { 269 bf.endian = MO_LITTLE_ENDIAN; 270 header.magic = GET_HEADER_FIELD (magic); 271 if (header.magic != _MAGIC) 272 { 273 unrecognised: 274 error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"), 275 filename); 276 } 277 } 278 279 header.revision = GET_HEADER_FIELD (revision); 280 281 /* We support only the major revisions 0 and 1. */ 282 switch (header.revision >> 16) 283 { 284 case 0: 285 case 1: 286 /* Fill the header parts that apply to major revisions 0 and 1. */ 287 header.nstrings = GET_HEADER_FIELD (nstrings); 288 header.orig_tab_offset = GET_HEADER_FIELD (orig_tab_offset); 289 header.trans_tab_offset = GET_HEADER_FIELD (trans_tab_offset); 290 header.hash_tab_size = GET_HEADER_FIELD (hash_tab_size); 291 header.hash_tab_offset = GET_HEADER_FIELD (hash_tab_offset); 292 293 for (i = 0; i < header.nstrings; i++) 294 { 295 message_ty *mp; 296 char *msgctxt; 297 char *msgid; 298 size_t msgid_len; 299 char *separator; 300 char *msgstr; 301 size_t msgstr_len; 302 303 /* Read the msgctxt and msgid. */ 304 msgid = get_string (&bf, header.orig_tab_offset + i * 8, 305 &msgid_len); 306 /* Split into msgctxt and msgid. */ 307 separator = strchr (msgid, MSGCTXT_SEPARATOR); 308 if (separator != NULL) 309 { 310 /* The part before the MSGCTXT_SEPARATOR is the msgctxt. */ 311 *separator = '\0'; 312 msgctxt = msgid; 313 msgid = separator + 1; 314 msgid_len -= msgid - msgctxt; 315 } 316 else 317 msgctxt = NULL; 318 319 /* Read the msgstr. */ 320 msgstr = get_string (&bf, header.trans_tab_offset + i * 8, 321 &msgstr_len); 322 323 mp = message_alloc (msgctxt, 324 msgid, 325 (strlen (msgid) + 1 < msgid_len 326 ? msgid + strlen (msgid) + 1 327 : NULL), 328 msgstr, msgstr_len, 329 &pos); 330 message_list_append (mlp, mp); 331 } 332 333 switch (header.revision & 0xffff) 334 { 335 case 0: 336 break; 337 case 1: 338 default: 339 /* Fill the header parts that apply to minor revision >= 1. */ 340 header.n_sysdep_segments = GET_HEADER_FIELD (n_sysdep_segments); 341 header.sysdep_segments_offset = 342 GET_HEADER_FIELD (sysdep_segments_offset); 343 header.n_sysdep_strings = GET_HEADER_FIELD (n_sysdep_strings); 344 header.orig_sysdep_tab_offset = 345 GET_HEADER_FIELD (orig_sysdep_tab_offset); 346 header.trans_sysdep_tab_offset = 347 GET_HEADER_FIELD (trans_sysdep_tab_offset); 348 349 for (i = 0; i < header.n_sysdep_strings; i++) 350 { 351 message_ty *mp; 352 char *msgctxt; 353 char *msgid; 354 size_t msgid_len; 355 char *separator; 356 char *msgstr; 357 size_t msgstr_len; 358 nls_uint32 offset; 359 size_t f; 360 361 /* Read the msgctxt and msgid. */ 362 offset = get_uint32 (&bf, header.orig_sysdep_tab_offset + i * 4); 363 msgid = get_sysdep_string (&bf, offset, &header, &msgid_len); 364 /* Split into msgctxt and msgid. */ 365 separator = strchr (msgid, MSGCTXT_SEPARATOR); 366 if (separator != NULL) 367 { 368 /* The part before the MSGCTXT_SEPARATOR is the msgctxt. */ 369 *separator = '\0'; 370 msgctxt = msgid; 371 msgid = separator + 1; 372 msgid_len -= msgid - msgctxt; 373 } 374 else 375 msgctxt = NULL; 376 377 /* Read the msgstr. */ 378 offset = get_uint32 (&bf, header.trans_sysdep_tab_offset + i * 4); 379 msgstr = get_sysdep_string (&bf, offset, &header, &msgstr_len); 380 381 mp = message_alloc (msgctxt, 382 msgid, 383 (strlen (msgid) + 1 < msgid_len 384 ? msgid + strlen (msgid) + 1 385 : NULL), 386 msgstr, msgstr_len, 387 &pos); 388 389 /* Only messages with c-format or objc-format annotation are 390 recognized as having system-dependent strings by msgfmt. 391 Which one of the two, we don't know. We have to guess, 392 assuming that c-format is more probable than objc-format and 393 that the .mo was likely produced by "msgfmt -c". */ 394 for (f = format_c; ; f = format_objc) 395 { 396 bool valid = true; 397 struct formatstring_parser *parser = formatstring_parsers[f]; 398 const char *str_end; 399 const char *str; 400 401 str_end = msgid + msgid_len; 402 for (str = msgid; str < str_end; str += strlen (str) + 1) 403 { 404 char *invalid_reason = NULL; 405 void *descr = 406 parser->parse (str, false, NULL, &invalid_reason); 407 408 if (descr != NULL) 409 parser->free (descr); 410 else 411 { 412 free (invalid_reason); 413 valid = false; 414 break; 415 } 416 } 417 if (valid) 418 { 419 str_end = msgstr + msgstr_len; 420 for (str = msgstr; str < str_end; str += strlen (str) + 1) 421 { 422 char *invalid_reason = NULL; 423 void *descr = 424 parser->parse (str, true, NULL, &invalid_reason); 425 426 if (descr != NULL) 427 parser->free (descr); 428 else 429 { 430 free (invalid_reason); 431 valid = false; 432 break; 433 } 434 } 435 } 436 437 if (valid) 438 { 439 /* Found the most likely among c-format, objc-format. */ 440 mp->is_format[f] = yes; 441 break; 442 } 443 444 /* Try next f. */ 445 if (f == format_objc) 446 break; 447 } 448 449 message_list_append (mlp, mp); 450 } 451 break; 452 } 453 break; 454 455 default: 456 goto unrecognised; 457 } 458 459 if (fp != stdin) 460 fclose (fp); 461} 462