1/* Reading binary .mo files. 2 Copyright (C) 1995-1998, 2000-2006 Free Software Foundation, Inc. 3 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software Foundation, 17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19#ifdef HAVE_CONFIG_H 20# include <config.h> 21#endif 22 23/* Specification. */ 24#include "read-mo.h" 25 26#include <errno.h> 27#include <stdbool.h> 28#include <stdio.h> 29#include <stddef.h> 30#include <stdlib.h> 31#include <string.h> 32 33/* This include file describes the main part of binary .mo format. */ 34#include "gmo.h" 35 36#include "error.h" 37#include "xalloc.h" 38#include "binary-io.h" 39#include "exit.h" 40#include "message.h" 41#include "format.h" 42#include "gettext.h" 43 44#define _(str) gettext (str) 45 46 47/* We read the file completely into memory. This is more efficient than 48 lots of lseek(). This struct represents the .mo file in memory. */ 49struct binary_mo_file 50{ 51 const char *filename; 52 char *data; 53 size_t size; 54 enum { MO_LITTLE_ENDIAN, MO_BIG_ENDIAN } endian; 55}; 56 57 58/* Read the contents of the given input stream. */ 59static void 60read_binary_mo_file (struct binary_mo_file *bfp, 61 FILE *fp, const char *filename) 62{ 63 char *buf = NULL; 64 size_t alloc = 0; 65 size_t size = 0; 66 size_t count; 67 68 while (!feof (fp)) 69 { 70 const size_t increment = 4096; 71 if (size + increment > alloc) 72 { 73 alloc = alloc + alloc / 2; 74 if (alloc < size + increment) 75 alloc = size + increment; 76 buf = (char *) xrealloc (buf, alloc); 77 } 78 count = fread (buf + size, 1, increment, fp); 79 if (count == 0) 80 { 81 if (ferror (fp)) 82 error (EXIT_FAILURE, errno, _("error while reading \"%s\""), 83 filename); 84 } 85 else 86 size += count; 87 } 88 buf = (char *) xrealloc (buf, size); 89 bfp->filename = filename; 90 bfp->data = buf; 91 bfp->size = size; 92} 93 94/* Get a 32-bit number from the file, at the given file position. */ 95static nls_uint32 96get_uint32 (const struct binary_mo_file *bfp, size_t offset) 97{ 98 nls_uint32 b0, b1, b2, b3; 99 100 if (offset + 4 > bfp->size) 101 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); 102 103 b0 = *(unsigned char *) (bfp->data + offset + 0); 104 b1 = *(unsigned char *) (bfp->data + offset + 1); 105 b2 = *(unsigned char *) (bfp->data + offset + 2); 106 b3 = *(unsigned char *) (bfp->data + offset + 3); 107 if (bfp->endian == MO_LITTLE_ENDIAN) 108 return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); 109 else 110 return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3; 111} 112 113/* Get a static string from the file, at the given file position. */ 114static char * 115get_string (const struct binary_mo_file *bfp, size_t offset, size_t *lengthp) 116{ 117 /* See 'struct string_desc'. */ 118 nls_uint32 s_length = get_uint32 (bfp, offset); 119 nls_uint32 s_offset = get_uint32 (bfp, offset + 4); 120 121 if (s_offset + s_length + 1 > bfp->size) 122 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); 123 if (bfp->data[s_offset + s_length] != '\0') 124 error (EXIT_FAILURE, 0, 125 _("file \"%s\" contains a not NUL terminated string"), 126 bfp->filename); 127 128 *lengthp = s_length + 1; 129 return bfp->data + s_offset; 130} 131 132/* Get a system dependent string from the file, at the given file position. */ 133static char * 134get_sysdep_string (const struct binary_mo_file *bfp, size_t offset, 135 const struct mo_file_header *header, size_t *lengthp) 136{ 137 /* See 'struct sysdep_string'. */ 138 size_t length; 139 char *string; 140 size_t i; 141 char *p; 142 nls_uint32 s_offset; 143 144 /* Compute the length. */ 145 length = 0; 146 for (i = 4; ; i += 8) 147 { 148 nls_uint32 segsize = get_uint32 (bfp, offset + i); 149 nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4); 150 nls_uint32 sysdep_segment_offset; 151 nls_uint32 ss_length; 152 nls_uint32 ss_offset; 153 size_t n; 154 155 length += segsize; 156 157 if (sysdepref == SEGMENTS_END) 158 break; 159 if (sysdepref >= header->n_sysdep_segments) 160 /* Invalid. */ 161 error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"), 162 bfp->filename); 163 /* See 'struct sysdep_segment'. */ 164 sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8; 165 ss_length = get_uint32 (bfp, sysdep_segment_offset); 166 ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4); 167 if (ss_offset + ss_length > bfp->size) 168 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); 169 if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0')) 170 { 171 char location[30]; 172 sprintf (location, "sysdep_segment[%u]", (unsigned int) sysdepref); 173 error (EXIT_FAILURE, 0, 174 _("file \"%s\" contains a not NUL terminated string, at %s"), 175 bfp->filename, location); 176 } 177 n = strlen (bfp->data + ss_offset); 178 length += (n > 1 ? 1 + n + 1 : n); 179 } 180 181 /* Allocate and fill the string. */ 182 string = (char *) xmalloc (length); 183 p = string; 184 s_offset = get_uint32 (bfp, offset); 185 for (i = 4; ; i += 8) 186 { 187 nls_uint32 segsize = get_uint32 (bfp, offset + i); 188 nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4); 189 nls_uint32 sysdep_segment_offset; 190 nls_uint32 ss_length; 191 nls_uint32 ss_offset; 192 size_t n; 193 194 if (s_offset + segsize > bfp->size) 195 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); 196 memcpy (p, bfp->data + s_offset, segsize); 197 p += segsize; 198 s_offset += segsize; 199 200 if (sysdepref == SEGMENTS_END) 201 break; 202 if (sysdepref >= header->n_sysdep_segments) 203 abort (); 204 /* See 'struct sysdep_segment'. */ 205 sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8; 206 ss_length = get_uint32 (bfp, sysdep_segment_offset); 207 ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4); 208 if (ss_offset + ss_length > bfp->size) 209 abort (); 210 if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0')) 211 abort (); 212 n = strlen (bfp->data + ss_offset); 213 if (n > 1) 214 *p++ = '<'; 215 memcpy (p, bfp->data + ss_offset, n); 216 p += n; 217 if (n > 1) 218 *p++ = '>'; 219 } 220 221 if (p != string + length) 222 abort (); 223 224 *lengthp = length; 225 return string; 226} 227 228/* Reads an existing .mo file and adds the messages to mlp. */ 229void 230read_mo_file (message_list_ty *mlp, const char *filename) 231{ 232 FILE *fp; 233 struct binary_mo_file bf; 234 struct mo_file_header header; 235 unsigned int i; 236 static lex_pos_ty pos = { __FILE__, __LINE__ }; 237 238 if (strcmp (filename, "-") == 0 || strcmp (filename, "/dev/stdin") == 0) 239 { 240 fp = stdin; 241 SET_BINARY (fileno (fp)); 242 } 243 else 244 { 245 fp = fopen (filename, "rb"); 246 if (fp == NULL) 247 error (EXIT_FAILURE, errno, 248 _("error while opening \"%s\" for reading"), filename); 249 } 250 251 /* Read the file contents into memory. */ 252 read_binary_mo_file (&bf, fp, filename); 253 254 /* Get a 32-bit number from the file header. */ 255# define GET_HEADER_FIELD(field) \ 256 get_uint32 (&bf, offsetof (struct mo_file_header, field)) 257 258 /* We must grope the file to determine which endian it is. 259 Perversity of the universe tends towards maximum, so it will 260 probably not match the currently executing architecture. */ 261 bf.endian = MO_BIG_ENDIAN; 262 header.magic = GET_HEADER_FIELD (magic); 263 if (header.magic != _MAGIC) 264 { 265 bf.endian = MO_LITTLE_ENDIAN; 266 header.magic = GET_HEADER_FIELD (magic); 267 if (header.magic != _MAGIC) 268 { 269 unrecognised: 270 error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"), 271 filename); 272 } 273 } 274 275 header.revision = GET_HEADER_FIELD (revision); 276 277 /* We support only the major revisions 0 and 1. */ 278 switch (header.revision >> 16) 279 { 280 case 0: 281 case 1: 282 /* Fill the header parts that apply to major revisions 0 and 1. */ 283 header.nstrings = GET_HEADER_FIELD (nstrings); 284 header.orig_tab_offset = GET_HEADER_FIELD (orig_tab_offset); 285 header.trans_tab_offset = GET_HEADER_FIELD (trans_tab_offset); 286 header.hash_tab_size = GET_HEADER_FIELD (hash_tab_size); 287 header.hash_tab_offset = GET_HEADER_FIELD (hash_tab_offset); 288 289 for (i = 0; i < header.nstrings; i++) 290 { 291 message_ty *mp; 292 char *msgctxt; 293 char *msgid; 294 size_t msgid_len; 295 char *separator; 296 char *msgstr; 297 size_t msgstr_len; 298 299 /* Read the msgctxt and msgid. */ 300 msgid = get_string (&bf, header.orig_tab_offset + i * 8, 301 &msgid_len); 302 /* Split into msgctxt and msgid. */ 303 separator = strchr (msgid, MSGCTXT_SEPARATOR); 304 if (separator != NULL) 305 { 306 /* The part before the MSGCTXT_SEPARATOR is the msgctxt. */ 307 *separator = '\0'; 308 msgctxt = msgid; 309 msgid = separator + 1; 310 msgid_len -= msgid - msgctxt; 311 } 312 else 313 msgctxt = NULL; 314 315 /* Read the msgstr. */ 316 msgstr = get_string (&bf, header.trans_tab_offset + i * 8, 317 &msgstr_len); 318 319 mp = message_alloc (msgctxt, 320 msgid, 321 (strlen (msgid) + 1 < msgid_len 322 ? msgid + strlen (msgid) + 1 323 : NULL), 324 msgstr, msgstr_len, 325 &pos); 326 message_list_append (mlp, mp); 327 } 328 329 switch (header.revision & 0xffff) 330 { 331 case 0: 332 break; 333 case 1: 334 default: 335 /* Fill the header parts that apply to minor revision >= 1. */ 336 header.n_sysdep_segments = GET_HEADER_FIELD (n_sysdep_segments); 337 header.sysdep_segments_offset = 338 GET_HEADER_FIELD (sysdep_segments_offset); 339 header.n_sysdep_strings = GET_HEADER_FIELD (n_sysdep_strings); 340 header.orig_sysdep_tab_offset = 341 GET_HEADER_FIELD (orig_sysdep_tab_offset); 342 header.trans_sysdep_tab_offset = 343 GET_HEADER_FIELD (trans_sysdep_tab_offset); 344 345 for (i = 0; i < header.n_sysdep_strings; i++) 346 { 347 message_ty *mp; 348 char *msgctxt; 349 char *msgid; 350 size_t msgid_len; 351 char *separator; 352 char *msgstr; 353 size_t msgstr_len; 354 nls_uint32 offset; 355 size_t f; 356 357 /* Read the msgctxt and msgid. */ 358 offset = get_uint32 (&bf, header.orig_sysdep_tab_offset + i * 4); 359 msgid = get_sysdep_string (&bf, offset, &header, &msgid_len); 360 /* Split into msgctxt and msgid. */ 361 separator = strchr (msgid, MSGCTXT_SEPARATOR); 362 if (separator != NULL) 363 { 364 /* The part before the MSGCTXT_SEPARATOR is the msgctxt. */ 365 *separator = '\0'; 366 msgctxt = msgid; 367 msgid = separator + 1; 368 msgid_len -= msgid - msgctxt; 369 } 370 else 371 msgctxt = NULL; 372 373 /* Read the msgstr. */ 374 offset = get_uint32 (&bf, header.trans_sysdep_tab_offset + i * 4); 375 msgstr = get_sysdep_string (&bf, offset, &header, &msgstr_len); 376 377 mp = message_alloc (msgctxt, 378 msgid, 379 (strlen (msgid) + 1 < msgid_len 380 ? msgid + strlen (msgid) + 1 381 : NULL), 382 msgstr, msgstr_len, 383 &pos); 384 385 /* Only messages with c-format or objc-format annotation are 386 recognized as having system-dependent strings by msgfmt. 387 Which one of the two, we don't know. We have to guess, 388 assuming that c-format is more probable than objc-format and 389 that the .mo was likely produced by "msgfmt -c". */ 390 for (f = format_c; ; f = format_objc) 391 { 392 bool valid = true; 393 struct formatstring_parser *parser = formatstring_parsers[f]; 394 const char *str_end; 395 const char *str; 396 397 str_end = msgid + msgid_len; 398 for (str = msgid; str < str_end; str += strlen (str) + 1) 399 { 400 char *invalid_reason = NULL; 401 void *descr = parser->parse (str, false, &invalid_reason); 402 403 if (descr != NULL) 404 parser->free (descr); 405 else 406 { 407 free (invalid_reason); 408 valid = false; 409 break; 410 } 411 } 412 if (valid) 413 { 414 str_end = msgstr + msgstr_len; 415 for (str = msgstr; str < str_end; str += strlen (str) + 1) 416 { 417 char *invalid_reason = NULL; 418 void *descr = 419 parser->parse (str, true, &invalid_reason); 420 421 if (descr != NULL) 422 parser->free (descr); 423 else 424 { 425 free (invalid_reason); 426 valid = false; 427 break; 428 } 429 } 430 } 431 432 if (valid) 433 { 434 /* Found the most likely among c-format, objc-format. */ 435 mp->is_format[f] = yes; 436 break; 437 } 438 439 /* Try next f. */ 440 if (f == format_objc) 441 break; 442 } 443 444 message_list_append (mlp, mp); 445 } 446 break; 447 } 448 break; 449 450 default: 451 goto unrecognised; 452 } 453 454 if (fp != stdin) 455 fclose (fp); 456} 457