1/*	$OpenBSD: gencat.c,v 1.22 2022/12/26 19:16:01 jmc Exp $	*/
2/*	$NetBSD: gencat.c,v 1.9 1998/10/09 17:00:56 itohy Exp $	*/
3
4/*-
5 * Copyright (c) 1996 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by J.T. Conklin.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33
34/***********************************************************
35Copyright 1990, by Alfalfa Software Incorporated, Cambridge, Massachusetts.
36
37                        All Rights Reserved
38
39Permission to use, copy, modify, and distribute this software and its
40documentation for any purpose and without fee is hereby granted,
41provided that the above copyright notice appear in all copies and that
42both that copyright notice and this permission notice appear in
43supporting documentation, and that Alfalfa's name not be used in
44advertising or publicity pertaining to distribution of the software
45without specific, written prior permission.
46
47ALPHALPHA DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
48ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
49ALPHALPHA BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
50ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
51WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
52ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
53SOFTWARE.
54
55If you make any modifications, bugfixes or other changes to this software
56we'd appreciate it if you could send a copy to us so we can keep things
57up-to-date.  Many thanks.
58				Kee Hinckley
59				Alfalfa Software, Inc.
60				267 Allston St., #3
61				Cambridge, MA 02139  USA
62				nazgul@alfalfa.com
63
64******************************************************************/
65
66#define _NLS_PRIVATE
67
/*
 * ISSPACE(c): non-zero when c is an ASCII whitespace character.
 *
 * The argument is converted to unsigned char before it reaches the
 * <ctype.h> routines, and isascii() rejects every byte with the high bit
 * set, which keeps message text 8-bit clean.  The argument is
 * parenthesized so the cast applies to the whole expression; it may be
 * evaluated twice, so it must not have side effects.
 */
#define ISSPACE(c) \
    (isascii((unsigned char)(c)) && isspace((unsigned char)(c)))
71
72#include <sys/queue.h>
73#include <ctype.h>
74#include <err.h>
75#include <fcntl.h>
76#include <nl_types.h>
77#include <stdio.h>
78#include <stdlib.h>
79#include <string.h>
80#include <unistd.h>
81
/* One catalog message: its number, its text, and its list linkage. */
struct _msgT {
	long    msgId;		/* message number within the owning set */
	char   *str;		/* message text; assigned from xstrdup() in MCAddMsg() */
        LIST_ENTRY(_msgT) entries;	/* linkage in the owning set's msghead list */
};
87
/* One message set: its number and the list of messages it holds. */
struct _setT {
	long    setId;		/* set number */
        LIST_HEAD(msghead, _msgT) msghead;	/* messages, inserted by MCAddMsg() in ascending msgId order */
        LIST_ENTRY(_setT) entries;	/* linkage in the global sethead list */
};
93
/* All sets created so far; MCAddSet() inserts in ascending setId order. */
LIST_HEAD(sethead, _setT) sethead;
/* Set that MCAddMsg() and MCDelMsg() operate on; assigned by MCAddSet(). */
static struct _setT *curSet;

/* Buffer holding the line most recently produced by get_line(). */
static char *curline = NULL;
/* Incremented on every get_line() call; printed in diagnostics. */
static long lineno = 0;

extern	char	*__progname;		/* from crt0.o */
101
/* File-local helpers: diagnostics, allocation wrappers, line scanners. */
static	char   *cskip(char *);
static	void	error(char *, char *);
static	void	nomem(void);
static	char   *get_line(int);
static	char   *getmsg(int, char *, char);
static	void	warning(char *, char *);
static	char   *wskip(char *);
static	char   *xstrdup(const char *);
static	void   *xmalloc(size_t);
static	void   *xrealloc(void *, size_t);

/* Catalog construction routines and program entry points. */
void	MCParse(int fd);
void	MCWriteCat(int fd);
void	MCDelMsg(int msgId);
void	MCAddMsg(int msgId, const char *msg);
void	MCAddSet(int setId);
void	MCDelSet(int setId);
int	main(int, char **);
void	usage(void);
121
122
/* Print the command synopsis to stderr and exit with status 1. */
void
usage(void)
{
	fprintf(stderr, "usage: %s catfile msgfile ...\n", __progname);
	exit(1);
}
129
130int
131main(int argc, char *argv[])
132{
133	int     ofd, ifd;
134	char   *catfile = NULL;
135	int     c;
136
137	if (pledge("stdio rpath wpath cpath", NULL) == -1)
138		err(1, "pledge");
139
140	while ((c = getopt(argc, argv, "")) != -1) {
141		switch (c) {
142		default:
143			usage();
144			/* NOTREACHED */
145		}
146	}
147	argc -= optind;
148	argv += optind;
149
150	if (argc < 2) {
151		usage();
152		/* NOTREACHED */
153	}
154	catfile = *argv++;
155
156	for (; *argv; argv++) {
157		if ((ifd = open(*argv, O_RDONLY)) == -1)
158			err(1, "Unable to read %s", *argv);
159		MCParse(ifd);
160		close(ifd);
161	}
162
163	if ((ofd = open(catfile, O_WRONLY | O_TRUNC | O_CREAT, 0666)) == -1)
164		err(1, "Unable to create a new %s", catfile);
165	MCWriteCat(ofd);
166	exit(0);
167}
168
169static void
170warning(char *cptr, char *msg)
171{
172	warnx("%s on line %ld\n%s", msg, lineno, curline);
173	if (cptr) {
174		char   *tptr;
175		for (tptr = curline; tptr < cptr; ++tptr)
176			putc(' ', stderr);
177		fprintf(stderr, "^\n");
178	}
179}
180
/*
 * Report a fatal problem: produce the same output as warning(cptr, msg),
 * then exit with status 1.
 */
static void
error(char *cptr, char *msg)
{
	warning(cptr, msg);
	exit(1);
}
187
/* Fatal "out of memory" report; error() exits, so this does not return. */
static void
nomem(void)
{
	error(NULL, "out of memory");
}
193
/*
 * malloc() wrapper: returns the new allocation, or reports the failure
 * through nomem() when malloc() returns NULL.
 */
static void *
xmalloc(size_t len)
{
	void   *mem = malloc(len);

	if (mem == NULL)
		nomem();
	return (mem);
}
203
/*
 * realloc() wrapper: returns the resized allocation, or reports the
 * failure through nomem() when realloc() returns NULL.
 */
static void *
xrealloc(void *ptr, size_t size)
{
	void   *grown = realloc(ptr, size);

	if (grown == NULL)
		nomem();
	return (grown);
}
211
/*
 * strdup() wrapper: returns a heap copy of str, or reports the failure
 * through nomem() when strdup() returns NULL.
 */
static char *
xstrdup(const char *str)
{
	char   *copy = strdup(str);

	if (copy == NULL)
		nomem();
	return (copy);
}
221
222static char *
223get_line(int fd)
224{
225	static long curlen = BUFSIZ;
226	static char buf[BUFSIZ], *bptr = buf, *bend = buf;
227	char   *cptr, *cend;
228	long    buflen;
229
230	if (!curline) {
231		curline = xmalloc(curlen);
232	}
233	++lineno;
234
235	cptr = curline;
236	cend = curline + curlen;
237	for (;;) {
238		for (; bptr < bend && cptr < cend; ++cptr, ++bptr) {
239			if (*bptr == '\n') {
240				*cptr = '\0';
241				++bptr;
242				return (curline);
243			} else
244				*cptr = *bptr;
245		}
246		if (bptr == bend) {
247			buflen = read(fd, buf, BUFSIZ);
248			if (buflen <= 0) {
249				if (cptr > curline) {
250					*cptr = '\0';
251					return (curline);
252				}
253				return (NULL);
254			}
255			bend = buf + buflen;
256			bptr = buf;
257		}
258		if (cptr == cend) {
259			cptr = curline = xrealloc(curline, curlen *= 2);
260			cend = curline + curlen;
261		}
262	}
263}
264
/*
 * Skip a run of whitespace.  cptr is expected to point at whitespace;
 * if it does not (including at end of string), a warning is printed and
 * cptr is returned unchanged.  Otherwise the first position after the
 * run is returned.
 */
static char *
wskip(char *cptr)
{
	if (!(*cptr && ISSPACE(*cptr))) {
		warning(cptr, "expected a space");
		return (cptr);
	}
	/* The first character is known to be whitespace here. */
	do {
		++cptr;
	} while (*cptr && ISSPACE(*cptr));
	return (cptr);
}
276
/*
 * Skip a run of non-whitespace characters.  cptr is expected to point at
 * a non-whitespace character; if it points at whitespace or at the end
 * of the string, a warning is printed and cptr is returned unchanged.
 * Otherwise the first position after the run is returned.
 */
static char *
cskip(char *cptr)
{
	if (*cptr == '\0' || ISSPACE(*cptr)) {
		warning(cptr, "wasn't expecting a space");
		return (cptr);
	}
	/* The first character is known to be a non-space here. */
	do {
		++cptr;
	} while (*cptr && !ISSPACE(*cptr));
	return (cptr);
}
288
289static char *
290getmsg(int fd, char *cptr, char quote)
291{
292	static char *msg = NULL;
293	static long msglen = 0;
294	long    clen, i;
295	char   *tptr;
296
297	if (quote && *cptr == quote) {
298		++cptr;
299	}
300
301	clen = strlen(cptr) + 1;
302	if (clen > msglen) {
303		if (msglen)
304			msg = xrealloc(msg, clen);
305		else
306			msg = xmalloc(clen);
307		msglen = clen;
308	}
309	tptr = msg;
310
311	while (*cptr) {
312		if (quote && *cptr == quote) {
313			char   *tmp;
314			tmp = cptr + 1;
315
316			if (*tmp && (!ISSPACE(*tmp) || *wskip(tmp))) {
317				warning(cptr, "unexpected quote character, ignoring");
318				*tptr++ = *cptr++;
319			} else {
320				*cptr = '\0';
321			}
322		} else if (*cptr == '\\') {
323			++cptr;
324			switch (*cptr) {
325			case '\0':
326				cptr = get_line(fd);
327				if (!cptr)
328					error(NULL, "premature end of file");
329				msglen += strlen(cptr);
330				i = tptr - msg;
331				msg = xrealloc(msg, msglen);
332				tptr = msg + i;
333				break;
334			case 'n':
335				*tptr++ = '\n';
336				++cptr;
337				break;
338			case 't':
339				*tptr++ = '\t';
340				++cptr;
341				break;
342			case 'v':
343				*tptr++ = '\v';
344				++cptr;
345				break;
346			case 'b':
347				*tptr++ = '\b';
348				++cptr;
349				break;
350			case 'r':
351				*tptr++ = '\r';
352				++cptr;
353				break;
354			case 'f':
355				*tptr++ = '\f';
356				++cptr;
357				break;
358			case '\\':
359				*tptr++ = '\\';
360				++cptr;
361				break;
362			case '"':
363				/* FALLTHROUGH */
364			case '\'':
365				/*
366				 * While it isn't necessary to
367				 * escape ' and ", let's accept
368				 * them escaped and not complain.
369				 * (XPG4 states that '\' should be
370				 * ignored when not used in a
371				 * valid escape sequence)
372				 */
373				*tptr++ = '"';
374				++cptr;
375				break;
376			default:
377				if (quote && *cptr == quote) {
378					*tptr++ = *cptr++;
379				} else if (isdigit((unsigned char) *cptr)) {
380					*tptr = 0;
381					for (i = 0; i < 3; ++i) {
382						if (!isdigit((unsigned char) *cptr))
383							break;
384						if (*cptr > '7')
385							warning(cptr, "octal number greater than 7?!");
386						*tptr *= 8;
387						*tptr += (*cptr - '0');
388						++cptr;
389					}
390				} else {
391					warning(cptr, "unrecognized escape sequence; ignoring escape character");
392				}
393				break;
394			}
395		} else {
396			*tptr++ = *cptr++;
397		}
398	}
399	*tptr = '\0';
400	return (msg);
401}
402
/*
 * Parse one message source file read from fd and apply it to the
 * in-memory catalog.
 *
 * Lines starting with '$' are directives: "$set N" selects or creates a
 * set, "$delset N" deletes one, "$quote C" sets the quote character (no
 * argument turns quoting off).  A '$' that is alone or followed by
 * whitespace is ignored; any other '$' line draws warnings.
 *
 * Other non-empty lines must start with a digit: the message number,
 * followed by whitespace and the message text.  A number with no text
 * deletes that message.  If no set has been selected yet when the first
 * message line appears, set NL_SETD is created.
 */
void
MCParse(int fd)
{
	char   *line, *p, *text;
	int     setid = 0;
	int     msgid;
	char    quote = 0;

	while ((line = get_line(fd)) != NULL) {
		if (line[0] == '$') {
			p = line + 1;
			if (strncmp(p, "set", 3) == 0) {
				p = wskip(p + 3);
				setid = atoi(p);
				MCAddSet(setid);
			} else if (strncmp(p, "delset", 6) == 0) {
				p = wskip(p + 6);
				setid = atoi(p);
				MCDelSet(setid);
			} else if (strncmp(p, "quote", 5) == 0) {
				p += 5;
				if (*p != '\0')
					p = wskip(p);
				/* End of line here leaves quote at 0 (off). */
				quote = *p;
			} else if (*p != '\0' && !ISSPACE(*p)) {
				p = wskip(p);
				if (*p != '\0')
					warning(p, "unrecognized line");
			}
			continue;
		}

		/* Empty lines are skipped without comment. */
		if (line[0] == '\0')
			continue;

		/* Anything else must begin with a message number. */
		if (!isdigit((unsigned char) line[0])) {
			warning(line, "neither blank line nor start of a message id");
			continue;
		}
		msgid = atoi(line);
		p = wskip(cskip(line));

		if (setid == 0) {
			setid = NL_SETD;
			MCAddSet(setid);
		}

		/* A number with nothing after it deletes that message. */
		if (*p == '\0') {
			MCDelMsg(msgid);
			continue;
		}
		text = getmsg(fd, p, quote);
		MCAddMsg(msgid, text);
	}
}
483
484/*
485 * Write message catalog.
486 *
487 * The message catalog is first converted from its internal to its
488 * external representation in a chunk of memory allocated for this
489 * purpose.  Then the completed catalog is written.  This approach
490 * avoids additional housekeeping variables and/or a lot of seeks
491 * that would otherwise be required.
492 */
493void
494MCWriteCat(int fd)
495{
496	int     nsets;		/* number of sets */
497	int     nmsgs;		/* number of msgs */
498	int     string_size;	/* total size of string pool */
499	int     msgcat_size;	/* total size of message catalog */
500	void   *msgcat;		/* message catalog data */
501	struct _nls_cat_hdr *cat_hdr;
502	struct _nls_set_hdr *set_hdr;
503	struct _nls_msg_hdr *msg_hdr;
504	char   *strings;
505	struct _setT *set;
506	struct _msgT *msg;
507	int     msg_index;
508	int     msg_offset;
509
510	/* determine number of sets, number of messages, and size of the
511	 * string pool */
512	nsets = 0;
513	nmsgs = 0;
514	string_size = 0;
515
516	LIST_FOREACH(set, &sethead, entries) {
517		nsets++;
518
519		LIST_FOREACH(msg, &set->msghead, entries) {
520			nmsgs++;
521			string_size += strlen(msg->str) + 1;
522		}
523	}
524
525#ifdef DEBUG
526	printf("number of sets: %d\n", nsets);
527	printf("number of msgs: %d\n", nmsgs);
528	printf("string pool size: %d\n", string_size);
529#endif
530
531	/* determine size and then allocate buffer for constructing external
532	 * message catalog representation */
533	msgcat_size = sizeof(struct _nls_cat_hdr)
534	    + (nsets * sizeof(struct _nls_set_hdr))
535	    + (nmsgs * sizeof(struct _nls_msg_hdr))
536	    + string_size;
537
538	msgcat = xmalloc(msgcat_size);
539	memset(msgcat, '\0', msgcat_size);
540
541	/* fill in msg catalog header */
542	cat_hdr = (struct _nls_cat_hdr *) msgcat;
543	cat_hdr->__magic = htonl(_NLS_MAGIC);
544	cat_hdr->__nsets = htonl(nsets);
545	cat_hdr->__mem = htonl(msgcat_size - sizeof(struct _nls_cat_hdr));
546	cat_hdr->__msg_hdr_offset =
547	    htonl(nsets * sizeof(struct _nls_set_hdr));
548	cat_hdr->__msg_txt_offset =
549	    htonl(nsets * sizeof(struct _nls_set_hdr) +
550	    nmsgs * sizeof(struct _nls_msg_hdr));
551
552	/* compute offsets for set & msg header tables and string pool */
553	set_hdr = (struct _nls_set_hdr *) ((char *) msgcat +
554	    sizeof(struct _nls_cat_hdr));
555	msg_hdr = (struct _nls_msg_hdr *) ((char *) msgcat +
556	    sizeof(struct _nls_cat_hdr) +
557	    nsets * sizeof(struct _nls_set_hdr));
558	strings = (char *) msgcat +
559	    sizeof(struct _nls_cat_hdr) +
560	    nsets * sizeof(struct _nls_set_hdr) +
561	    nmsgs * sizeof(struct _nls_msg_hdr);
562
563	msg_index = 0;
564	msg_offset = 0;
565	LIST_FOREACH(set, &sethead, entries) {
566
567		nmsgs = 0;
568		LIST_FOREACH(msg, &set->msghead, entries) {
569			int     msg_len = strlen(msg->str) + 1;
570
571			msg_hdr->__msgno = htonl(msg->msgId);
572			msg_hdr->__msglen = htonl(msg_len);
573			msg_hdr->__offset = htonl(msg_offset);
574
575			memcpy(strings, msg->str, msg_len);
576			strings += msg_len;
577			msg_offset += msg_len;
578
579			nmsgs++;
580			msg_hdr++;
581		}
582
583		set_hdr->__setno = htonl(set->setId);
584		set_hdr->__nmsgs = htonl(nmsgs);
585		set_hdr->__index = htonl(msg_index);
586		msg_index += nmsgs;
587		set_hdr++;
588	}
589
590	/* write out catalog.  XXX: should this be done in small chunks? */
591	write(fd, msgcat, msgcat_size);
592}
593
594void
595MCAddSet(int setId)
596{
597	struct _setT *p, *q;
598
599	if (setId <= 0) {
600		error(NULL, "setId's must be greater than zero");
601		/* NOTREACHED */
602	}
603#if 0
604	/* XXX */
605	if (setId > NL_SETMAX) {
606		error(NULL, "setId %d exceeds limit (%d)");
607		/* NOTREACHED */
608	}
609#endif
610
611	p = LIST_FIRST(&sethead);
612	q = NULL;
613	for (; p != NULL && p->setId < setId; q = p, p = LIST_NEXT(p, entries));
614
615	if (p && p->setId == setId) {
616		;
617	} else {
618		p = xmalloc(sizeof(struct _setT));
619		memset(p, '\0', sizeof(struct _setT));
620		LIST_INIT(&p->msghead);
621
622		p->setId = setId;
623
624		if (q == NULL) {
625			LIST_INSERT_HEAD(&sethead, p, entries);
626		} else {
627			LIST_INSERT_AFTER(q, p, entries);
628		}
629	}
630
631	curSet = p;
632}
633
634void
635MCAddMsg(int msgId, const char *str)
636{
637	struct _msgT *p, *q;
638
639	if (!curSet)
640		error(NULL, "can't specify a message when no set exists");
641
642	if (msgId <= 0) {
643		error(NULL, "msgId's must be greater than zero");
644		/* NOTREACHED */
645	}
646#if 0
647	/* XXX */
648	if (msgId > NL_SETMAX) {
649		error(NULL, "msgId %d exceeds limit (%d)");
650		/* NOTREACHED */
651	}
652#endif
653
654	p = LIST_FIRST(&curSet->msghead);
655	q = NULL;
656	for (; p != NULL && p->msgId < msgId; q = p, p = LIST_NEXT(p, entries));
657
658	if (p && p->msgId == msgId) {
659		free(p->str);
660	} else {
661		p = xmalloc(sizeof(struct _msgT));
662		memset(p, '\0', sizeof(struct _msgT));
663
664		if (q == NULL) {
665			LIST_INSERT_HEAD(&curSet->msghead, p, entries);
666		} else {
667			LIST_INSERT_AFTER(q, p, entries);
668		}
669	}
670
671	p->msgId = msgId;
672	p->str = xstrdup(str);
673}
674
675void
676MCDelSet(int setId)
677{
678	struct _setT *set;
679	struct _msgT *msg;
680
681	set = LIST_FIRST(&sethead);
682	for (; set != NULL && set->setId < setId;
683	    set = LIST_NEXT(set, entries));
684
685	if (set && set->setId == setId) {
686
687		msg = LIST_FIRST(&set->msghead);
688		while (msg) {
689			free(msg->str);
690			LIST_REMOVE(msg, entries);
691		}
692
693		LIST_REMOVE(set, entries);
694		return;
695	}
696	warning(NULL, "specified set doesn't exist");
697}
698
699void
700MCDelMsg(int msgId)
701{
702	struct _msgT *msg;
703
704	if (!curSet)
705		error(NULL, "you can't delete a message before defining the set");
706
707	msg = LIST_FIRST(&curSet->msghead);
708	for (; msg != NULL && msg->msgId < msgId;
709	    msg = LIST_NEXT(msg, entries));
710
711	if (msg && msg->msgId == msgId) {
712		free(msg->str);
713		LIST_REMOVE(msg, entries);
714		return;
715	}
716	warning(NULL, "specified msg doesn't exist");
717}
718