1%{
2/*
3 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4 * Copyright 2015 John Marino <draco@marino.st>
5 *
6 * This source code is derived from the illumos localedef command, and
7 * provided under BSD-style license terms by Nexenta Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 * $FreeBSD: stable/11/usr.bin/localedef/parser.y 308330 2016-11-05 09:46:48Z bapt $
32 */
33
34/*
35 * POSIX localedef grammar.
36 */
37
38#include <wchar.h>
39#include <stdio.h>
40#include <limits.h>
41#include "localedef.h"
42
43%}
/*
 * Semantic value carried by tokens.  Which member a token uses is given
 * by the <tag> on its %token declaration.
 */
%union {
	int		num;		/* T_NUMBER */
	wchar_t		wc;		/* T_CHAR */
	char		*token;		/* T_NAME, T_SYMBOL */
	collsym_t	*collsym;	/* T_COLLSYM */
	collelem_t	*collelem;	/* T_COLLELEM */
}
51
/*
 * Token declarations.  They are grouped by the part of the grammar that
 * uses them; the declaration order is deliberately left untouched so the
 * generated token numbers stay the same.
 */

/* Global settings, punctuation and section framing. */
%token	T_CODE_SET T_MB_CUR_MAX T_MB_CUR_MIN T_COM_CHAR T_ESC_CHAR
%token	T_LT T_GT T_NL T_SEMI T_COMMA T_ELLIPSIS T_RPAREN T_LPAREN
%token	T_QUOTE T_NULL T_WS T_END T_COPY

/* Section keywords of the charmap rule. */
%token	T_CHARMAP T_WIDTH

/* Keywords of the ctype rules. */
%token	T_CTYPE
%token	T_ISUPPER T_ISLOWER T_ISALPHA T_ISDIGIT T_ISPUNCT T_ISXDIGIT
%token	T_ISSPACE T_ISPRINT T_ISGRAPH T_ISBLANK T_ISCNTRL T_ISALNUM
%token	T_ISSPECIAL T_ISPHONOGRAM T_ISIDEOGRAM T_ISENGLISH T_ISNUMBER
%token	T_TOUPPER T_TOLOWER

/* Keywords of the collate rules. */
%token	T_COLLATE T_COLLATING_SYMBOL T_COLLATING_ELEMENT
%token	T_ORDER_START T_ORDER_END T_FORWARD T_BACKWARD T_POSITION
%token	T_FROM T_UNDEFINED T_IGNORE

/* Keywords of the messages rules. */
%token	T_MESSAGES T_YESSTR T_NOSTR T_YESEXPR T_NOEXPR

/* Keywords of the monetary rules. */
%token	T_MONETARY
%token	T_INT_CURR_SYMBOL T_CURRENCY_SYMBOL
%token	T_MON_DECIMAL_POINT T_MON_THOUSANDS_SEP
%token	T_POSITIVE_SIGN T_NEGATIVE_SIGN
%token	T_MON_GROUPING
%token	T_INT_FRAC_DIGITS T_FRAC_DIGITS
%token	T_P_CS_PRECEDES T_P_SEP_BY_SPACE
%token	T_N_CS_PRECEDES T_N_SEP_BY_SPACE
%token	T_P_SIGN_POSN T_N_SIGN_POSN
%token	T_INT_P_CS_PRECEDES T_INT_N_CS_PRECEDES
%token	T_INT_P_SEP_BY_SPACE T_INT_N_SEP_BY_SPACE
%token	T_INT_P_SIGN_POSN T_INT_N_SIGN_POSN

/* Keywords of the numeric rules. */
%token	T_NUMERIC T_DECIMAL_POINT T_THOUSANDS_SEP T_GROUPING

/* Keywords of the time rules. */
%token	T_TIME
%token	T_ABDAY T_DAY T_ABMON T_MON
%token	T_ERA T_ERA_D_FMT T_ERA_T_FMT T_ERA_D_T_FMT T_ALT_DIGITS
%token	T_D_T_FMT T_D_FMT T_T_FMT T_AM_PM T_T_FMT_AMPM T_DATE_FMT

/* Tokens that carry a semantic value (member of the %union in <>). */
%token	<wc>		T_CHAR
%token	<token>		T_NAME
%token	<num>		T_NUMBER
%token	<token>		T_SYMBOL
%token	<collsym>	T_COLLSYM
%token	<collelem>	T_COLLELEM
156
157%%
158
/*
 * Start symbol (first rule of the grammar): one or more categories,
 * optionally preceded by a list of settings.
 */
localedef
	: setting_list categories
	| categories
	;
162
/*
 * A quoted string, possibly empty.  The string rule has no action of its
 * own: every T_CHAR between the quotes is handed to add_wcs() by
 * charlist, and the rules that use a string call get_wcs() afterwards.
 */
string
	: T_QUOTE charlist T_QUOTE
	| T_QUOTE T_QUOTE
	;

/* One or more characters, passed to add_wcs() in input order. */
charlist
	: charlist T_CHAR
		{ add_wcs($2); }
	| T_CHAR
		{ add_wcs($1); }
	;
176
/* One or more setting lines. */
setting_list
	: setting_list setting
	| setting
	;

/*
 * A single setting line.  The comment character, escape character and
 * the two multibyte length values are stored directly into com_char,
 * esc_char, mb_cur_max and mb_cur_min.  The code set may be written
 * either as a quoted string or as a bare name; both forms end up in
 * set_wide_encoding().
 */
setting
	: T_COM_CHAR T_CHAR T_NL
		{ com_char = $2; }
	| T_ESC_CHAR T_CHAR T_NL
		{ esc_char = $2; }
	| T_MB_CUR_MAX T_NUMBER T_NL
		{ mb_cur_max = $2; }
	| T_MB_CUR_MIN T_NUMBER T_NL
		{ mb_cur_min = $2; }
	| T_CODE_SET string T_NL
		{
			/* Quoted form: convert the collected wide string. */
			wchar_t *wide = get_wcs();

			set_wide_encoding(to_mb_string(wide));
			free(wide);
		}
	| T_CODE_SET T_NAME T_NL
		{ set_wide_encoding($2); }
	;
209
/*
 * "copy" line used as the whole body of a category.  The source name,
 * given bare or quoted, is passed to copy_category().
 */
copycat
	: T_COPY T_NAME T_NL
		{ copy_category($2); }
	| T_COPY string T_NL
		{
			/* Quoted form: convert the collected wide string. */
			wchar_t *wide = get_wcs();

			copy_category(to_mb_string(wide));
			free(wide);
		}
	;
221
/* One or more categories. */
categories
	: categories category
	| category
	;

/* Any one of the section kinds understood by this grammar. */
category
	: charmap
	| messages
	| monetary
	| ctype
	| collate
	| numeric
	| time
	;
235
236
/*
 * A CHARMAP section or a WIDTH section, each closed by T_END followed by
 * the keyword that opened it.  Neither alternative has an action; all
 * the work happens in the entry rules.
 */
charmap
	: T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
	| T_WIDTH T_NL width_list T_END T_WIDTH T_NL
	;
240
241
/* One or more charmap lines. */
charmap_list
	: charmap_list charmap_entry
	| charmap_entry
	;

/*
 * A charmap line: a single symbol, a "first ... last" symbol range, or a
 * blank line.  After the mapping is recorded scan_to_eol() is called, so
 * no T_NL is expected by the first two alternatives.
 */
charmap_entry
	: T_SYMBOL T_CHAR
		{
			add_charmap($1, $2);
			scan_to_eol();
		}
	| T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
		{
			add_charmap_range($1, $3, $4);
			scan_to_eol();
		}
	| T_NL
	;
259
/* One or more width lines. */
width_list
	: width_list width_entry
	| width_entry
	;

/*
 * A width line: a single character or a "first ... last" range followed
 * by the width, or a blank line.  Endpoints that arrive as T_CHAR are
 * given to add_width()/add_width_range(); endpoints that arrive as
 * T_SYMBOL are given to add_charmap_undefined() instead.  In a mixed
 * range only the T_CHAR endpoint gets the width.
 */
width_entry
	: T_CHAR T_NUMBER T_NL
		{ add_width($1, $2); }
	| T_SYMBOL T_NUMBER T_NL
		{ add_charmap_undefined($1); }
	| T_CHAR T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{ add_width_range($1, $3, $4); }
	| T_SYMBOL T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_charmap_undefined($1);
			add_charmap_undefined($3);
		}
	| T_CHAR T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
		{
			add_width($1, $4);
			add_charmap_undefined($3);
		}
	| T_SYMBOL T_ELLIPSIS T_CHAR T_NUMBER T_NL
		{
			add_width($3, $4);
			add_charmap_undefined($1);
		}
	| T_NL
	;
293
/*
 * The ctype category.  dump_ctype() runs only when the body is a keyword
 * list; a "copy" body has already been handled by copycat.
 */
ctype
	: T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
		{ dump_ctype(); }
	| T_CTYPE T_NL copycat T_END T_CTYPE T_NL
	;
300
/* One or more ctype keyword lines. */
ctype_list
	: ctype_list ctype_kw
	| ctype_kw
	;

/*
 * A ctype keyword line.  Class keywords take a cc_list; the two case
 * conversion keywords take a conv_list.  There are no actions here, so
 * the keyword is not passed to add_ctype()/add_caseconv() by the parser.
 * NOTE(review): presumably the keyword is tracked outside this grammar;
 * confirm against the scanner.
 */
ctype_kw
	: T_ISUPPER cc_list T_NL
	| T_ISLOWER cc_list T_NL
	| T_ISALPHA cc_list T_NL
	| T_ISDIGIT cc_list T_NL
	| T_ISPUNCT cc_list T_NL
	| T_ISXDIGIT cc_list T_NL
	| T_ISSPACE cc_list T_NL
	| T_ISPRINT cc_list T_NL
	| T_ISGRAPH cc_list T_NL
	| T_ISBLANK cc_list T_NL
	| T_ISCNTRL cc_list T_NL
	| T_ISALNUM cc_list T_NL
	| T_ISSPECIAL cc_list T_NL
	| T_ISENGLISH cc_list T_NL
	| T_ISNUMBER cc_list T_NL
	| T_ISIDEOGRAM cc_list T_NL
	| T_ISPHONOGRAM cc_list T_NL
	| T_TOUPPER conv_list T_NL
	| T_TOLOWER conv_list T_NL
	;
325
/*
 * Semicolon-separated members of a character class.  The list must start
 * with a single character; later members may be single characters or the
 * tail of a range.
 */
cc_list
	: cc_list T_SEMI cc_range_end
	| cc_list T_SEMI cc_char
	| cc_char
	;

/*
 * Tail of a range, "...;<char>".  Only the end character is passed to
 * add_ctype_range().
 * NOTE(review): the start of the range is presumably the character most
 * recently given to add_ctype(); confirm in add_ctype_range().
 */
cc_range_end
	: T_ELLIPSIS T_SEMI T_CHAR
		{ add_ctype_range($3); }
	;

/*
 * A single class member.  A T_CHAR is added to the class; a T_SYMBOL is
 * handed to add_charmap_undefined().
 */
cc_char
	: T_CHAR
		{ add_ctype($1); }
	| T_SYMBOL
		{ add_charmap_undefined($1); }
	;
346
/* Semicolon-separated case conversion pairs. */
conv_list
	: conv_list T_SEMI conv_pair
	| conv_pair
	;

/*
 * One "(from,to)" pair.  add_caseconv() is called only when both sides
 * are T_CHAR.  If either side is a T_SYMBOL the pair is not recorded and
 * each T_SYMBOL is handed to add_charmap_undefined().
 */
conv_pair
	: T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
		{ add_caseconv($2, $4); }
	| T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
		{ add_charmap_undefined($2); }
	| T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
		{
			add_charmap_undefined($2);
			add_charmap_undefined($4);
		}
	| T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
		{ add_charmap_undefined($4); }
	;
370
/*
 * The collate category: an order block, optionally preceded by
 * collating-symbol / collating-element definitions, or a "copy" body.
 * dump_collate() runs for the first two forms only.
 */
collate
	: T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
		{ dump_collate(); }
	| T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
		{ dump_collate(); }
	| T_COLLATE T_NL copycat T_END T_COLLATE T_NL
	;
381
382
/*
 * Definitions that may precede the order block: any mix of
 * collating-symbol and collating-element lines, at least one.
 */
coll_optional
	: coll_optional coll_symbols
	| coll_optional coll_elements
	| coll_symbols
	| coll_elements
	;

/* A collating-symbol line; the symbol name goes to define_collsym(). */
coll_symbols
	: T_COLLATING_SYMBOL T_SYMBOL T_NL
		{ define_collsym($2); }
	;

/*
 * A collating-element line.  The symbol name and the wide string
 * collected for the quoted "from" text go to define_collelem().
 */
coll_elements
	: T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
		{ define_collelem($2, get_wcs()); }
	;
402
/*
 * The order block, from the order-start line to the order-end line.
 *
 * When the order-start line carries no directives, a single forward
 * directive is installed as the default.  With explicit directives the
 * order_args rule has already registered them and nothing is done here.
 *
 * NOTE(review): the default is installed by an end-of-rule action, so it
 * runs after order_list has been reduced, not before.  Confirm that the
 * order_item actions do not need the directive to exist already.
 */
coll_order
	: T_ORDER_START T_NL order_list T_ORDER_END T_NL
		{
			/* No directives were given: default to one forward. */
			add_order_bit(T_FORWARD);
			add_order_directive();
		}
	| T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
	;
411
412
/*
 * Directives on the order-start line, separated by semicolons.
 * add_order_directive() is called once per completed order_arg.
 */
order_args
	: order_args T_SEMI order_arg
		{ add_order_directive(); }
	| order_arg
		{ add_order_directive(); }
	;

/* One directive: one or more comma-separated keywords. */
order_arg
	: order_arg T_COMMA order_dir
	| order_dir
	;

/* A single directive keyword; its token value goes to add_order_bit(). */
order_dir
	: T_FORWARD
		{ add_order_bit(T_FORWARD); }
	| T_BACKWARD
		{ add_order_bit(T_BACKWARD); }
	| T_POSITION
		{ add_order_bit(T_POSITION); }
	;
440
/* One or more lines inside the order block. */
order_list
	: order_list order_item
	| order_item
	;

/*
 * One line of the order block.  A line holding only a T_COLLSYM is
 * finished with end_order_collsym().  Any other line starts with an
 * order_itemkw, may carry weights, and is finished with end_order().
 */
order_item
	: T_COLLSYM T_NL
		{ end_order_collsym($1); }
	| order_itemkw T_NL
		{ end_order(); }
	| order_itemkw order_weights T_NL
		{ end_order(); }
	;
458
/*
 * The leading element of an order line.  Each kind of element calls its
 * own start_order_*() function.
 */
order_itemkw
	: T_CHAR
		{ start_order_char($1); }
	| T_ELLIPSIS
		{ start_order_ellipsis(); }
	| T_COLLELEM
		{ start_order_collelem($1); }
	| T_UNDEFINED
		{ start_order_undefined(); }
	| T_SYMBOL
		{ start_order_symbol($1); }
	;
480
/*
 * Semicolon-separated weights on an order line.  The second alternative
 * accepts a semicolon with nothing after it and has no action.
 */
order_weights
	: order_weights T_SEMI order_weight
	| order_weights T_SEMI
	| order_weight
	;

/*
 * A single weight.  Each kind calls its own add_order_*() function.  For
 * a quoted weight the contents are collected by order_str first and
 * add_order_subst() is called at the closing quote.
 */
order_weight
	: T_COLLELEM
		{ add_order_collelem($1); }
	| T_COLLSYM
		{ add_order_collsym($1); }
	| T_CHAR
		{ add_order_char($1); }
	| T_ELLIPSIS
		{ add_order_ellipsis(); }
	| T_IGNORE
		{ add_order_ignore(); }
	| T_SYMBOL
		{ add_order_symbol($1); }
	| T_QUOTE order_str T_QUOTE
		{ add_order_subst(); }
	;
515
/* Contents of a quoted weight: one or more items. */
order_str
	: order_str order_stritem
	| order_stritem
	;

/*
 * One item inside a quoted weight.  Each kind calls its own
 * add_subst_*() function.
 */
order_stritem
	: T_CHAR
		{ add_subst_char($1); }
	| T_COLLSYM
		{ add_subst_collsym($1); }
	| T_COLLELEM
		{ add_subst_collelem($1); }
	| T_SYMBOL
		{ add_subst_symbol($1); }
	;
537
/*
 * The messages category.  dump_messages() runs only when the body is a
 * keyword list; a "copy" body has already been handled by copycat.
 */
messages
	: T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
		{ dump_messages(); }
	| T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
	;
544
/* One or more messages keyword lines. */
messages_list
	: messages_list messages_item
	| messages_item
	;

/*
 * Keywords accepted in the messages category.  No action is attached.
 * NOTE(review): add_message() is not told which keyword was seen, so it
 * is presumably tracked outside this grammar; confirm.
 */
messages_kw
	: T_YESSTR
	| T_NOSTR
	| T_YESEXPR
	| T_NOEXPR
	;

/* A keyword and its quoted value; the value goes to add_message(). */
messages_item
	: messages_kw string T_NL
		{ add_message(get_wcs()); }
	;
560
/*
 * The monetary category.  dump_monetary() runs only when the body is a
 * keyword list; a "copy" body has already been handled by copycat.
 */
monetary
	: T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
		{ dump_monetary(); }
	| T_MONETARY T_NL copycat T_END T_MONETARY T_NL
	;
567
/* One or more monetary keyword lines. */
monetary_list
	: monetary_list monetary_kw
	| monetary_kw
	;

/* Monetary keywords whose value is a quoted string. */
monetary_strkw
	: T_INT_CURR_SYMBOL
	| T_CURRENCY_SYMBOL
	| T_MON_DECIMAL_POINT
	| T_MON_THOUSANDS_SEP
	| T_POSITIVE_SIGN
	| T_NEGATIVE_SIGN
	;

/* Monetary keywords whose value is a single number. */
monetary_numkw
	: T_INT_FRAC_DIGITS
	| T_FRAC_DIGITS
	| T_P_CS_PRECEDES
	| T_P_SEP_BY_SPACE
	| T_N_CS_PRECEDES
	| T_N_SEP_BY_SPACE
	| T_P_SIGN_POSN
	| T_N_SIGN_POSN
	| T_INT_P_CS_PRECEDES
	| T_INT_N_CS_PRECEDES
	| T_INT_P_SEP_BY_SPACE
	| T_INT_N_SEP_BY_SPACE
	| T_INT_P_SIGN_POSN
	| T_INT_N_SIGN_POSN
	;
595
/*
 * One monetary line: a string-valued keyword, a number-valued keyword,
 * or the grouping keyword with its list.  The grouping alternative has
 * no action because mon_group_list records the values itself.
 */
monetary_kw
	: monetary_strkw string T_NL
		{ add_monetary_str(get_wcs()); }
	| monetary_numkw T_NUMBER T_NL
		{ add_monetary_num($2); }
	| T_MON_GROUPING mon_group_list T_NL
	;
606
/*
 * Semicolon-separated monetary grouping values.  The first value calls
 * reset_monetary_group() before it is added; later values are added.
 */
mon_group_list
	: T_NUMBER
		{
			reset_monetary_group();
			add_monetary_group($1);
		}
	| mon_group_list T_SEMI T_NUMBER
		{ add_monetary_group($3); }
	;
617
618
/*
 * The numeric category.  dump_numeric() runs only when the body is a
 * keyword list; a "copy" body has already been handled by copycat.
 */
numeric
	: T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
		{ dump_numeric(); }
	| T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
	;
625
626
/* One or more numeric keyword lines. */
numeric_list
	: numeric_list numeric_item
	| numeric_item
	;

/*
 * One numeric line: a string-valued keyword, or the grouping keyword
 * with its list.  The grouping alternative has no action because
 * group_list records the values itself.
 */
numeric_item
	: numeric_strkw string T_NL
		{ add_numeric_str(get_wcs()); }
	| T_GROUPING group_list T_NL
	;

/* Numeric keywords whose value is a quoted string. */
numeric_strkw
	: T_DECIMAL_POINT
	| T_THOUSANDS_SEP
	;
642
643
/*
 * Semicolon-separated numeric grouping values.  The first value calls
 * reset_numeric_group() before it is added; later values are added.
 */
group_list
	: T_NUMBER
		{
			reset_numeric_group();
			add_numeric_group($1);
		}
	| group_list T_SEMI T_NUMBER
		{ add_numeric_group($3); }
	;
654
655
/*
 * The time category.  dump_time() runs only when the body is a keyword
 * list; a "copy" body has already been handled by copycat.
 *
 * Both forms are closed by T_END T_TIME.  The "copy" form used to
 * require T_END T_NUMERIC, which rejected a correctly terminated
 * section and accepted one closed with the numeric keyword.
 */
time		: T_TIME T_NL time_kwlist T_END T_TIME T_NL
		{
			dump_time();
		}
		| T_TIME T_NL copycat T_END T_TIME T_NL
		;
662
/* One or more time keyword lines. */
time_kwlist
	: time_kwlist time_kw
	| time_kw
	;

/*
 * One time line.  A string-valued keyword passes its value to
 * add_time_str().  A list-valued keyword has its values recorded by
 * time_list, after which check_time_list() is called.
 */
time_kw
	: time_strkw string T_NL
		{ add_time_str(get_wcs()); }
	| time_listkw time_list T_NL
		{ check_time_list(); }
	;
676
/* Time keywords whose value is a semicolon-separated list of strings. */
time_listkw
	: T_ABDAY
	| T_DAY
	| T_ABMON
	| T_MON
	| T_ERA
	| T_ALT_DIGITS
	| T_AM_PM
	;

/* Time keywords whose value is a single quoted string. */
time_strkw
	: T_ERA_D_T_FMT
	| T_ERA_T_FMT
	| T_ERA_D_FMT
	| T_D_T_FMT
	| T_D_FMT
	| T_T_FMT
	| T_T_FMT_AMPM
	| T_DATE_FMT
	;
695
/*
 * Semicolon-separated strings for a list-valued time keyword.  The first
 * string calls reset_time_list() before it is added; later strings are
 * added with add_time_list().
 */
time_list
	: time_list T_SEMI string
		{ add_time_list(get_wcs()); }
	| string
		{
			reset_time_list();
			add_time_list(get_wcs());
		}
	;
706