1/* $NetBSD: lsym_word.c,v 1.7 2023/06/17 22:09:24 rillig Exp $ */
2
3/*
4 * Tests for the token lsym_word, which represents a constant, a string
5 * literal or an identifier.
6 *
7 * See also:
8 *	lsym_funcname.c		for an identifier followed by '('
9 */
10
11// TODO: Is '"string"(' syntactically valid in any context?
12// TODO: Is '123(' syntactically valid in any context?
13// TODO: Would the output of the above depend on -pcs/-npcs?
14// TODO: Add more systematic tests.
15// TODO: Completely cover each state transition in lex_number_state.
16
17// TODO: Consider splitting this token into lsym_name and lsym_value, to
18// TODO: make it easier to skip tokens during lookahead, for example since
19// TODO: L"" is not an identifier but a string literal.
20
21//indent input
22// TODO: add input
23//indent end
24
25//indent run-equals-input
26
27
28/*
29 * Since 2019-04-04 and before NetBSD lexi.c 1.149 from 2021-11-20, the first
30 * character after a backslash continuation was always considered part of a
31 * word, no matter whether it was a word character or not.
 *
 * The expected output shows the current behavior: the '+' that follows the
 * continuation is not glued to 'var' but is surrounded by spaces, and the
 * backslash-newline is no longer present.
32 */
33//indent input
34int var\
35+name = 4;
36//indent end
37
38//indent run
39int		var + name = 4;
40//indent end
41
42
43//indent input
44wchar_t wide_string[] = L"wide string";
45//indent end
46
47/*
48 * Regardless of the line length, the 'L' must never be separated from the
49 * string literal.  Before lexi.c 1.167 from 2021-11-28, the 'L' was a
50 * separate token, which could have resulted in accidental spacing between the
51 * 'L' and the following "".
 *
 * The runs below therefore repeat the same input with the default line
 * length, with -l25 and with -l1, each time expecting unchanged output.
52 */
53//indent run-equals-input -di0
54
55//indent run-equals-input -di0 -l25
56
57//indent run-equals-input -di0 -l1
58
59
60//indent input
61wchar_t wide_char[] = L'w';
62//indent end
63
/*
 * As with the wide string literal, the 'L' prefix belongs to the character
 * constant; the output is expected to be identical to the input.
 */
64//indent run-equals-input -di0
65
66
67/*
 * Binary number literals, a GCC extension that was standardized in C23.
 *
 * Besides binary, hexadecimal, octal and decimal constants with and without
 * suffixes, the input contains two sequences that merely look like numbers:
 * with the macro 'b00101010' defined, '0 b00101010' expands to '0 -1', and
 * the space between these two tokens is kept in the output; the text
 * '0r12345' does not start with a number base prefix, so the expected output
 * splits it into '0' and 'r12345'.
 */
68//indent input
69#define b00101010 -1
70void t(void) {
71	unsigned a[] = {0b00101010, 0x00005678, 02, 17U};
72	float x[] = {.7f, 0.7f};
73	unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
74
75	if (0 b00101010)
76		return;
77	/* $ '0r' is not a number base prefix, so the tokens are split. */
78	if (0r12345)
79		return;
80}
81//indent end
82
83//indent run
84#define b00101010 -1
85void
86t(void)
87{
88	unsigned	a[] = {0b00101010, 0x00005678, 02, 17U};
89	float		x[] = {.7f, 0.7f};
90	unsigned long	ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
91
92	if (0 b00101010)
93		return;
94	if (0 r12345)
95		return;
96}
97//indent end
98
99
100/*
 * Floating point numbers, in decimal and hexadecimal notation, with and
 * without exponents and suffixes.
 *
 * The input also mixes in integer constants, a plain identifier used as a
 * statement and a member access whose name starts with '_' (presumably to
 * check that '._y' is not mistaken for the start of a number).  Apart from
 * the layout of the function header and the declarations, the expected output
 * keeps every token unchanged.
 */
101//indent input
102void t(void) {
103	unsigned long x = 314UL;
104	double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
105	int z = 0b0101;
106	DO_NOTHING;
107	x._y = 5;
108}
109//indent end
110
111//indent run
112void
113t(void)
114{
115	unsigned long	x = 314UL;
116	double		y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
117	int		z = 0b0101;
118	DO_NOTHING;
119	x._y = 5;
120}
121//indent end
122
123
124/*
125 * Test identifiers containing '$', which some compilers support as an
126 * extension to the C standard.
 *
 * Both a lone '$' and a '$' in the middle of a name are expected to be kept
 * as a single identifier; the '$' inside the string literal is unaffected.
 * The expected output additionally normalizes the spacing around '=' and in
 * front of the trailing comment.
127 */
128//indent input
129int $		= jQuery;			// just kidding
130const char SYS$LOGIN[]="$HOME";
131//indent end
132
133//indent run
134int		$ = jQuery;	// just kidding
135const char	SYS$LOGIN[] = "$HOME";
136//indent end
137
138
139/*
140 * Test the tokenizer for number constants.
141 *
142 * When the tokenizer reads a prefix that is not yet a valid token (such as
143 * '0x') but that could be extended to form a valid token (such as '0x123'),
144 * indent does not reject the incomplete prefix; it returns it as a token
 * nevertheless, so the output below is expected to equal the input.
145 */
146//indent input
147int unfinished_hex_prefix = 0x;
148double unfinished_hex_float = 0x123p;
149//indent end
150
151//indent run-equals-input -di0
152