1/* $NetBSD: lsym_word.c,v 1.7 2023/06/17 22:09:24 rillig Exp $ */ 2 3/* 4 * Tests for the token lsym_word, which represents a constant, a string 5 * literal or an identifier. 6 * 7 * See also: 8 * lsym_funcname.c for an identifier followed by '(' 9 */ 10 11// TODO: Is '"string"(' syntactically valid in any context? 12// TODO: Is '123(' syntactically valid in any context? 13// TODO: Would the output of the above depend on -pcs/-npcs? 14// TODO: Add more systematic tests. 15// TODO: Completely cover each state transition in lex_number_state. 16 17// TODO: Consider splitting this token into lsym_name and lsym_value, to 18// TODO: make it easier to skip tokens during lookahead, for example since 19// TODO: L"" is not an identifier but a string literal. 20 21//indent input 22// TODO: add input 23//indent end 24 25//indent run-equals-input 26 27 28/* 29 * Since 2019-04-04 and before NetBSD lexi.c 1.149 from 2021-11-20, the first 30 * character after a backslash continuation was always considered part of a 31 * word, no matter whether it was a word character or not. 32 */ 33//indent input 34int var\ 35+name = 4; 36//indent end 37 38//indent run 39int var + name = 4; 40//indent end 41 42 43//indent input 44wchar_t wide_string[] = L"wide string"; 45//indent end 46 47/* 48 * Regardless of the line length, the 'L' must never be separated from the 49 * string literal. Before lexi.c 1.167 from 2021-11-28, the 'L' was a 50 * separate token, which could have resulted in accidental spacing between the 51 * 'L' and the following "". 52 */ 53//indent run-equals-input -di0 54 55//indent run-equals-input -di0 -l25 56 57//indent run-equals-input -di0 -l1 58 59 60//indent input 61wchar_t wide_char[] = L'w'; 62//indent end 63 64//indent run-equals-input -di0 65 66 67/* Binary number literals, a GCC extension that was standardized in C23. 
*/ 68//indent input 69#define b00101010 -1 70void t(void) { 71 unsigned a[] = {0b00101010, 0x00005678, 02, 17U}; 72 float x[] = {.7f, 0.7f}; 73 unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL}; 74 75 if (0 b00101010) 76 return; 77 /* $ '0r' is not a number base prefix, so the tokens are split. */ 78 if (0r12345) 79 return; 80} 81//indent end 82 83//indent run 84#define b00101010 -1 85void 86t(void) 87{ 88 unsigned a[] = {0b00101010, 0x00005678, 02, 17U}; 89 float x[] = {.7f, 0.7f}; 90 unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL}; 91 92 if (0 b00101010) 93 return; 94 if (0 r12345) 95 return; 96} 97//indent end 98 99 100/* Floating point numbers, including hexadecimal floats and suffixed constants, next to integer constants, a bare identifier statement and a member access. */ 101//indent input 102void t(void) { 103 unsigned long x = 314UL; 104 double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L}; 105 int z = 0b0101; 106 DO_NOTHING; 107 x._y = 5; 108} 109//indent end 110 111//indent run 112void 113t(void) 114{ 115 unsigned long x = 314UL; 116 double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L}; 117 int z = 0b0101; 118 DO_NOTHING; 119 x._y = 5; 120} 121//indent end 122 123 124/* 125 * Test identifiers containing '$', which some compilers support as an 126 * extension to the C standard. 127 */ 128//indent input 129int $ = jQuery; // just kidding 130const char SYS$LOGIN[]="$HOME"; 131//indent end 132 133//indent run 134int $ = jQuery; // just kidding 135const char SYS$LOGIN[] = "$HOME"; 136//indent end 137 138 139/* 140 * Test the tokenizer for number constants. 141 * 142 * When the tokenizer reads a character that makes a token invalid (such as 143 * '0x') but may later be extended to form a valid token (such as '0x123'), 144 * indent does not care about this invalid prefix and returns it nevertheless. 145 */ 146//indent input 147int unfinished_hex_prefix = 0x; 148double unfinished_hex_float = 0x123p; 149//indent end 150 151//indent run-equals-input -di0 152