/* Character handling in C locale.

   These functions work like the corresponding functions in <ctype.h>,
   except that they have the C (POSIX) locale hardwired, whereas the
   <ctype.h> functions' behaviour depends on the current locale set via
   setlocale.

   Copyright (C) 2000, 2001, 2002, 2003, 2006, 2008, 2009 Free Software
   Foundation, Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#ifndef C_CTYPE_H
#define C_CTYPE_H

#include <stdbool.h>


#ifdef __cplusplus
extern "C" {
#endif


/* The functions defined in this file assume the "C" locale and a character
   set without diacritics (ASCII-US or EBCDIC-US or something like that).
   Even if the "C" locale on a particular system is an extension of the ASCII
   character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it
   is ISO-8859-1), the functions in this file recognize only the ASCII
   characters.  */


/* Check whether the ASCII optimizations apply.

   Each probe below defines its macro to 1 when the property holds and
   leaves it undefined otherwise, so that a later '#if NAME' evaluates to
   false when the property does not hold.  */

/* ANSI C89 (and ISO C99 5.2.1.3 too) already guarantees that
   '0', '1', ..., '9' have consecutive integer values.  */
#define C_CTYPE_CONSECUTIVE_DIGITS 1

/* 'A' ... 'Z' occupy 26 consecutive, ascending code points.  */
#if ('A' <= 'Z') \
    && ('A' + 1 == 'B') && ('B' + 1 == 'C') && ('C' + 1 == 'D') \
    && ('D' + 1 == 'E') && ('E' + 1 == 'F') && ('F' + 1 == 'G') \
    && ('G' + 1 == 'H') && ('H' + 1 == 'I') && ('I' + 1 == 'J') \
    && ('J' + 1 == 'K') && ('K' + 1 == 'L') && ('L' + 1 == 'M') \
    && ('M' + 1 == 'N') && ('N' + 1 == 'O') && ('O' + 1 == 'P') \
    && ('P' + 1 == 'Q') && ('Q' + 1 == 'R') && ('R' + 1 == 'S') \
    && ('S' + 1 == 'T') && ('T' + 1 == 'U') && ('U' + 1 == 'V') \
    && ('V' + 1 == 'W') && ('W' + 1 == 'X') && ('X' + 1 == 'Y') \
    && ('Y' + 1 == 'Z')
#define C_CTYPE_CONSECUTIVE_UPPERCASE 1
#endif

/* 'a' ... 'z' occupy 26 consecutive, ascending code points.  */
#if ('a' <= 'z') \
    && ('a' + 1 == 'b') && ('b' + 1 == 'c') && ('c' + 1 == 'd') \
    && ('d' + 1 == 'e') && ('e' + 1 == 'f') && ('f' + 1 == 'g') \
    && ('g' + 1 == 'h') && ('h' + 1 == 'i') && ('i' + 1 == 'j') \
    && ('j' + 1 == 'k') && ('k' + 1 == 'l') && ('l' + 1 == 'm') \
    && ('m' + 1 == 'n') && ('n' + 1 == 'o') && ('o' + 1 == 'p') \
    && ('p' + 1 == 'q') && ('q' + 1 == 'r') && ('r' + 1 == 's') \
    && ('s' + 1 == 't') && ('t' + 1 == 'u') && ('u' + 1 == 'v') \
    && ('v' + 1 == 'w') && ('w' + 1 == 'x') && ('x' + 1 == 'y') \
    && ('y' + 1 == 'z')
#define C_CTYPE_CONSECUTIVE_LOWERCASE 1
#endif

#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
    && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
    && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
    && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
    && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
    && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
    && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
    && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
    && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
    && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
    && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
    && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
    && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
    && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
    && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
    && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
    && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
    && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
    && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
    && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
    && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
    && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
    && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)
/* The character set is ASCII or one of its variants or extensions, not EBCDIC.
   Testing the value of '\n' and '\r' is not relevant.  */
#define C_CTYPE_ASCII 1
#endif


/* Function declarations.  */

/* Unlike the functions in <ctype.h>, which require an argument in the range
   of the 'unsigned char' type, the functions here operate on values that are
   in the 'unsigned char' range or in the 'char' range.  In other words,
   when you have a 'char' value, you need to cast it before using it as
   argument to a <ctype.h> function:

         const char *s = ...;
         if (isalpha ((unsigned char) *s)) ...

   but you don't need to cast it for the functions defined in this file:

         const char *s = ...;
         if (c_isalpha (*s)) ...
 */

/* Classification predicates.  Each returns true when C belongs to the
   named class and false otherwise.  The inline versions further below
   spell the classes out as follows:

     c_isascii   0x00 ... 0x7f
     c_isalnum   a digit or a letter
     c_isalpha   'A' ... 'Z' or 'a' ... 'z'
     c_isblank   ' ' or '\t'
     c_iscntrl   0x00 ... 0x1f or 0x7f          (ASCII hosts)
     c_isdigit   '0' ... '9'
     c_islower   'a' ... 'z'
     c_isgraph   '!' ... '~'                    (ASCII hosts)
     c_isprint   ' ' ... '~'                    (ASCII hosts)
     c_ispunct   c_isgraph but not c_isalnum
     c_isspace   ' ', '\t', '\n', '\v', '\f' or '\r'
     c_isupper   'A' ... 'Z'
     c_isxdigit  a digit, 'A' ... 'F' or 'a' ... 'f'  */

extern bool c_isascii (int c); /* not locale dependent */

extern bool c_isalnum (int c);
extern bool c_isalpha (int c);
extern bool c_isblank (int c);
extern bool c_iscntrl (int c);
extern bool c_isdigit (int c);
extern bool c_islower (int c);
extern bool c_isgraph (int c);
extern bool c_isprint (int c);
extern bool c_ispunct (int c);
extern bool c_isspace (int c);
extern bool c_isupper (int c);
extern bool c_isxdigit (int c);

/* Case conversion.  The inline versions below map 'A' ... 'Z' to
   'a' ... 'z' (c_tolower) or the reverse (c_toupper) and return every
   other value unchanged.  */
extern int c_tolower (int c);
extern int c_toupper (int c);


/* Inline versions of the functions above.  They are GCC statement
   expressions, so they are enabled only for a compiler that defines
   __GNUC__, and only when optimizing for speed.  Define
   NO_C_CTYPE_MACROS to get the out-of-line functions unconditionally.

   Every macro copies its argument into a local 'int' first, so the
   argument is evaluated exactly once.  */
#if defined __GNUC__ && defined __OPTIMIZE__ \
    && !defined __OPTIMIZE_SIZE__ && !defined NO_C_CTYPE_MACROS

/* ASCII optimizations.  */

#undef c_isascii
#define c_isascii(c) \
  ({ int __c = (c); \
     (__c >= 0x00 && __c <= 0x7f); \
   })

#if C_CTYPE_CONSECUTIVE_DIGITS \
    && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
#if C_CTYPE_ASCII
/* In ASCII a lowercase letter is its uppercase counterpart plus 0x20, so
   clearing that bit folds both cases into the single range 'A' ... 'Z'.  */
#undef c_isalnum
#define c_isalnum(c) \
  ({ int __c = (c); \
     ((__c >= '0' && __c <= '9') \
      || ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z')); \
   })
#else
#undef c_isalnum
#define c_isalnum(c) \
  ({ int __c = (c); \
     ((__c >= '0' && __c <= '9') \
      || (__c >= 'A' && __c <= 'Z') \
      || (__c >= 'a' && __c <= 'z')); \
   })
#endif
#endif

#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
#if C_CTYPE_ASCII
/* Same case-folding trick as in c_isalnum.  */
#undef c_isalpha
#define c_isalpha(c) \
  ({ int __c = (c); \
     ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z'); \
   })
#else
#undef c_isalpha
#define c_isalpha(c) \
  ({ int __c = (c); \
     ((__c >= 'A' && __c <= 'Z') || (__c >= 'a' && __c <= 'z')); \
   })
#endif
#endif

#undef c_isblank
#define c_isblank(c) \
  ({ int __c = (c); \
     (__c == ' ' || __c == '\t'); \
   })

#if C_CTYPE_ASCII
/* (__c & ~0x1f) == 0 holds exactly for the values 0x00 ... 0x1f.  */
#undef c_iscntrl
#define c_iscntrl(c) \
  ({ int __c = (c); \
     ((__c & ~0x1f) == 0 || __c == 0x7f); \
   })
#endif

#if C_CTYPE_CONSECUTIVE_DIGITS
#undef c_isdigit
#define c_isdigit(c) \
  ({ int __c = (c); \
     (__c >= '0' && __c <= '9'); \
   })
#endif

#if C_CTYPE_CONSECUTIVE_LOWERCASE
#undef c_islower
#define c_islower(c) \
  ({ int __c = (c); \
     (__c >= 'a' && __c <= 'z'); \
   })
#endif

#if C_CTYPE_ASCII
#undef c_isgraph
#define c_isgraph(c) \
  ({ int __c = (c); \
     (__c >= '!' && __c <= '~'); \
   })
#endif

#if C_CTYPE_ASCII
#undef c_isprint
#define c_isprint(c) \
  ({ int __c = (c); \
     (__c >= ' ' && __c <= '~'); \
   })
#endif

#if C_CTYPE_ASCII
/* The temporary here must not be spelled '__c': c_isgraph and c_isalnum
   may expand to 'int __c = (ARG);', and an ARG spelled '__c' would then
   name the new, still uninitialized variable.  It is also kept in the
   reserved '__' namespace so that it cannot capture a caller's own
   block-scope variable such as '_c'.  */
#undef c_ispunct
#define c_ispunct(c) \
  ({ int __c_punct = (c); \
     (c_isgraph (__c_punct) && ! c_isalnum (__c_punct)); \
   })
#endif

#undef c_isspace
#define c_isspace(c) \
  ({ int __c = (c); \
     (__c == ' ' || __c == '\t' \
      || __c == '\n' || __c == '\v' || __c == '\f' || __c == '\r'); \
   })

#if C_CTYPE_CONSECUTIVE_UPPERCASE
#undef c_isupper
#define c_isupper(c) \
  ({ int __c = (c); \
     (__c >= 'A' && __c <= 'Z'); \
   })
#endif

#if C_CTYPE_CONSECUTIVE_DIGITS \
    && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
#if C_CTYPE_ASCII
/* Same case-folding trick as in c_isalnum, limited to 'A' ... 'F'.  */
#undef c_isxdigit
#define c_isxdigit(c) \
  ({ int __c = (c); \
     ((__c >= '0' && __c <= '9') \
      || ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'F')); \
   })
#else
#undef c_isxdigit
#define c_isxdigit(c) \
  ({ int __c = (c); \
     ((__c >= '0' && __c <= '9') \
      || (__c >= 'A' && __c <= 'F') \
      || (__c >= 'a' && __c <= 'f')); \
   })
#endif
#endif

#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
/* With both alphabets consecutive, the offset of a letter within its
   alphabet is the same in either case.  */
#undef c_tolower
#define c_tolower(c) \
  ({ int __c = (c); \
     (__c >= 'A' && __c <= 'Z' ? __c - 'A' + 'a' : __c); \
   })
#undef c_toupper
#define c_toupper(c) \
  ({ int __c = (c); \
     (__c >= 'a' && __c <= 'z' ? __c - 'a' + 'A' : __c); \
   })
#endif

#endif /* optimizing for speed */


#ifdef __cplusplus
}
#endif

#endif /* C_CTYPE_H */