1169695Skan/* <ctype.h> replacement macros. 2169695Skan 3169695Skan Copyright (C) 2000, 2001, 2002, 2003, 2004, 4169695Skan 2005 Free Software Foundation, Inc. 5169695Skan Contributed by Zack Weinberg <zackw@stanford.edu>. 6169695Skan 7169695SkanThis file is part of the libiberty library. 8169695SkanLibiberty is free software; you can redistribute it and/or 9169695Skanmodify it under the terms of the GNU Library General Public 10169695SkanLicense as published by the Free Software Foundation; either 11169695Skanversion 2 of the License, or (at your option) any later version. 12169695Skan 13169695SkanLibiberty is distributed in the hope that it will be useful, 14169695Skanbut WITHOUT ANY WARRANTY; without even the implied warranty of 15169695SkanMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16169695SkanLibrary General Public License for more details. 17169695Skan 18169695SkanYou should have received a copy of the GNU Library General Public 19169695SkanLicense along with libiberty; see the file COPYING.LIB. If 20169695Skannot, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, 21169695SkanBoston, MA 02110-1301, USA. */ 22169695Skan 23169695Skan/* 24169695Skan 25169695Skan@defvr Extension HOST_CHARSET 26169695SkanThis macro indicates the basic character set and encoding used by the 27169695Skanhost: more precisely, the encoding used for character constants in 28169695Skanpreprocessor @samp{#if} statements (the C "execution character set"). 29169695SkanIt is defined by @file{safe-ctype.h}, and will be an integer constant 30169695Skanwith one of the following values: 31169695Skan 32169695Skan@ftable @code 33169695Skan@item HOST_CHARSET_UNKNOWN 34169695SkanThe host character set is unknown - that is, not one of the next two 35169695Skanpossibilities. 36169695Skan 37169695Skan@item HOST_CHARSET_ASCII 38169695SkanThe host character set is ASCII. 39169695Skan 40169695Skan@item HOST_CHARSET_EBCDIC 41169695SkanThe host character set is some variant of EBCDIC. (Only one of the 42169695Skannineteen EBCDIC varying characters is tested; exercise caution.) 43169695Skan@end ftable 44169695Skan@end defvr 45169695Skan 46169695Skan@deffn Extension ISALPHA (@var{c}) 47169695Skan@deffnx Extension ISALNUM (@var{c}) 48169695Skan@deffnx Extension ISBLANK (@var{c}) 49169695Skan@deffnx Extension ISCNTRL (@var{c}) 50169695Skan@deffnx Extension ISDIGIT (@var{c}) 51169695Skan@deffnx Extension ISGRAPH (@var{c}) 52169695Skan@deffnx Extension ISLOWER (@var{c}) 53169695Skan@deffnx Extension ISPRINT (@var{c}) 54169695Skan@deffnx Extension ISPUNCT (@var{c}) 55169695Skan@deffnx Extension ISSPACE (@var{c}) 56169695Skan@deffnx Extension ISUPPER (@var{c}) 57169695Skan@deffnx Extension ISXDIGIT (@var{c}) 58169695Skan 59169695SkanThese twelve macros are defined by @file{safe-ctype.h}. Each has the 60169695Skansame meaning as the corresponding macro (with name in lowercase) 61169695Skandefined by the standard header @file{ctype.h}. For example, 62169695Skan@code{ISALPHA} returns true for alphabetic characters and false for 63169695Skanothers. However, there are two differences between these macros and 64169695Skanthose provided by @file{ctype.h}: 65169695Skan 66169695Skan@itemize @bullet 67169695Skan@item These macros are guaranteed to have well-defined behavior for all 68169695Skanvalues representable by @code{signed char} and @code{unsigned char}, and 69169695Skanfor @code{EOF}. 70169695Skan 71169695Skan@item These macros ignore the current locale; they are true for these 72169695Skanfixed sets of characters: 73169695Skan@multitable {@code{XDIGIT}} {yada yada yada yada yada yada yada yada} 74169695Skan@item @code{ALPHA} @tab @kbd{A-Za-z} 75169695Skan@item @code{ALNUM} @tab @kbd{A-Za-z0-9} 76169695Skan@item @code{BLANK} @tab @kbd{space tab} 77169695Skan@item @code{CNTRL} @tab @code{!PRINT} 78169695Skan@item @code{DIGIT} @tab @kbd{0-9} 79169695Skan@item @code{GRAPH} @tab @code{ALNUM || PUNCT} 80169695Skan@item @code{LOWER} @tab @kbd{a-z} 81169695Skan@item @code{PRINT} @tab @code{GRAPH ||} @kbd{space} 82169695Skan@item @code{PUNCT} @tab @kbd{`~!@@#$%^&*()_-=+[@{]@}\|;:'",<.>/?} 83169695Skan@item @code{SPACE} @tab @kbd{space tab \n \r \f \v} 84169695Skan@item @code{UPPER} @tab @kbd{A-Z} 85169695Skan@item @code{XDIGIT} @tab @kbd{0-9A-Fa-f} 86169695Skan@end multitable 87169695Skan 88169695SkanNote that, if the host character set is ASCII or a superset thereof, 89169695Skanall these macros will return false for all values of @code{char} outside 90169695Skanthe range of 7-bit ASCII. In particular, both ISPRINT and ISCNTRL return 91169695Skanfalse for characters with numeric values from 128 to 255. 92169695Skan@end itemize 93169695Skan@end deffn 94169695Skan 95169695Skan@deffn Extension ISIDNUM (@var{c}) 96169695Skan@deffnx Extension ISIDST (@var{c}) 97169695Skan@deffnx Extension IS_VSPACE (@var{c}) 98169695Skan@deffnx Extension IS_NVSPACE (@var{c}) 99169695Skan@deffnx Extension IS_SPACE_OR_NUL (@var{c}) 100169695Skan@deffnx Extension IS_ISOBASIC (@var{c}) 101169695SkanThese six macros are defined by @file{safe-ctype.h} and provide 102169695Skanadditional character classes which are useful when doing lexical 103169695Skananalysis of C or similar languages. They are true for the following 104169695Skansets of characters: 105169695Skan 106169695Skan@multitable {@code{SPACE_OR_NUL}} {yada yada yada yada yada yada yada yada} 107169695Skan@item @code{IDNUM} @tab @kbd{A-Za-z0-9_} 108169695Skan@item @code{IDST} @tab @kbd{A-Za-z_} 109169695Skan@item @code{VSPACE} @tab @kbd{\r \n} 110169695Skan@item @code{NVSPACE} @tab @kbd{space tab \f \v \0} 111169695Skan@item @code{SPACE_OR_NUL} @tab @code{VSPACE || NVSPACE} 112169695Skan@item @code{ISOBASIC} @tab @code{VSPACE || NVSPACE || PRINT} 113169695Skan@end multitable 114169695Skan@end deffn 115169695Skan 116169695Skan*/ 117169695Skan 118169695Skan#include "ansidecl.h" 119169695Skan#include <safe-ctype.h> 120169695Skan#include <stdio.h> /* for EOF */ 121169695Skan 122169695Skan#if EOF != -1 123169695Skan #error "<safe-ctype.h> requires EOF == -1" 124169695Skan#endif 125169695Skan 126169695Skan/* Shorthand */ 127169695Skan#define bl _sch_isblank 128169695Skan#define cn _sch_iscntrl 129169695Skan#define di _sch_isdigit 130169695Skan#define is _sch_isidst 131169695Skan#define lo _sch_islower 132169695Skan#define nv _sch_isnvsp 133169695Skan#define pn _sch_ispunct 134169695Skan#define pr _sch_isprint 135169695Skan#define sp _sch_isspace 136169695Skan#define up _sch_isupper 137169695Skan#define vs _sch_isvsp 138169695Skan#define xd _sch_isxdigit 139169695Skan 140169695Skan/* Masks. */ 141169695Skan#define L (const unsigned short) (lo|is |pr) /* lower case letter */ 142169695Skan#define XL (const unsigned short) (lo|is|xd|pr) /* lowercase hex digit */ 143169695Skan#define U (const unsigned short) (up|is |pr) /* upper case letter */ 144169695Skan#define XU (const unsigned short) (up|is|xd|pr) /* uppercase hex digit */ 145169695Skan#define D (const unsigned short) (di |xd|pr) /* decimal digit */ 146169695Skan#define P (const unsigned short) (pn |pr) /* punctuation */ 147169695Skan#define _ (const unsigned short) (pn|is |pr) /* underscore */ 148169695Skan 149169695Skan#define C (const unsigned short) ( cn) /* control character */ 150169695Skan#define Z (const unsigned short) (nv |cn) /* NUL */ 151169695Skan#define M (const unsigned short) (nv|sp |cn) /* cursor movement: \f \v */ 152169695Skan#define V (const unsigned short) (vs|sp |cn) /* vertical space: \r \n */ 153169695Skan#define T (const unsigned short) (nv|sp|bl|cn) /* tab */ 154169695Skan#define S (const unsigned short) (nv|sp|bl|pr) /* space */ 155169695Skan 156169695Skan/* Are we ASCII? */ 157169695Skan#if HOST_CHARSET == HOST_CHARSET_ASCII 158169695Skan 159169695Skanconst unsigned short _sch_istable[256] = 160169695Skan{ 161169695Skan Z, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ 162169695Skan C, T, V, M, M, V, C, C, /* BS HT LF VT FF CR SO SI */ 163169695Skan C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ 164169695Skan C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */ 165169695Skan S, P, P, P, P, P, P, P, /* SP ! " # $ % & ' */ 166169695Skan P, P, P, P, P, P, P, P, /* ( ) * + , - . / */ 167169695Skan D, D, D, D, D, D, D, D, /* 0 1 2 3 4 5 6 7 */ 168169695Skan D, D, P, P, P, P, P, P, /* 8 9 : ; < = > ? */ 169169695Skan P, XU, XU, XU, XU, XU, XU, U, /* @ A B C D E F G */ 170169695Skan U, U, U, U, U, U, U, U, /* H I J K L M N O */ 171169695Skan U, U, U, U, U, U, U, U, /* P Q R S T U V W */ 172169695Skan U, U, U, P, P, P, P, _, /* X Y Z [ \ ] ^ _ */ 173169695Skan P, XL, XL, XL, XL, XL, XL, L, /* ` a b c d e f g */ 174169695Skan L, L, L, L, L, L, L, L, /* h i j k l m n o */ 175169695Skan L, L, L, L, L, L, L, L, /* p q r s t u v w */ 176169695Skan L, L, L, P, P, P, P, C, /* x y z { | } ~ DEL */ 177169695Skan 178169695Skan /* high half of unsigned char is locale-specific, so all tests are 179169695Skan false in "C" locale */ 180169695Skan 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 181169695Skan 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 182169695Skan 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 183169695Skan 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184169695Skan 185169695Skan 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 186169695Skan 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 187169695Skan 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 188169695Skan 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 189169695Skan}; 190169695Skan 191169695Skanconst unsigned char _sch_tolower[256] = 192169695Skan{ 193169695Skan 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 194169695Skan 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 195169695Skan 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 196169695Skan 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 197169695Skan 64, 198169695Skan 199169695Skan 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 200169695Skan 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 201169695Skan 202169695Skan 91, 92, 93, 94, 95, 96, 203169695Skan 204169695Skan 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 205169695Skan 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 206169695Skan 207169695Skan 123,124,125,126,127, 208169695Skan 209169695Skan 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, 210169695Skan 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, 211169695Skan 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, 212169695Skan 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, 213169695Skan 214169695Skan 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, 215169695Skan 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, 216169695Skan 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239, 217169695Skan 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, 218169695Skan}; 219169695Skan 220169695Skanconst unsigned char _sch_toupper[256] = 221169695Skan{ 222169695Skan 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 223169695Skan 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 224169695Skan 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 225169695Skan 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 226169695Skan 64, 227169695Skan 228169695Skan 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 229169695Skan 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 230169695Skan 231169695Skan 91, 92, 93, 94, 95, 96, 232169695Skan 233169695Skan 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 234169695Skan 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 235169695Skan 236169695Skan 123,124,125,126,127, 237169695Skan 238169695Skan 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, 239169695Skan 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, 240169695Skan 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, 241169695Skan 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, 242169695Skan 243169695Skan 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, 244169695Skan 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, 245169695Skan 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239, 246169695Skan 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, 247169695Skan}; 248169695Skan 249169695Skan#else 250169695Skan# if HOST_CHARSET == HOST_CHARSET_EBCDIC 251169695Skan #error "FIXME: write tables for EBCDIC" 252169695Skan# else 253169695Skan #error "Unrecognized host character set" 254169695Skan# endif 255169695Skan#endif 256