177298Sobrien/* <ctype.h> replacement macros. 277298Sobrien 3218822Sdim Copyright (C) 2000, 2001, 2002, 2003, 2004, 4218822Sdim 2005 Free Software Foundation, Inc. 577298Sobrien Contributed by Zack Weinberg <zackw@stanford.edu>. 677298Sobrien 777298SobrienThis file is part of the libiberty library. 877298SobrienLibiberty is free software; you can redistribute it and/or 977298Sobrienmodify it under the terms of the GNU Library General Public 1077298SobrienLicense as published by the Free Software Foundation; either 1177298Sobrienversion 2 of the License, or (at your option) any later version. 1277298Sobrien 1377298SobrienLibiberty is distributed in the hope that it will be useful, 1477298Sobrienbut WITHOUT ANY WARRANTY; without even the implied warranty of 1577298SobrienMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1677298SobrienLibrary General Public License for more details. 1777298Sobrien 1877298SobrienYou should have received a copy of the GNU Library General Public 1977298SobrienLicense along with libiberty; see the file COPYING.LIB. If 20218822Sdimnot, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, 21218822SdimBoston, MA 02110-1301, USA. */ 2277298Sobrien 23130561Sobrien/* 2477298Sobrien 25130561Sobrien@defvr Extension HOST_CHARSET 26130561SobrienThis macro indicates the basic character set and encoding used by the 27130561Sobrienhost: more precisely, the encoding used for character constants in 28130561Sobrienpreprocessor @samp{#if} statements (the C "execution character set"). 29130561SobrienIt is defined by @file{safe-ctype.h}, and will be an integer constant 30130561Sobrienwith one of the following values: 3177298Sobrien 32130561Sobrien@ftable @code 33130561Sobrien@item HOST_CHARSET_UNKNOWN 34130561SobrienThe host character set is unknown - that is, not one of the next two 35130561Sobrienpossibilities. 36130561Sobrien 37130561Sobrien@item HOST_CHARSET_ASCII 38130561SobrienThe host character set is ASCII. 39130561Sobrien 40130561Sobrien@item HOST_CHARSET_EBCDIC 41130561SobrienThe host character set is some variant of EBCDIC. (Only one of the 42130561Sobriennineteen EBCDIC varying characters is tested; exercise caution.) 43130561Sobrien@end ftable 44130561Sobrien@end defvr 45130561Sobrien 46130561Sobrien@deffn Extension ISALPHA (@var{c}) 47130561Sobrien@deffnx Extension ISALNUM (@var{c}) 48130561Sobrien@deffnx Extension ISBLANK (@var{c}) 49130561Sobrien@deffnx Extension ISCNTRL (@var{c}) 50130561Sobrien@deffnx Extension ISDIGIT (@var{c}) 51130561Sobrien@deffnx Extension ISGRAPH (@var{c}) 52130561Sobrien@deffnx Extension ISLOWER (@var{c}) 53130561Sobrien@deffnx Extension ISPRINT (@var{c}) 54130561Sobrien@deffnx Extension ISPUNCT (@var{c}) 55130561Sobrien@deffnx Extension ISSPACE (@var{c}) 56130561Sobrien@deffnx Extension ISUPPER (@var{c}) 57130561Sobrien@deffnx Extension ISXDIGIT (@var{c}) 58130561Sobrien 59130561SobrienThese twelve macros are defined by @file{safe-ctype.h}. Each has the 60130561Sobriensame meaning as the corresponding macro (with name in lowercase) 61130561Sobriendefined by the standard header @file{ctype.h}. For example, 62130561Sobrien@code{ISALPHA} returns true for alphabetic characters and false for 63130561Sobrienothers. However, there are two differences between these macros and 64130561Sobrienthose provided by @file{ctype.h}: 65130561Sobrien 66130561Sobrien@itemize @bullet 67130561Sobrien@item These macros are guaranteed to have well-defined behavior for all 68130561Sobrienvalues representable by @code{signed char} and @code{unsigned char}, and 69130561Sobrienfor @code{EOF}. 70130561Sobrien 71130561Sobrien@item These macros ignore the current locale; they are true for these 72130561Sobrienfixed sets of characters: 73130561Sobrien@multitable {@code{XDIGIT}} {yada yada yada yada yada yada yada yada} 74130561Sobrien@item @code{ALPHA} @tab @kbd{A-Za-z} 75130561Sobrien@item @code{ALNUM} @tab @kbd{A-Za-z0-9} 76130561Sobrien@item @code{BLANK} @tab @kbd{space tab} 77130561Sobrien@item @code{CNTRL} @tab @code{!PRINT} 78130561Sobrien@item @code{DIGIT} @tab @kbd{0-9} 79130561Sobrien@item @code{GRAPH} @tab @code{ALNUM || PUNCT} 80130561Sobrien@item @code{LOWER} @tab @kbd{a-z} 81130561Sobrien@item @code{PRINT} @tab @code{GRAPH ||} @kbd{space} 82130561Sobrien@item @code{PUNCT} @tab @kbd{`~!@@#$%^&*()_-=+[@{]@}\|;:'",<.>/?} 83130561Sobrien@item @code{SPACE} @tab @kbd{space tab \n \r \f \v} 84130561Sobrien@item @code{UPPER} @tab @kbd{A-Z} 85130561Sobrien@item @code{XDIGIT} @tab @kbd{0-9A-Fa-f} 86130561Sobrien@end multitable 87130561Sobrien 88130561SobrienNote that, if the host character set is ASCII or a superset thereof, 89130561Sobrienall these macros will return false for all values of @code{char} outside 90130561Sobrienthe range of 7-bit ASCII. In particular, both ISPRINT and ISCNTRL return 91130561Sobrienfalse for characters with numeric values from 128 to 255. 92130561Sobrien@end itemize 93130561Sobrien@end deffn 94130561Sobrien 95130561Sobrien@deffn Extension ISIDNUM (@var{c}) 96130561Sobrien@deffnx Extension ISIDST (@var{c}) 97130561Sobrien@deffnx Extension IS_VSPACE (@var{c}) 98130561Sobrien@deffnx Extension IS_NVSPACE (@var{c}) 99130561Sobrien@deffnx Extension IS_SPACE_OR_NUL (@var{c}) 100130561Sobrien@deffnx Extension IS_ISOBASIC (@var{c}) 101130561SobrienThese six macros are defined by @file{safe-ctype.h} and provide 102130561Sobrienadditional character classes which are useful when doing lexical 103130561Sobrienanalysis of C or similar languages. They are true for the following 104130561Sobriensets of characters: 105130561Sobrien 106130561Sobrien@multitable {@code{SPACE_OR_NUL}} {yada yada yada yada yada yada yada yada} 107130561Sobrien@item @code{IDNUM} @tab @kbd{A-Za-z0-9_} 108130561Sobrien@item @code{IDST} @tab @kbd{A-Za-z_} 109130561Sobrien@item @code{VSPACE} @tab @kbd{\r \n} 110130561Sobrien@item @code{NVSPACE} @tab @kbd{space tab \f \v \0} 111130561Sobrien@item @code{SPACE_OR_NUL} @tab @code{VSPACE || NVSPACE} 112130561Sobrien@item @code{ISOBASIC} @tab @code{VSPACE || NVSPACE || PRINT} 113130561Sobrien@end multitable 114130561Sobrien@end deffn 115130561Sobrien 116130561Sobrien*/ 117130561Sobrien 11877298Sobrien#include "ansidecl.h" 11977298Sobrien#include <safe-ctype.h> 12077298Sobrien#include <stdio.h> /* for EOF */ 12177298Sobrien 122130561Sobrien#if EOF != -1 123130561Sobrien #error "<safe-ctype.h> requires EOF == -1" 124130561Sobrien#endif 125130561Sobrien 12677298Sobrien/* Shorthand */ 12777298Sobrien#define bl _sch_isblank 12877298Sobrien#define cn _sch_iscntrl 12977298Sobrien#define di _sch_isdigit 13077298Sobrien#define is _sch_isidst 13177298Sobrien#define lo _sch_islower 13277298Sobrien#define nv _sch_isnvsp 13377298Sobrien#define pn _sch_ispunct 13477298Sobrien#define pr _sch_isprint 13577298Sobrien#define sp _sch_isspace 13677298Sobrien#define up _sch_isupper 13777298Sobrien#define vs _sch_isvsp 13877298Sobrien#define xd _sch_isxdigit 13977298Sobrien 14077298Sobrien/* Masks. */ 141130561Sobrien#define L (const unsigned short) (lo|is |pr) /* lower case letter */ 142130561Sobrien#define XL (const unsigned short) (lo|is|xd|pr) /* lowercase hex digit */ 143130561Sobrien#define U (const unsigned short) (up|is |pr) /* upper case letter */ 144130561Sobrien#define XU (const unsigned short) (up|is|xd|pr) /* uppercase hex digit */ 145130561Sobrien#define D (const unsigned short) (di |xd|pr) /* decimal digit */ 146130561Sobrien#define P (const unsigned short) (pn |pr) /* punctuation */ 147130561Sobrien#define _ (const unsigned short) (pn|is |pr) /* underscore */ 14877298Sobrien 149130561Sobrien#define C (const unsigned short) ( cn) /* control character */ 150130561Sobrien#define Z (const unsigned short) (nv |cn) /* NUL */ 151130561Sobrien#define M (const unsigned short) (nv|sp |cn) /* cursor movement: \f \v */ 152130561Sobrien#define V (const unsigned short) (vs|sp |cn) /* vertical space: \r \n */ 153130561Sobrien#define T (const unsigned short) (nv|sp|bl|cn) /* tab */ 154130561Sobrien#define S (const unsigned short) (nv|sp|bl|pr) /* space */ 15577298Sobrien 15677298Sobrien/* Are we ASCII? */ 157130561Sobrien#if HOST_CHARSET == HOST_CHARSET_ASCII 15877298Sobrien 15977298Sobrienconst unsigned short _sch_istable[256] = 16077298Sobrien{ 16177298Sobrien Z, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ 16277298Sobrien C, T, V, M, M, V, C, C, /* BS HT LF VT FF CR SO SI */ 16377298Sobrien C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ 16477298Sobrien C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */ 16577298Sobrien S, P, P, P, P, P, P, P, /* SP ! " # $ % & ' */ 16677298Sobrien P, P, P, P, P, P, P, P, /* ( ) * + , - . / */ 16777298Sobrien D, D, D, D, D, D, D, D, /* 0 1 2 3 4 5 6 7 */ 16877298Sobrien D, D, P, P, P, P, P, P, /* 8 9 : ; < = > ? */ 16977298Sobrien P, XU, XU, XU, XU, XU, XU, U, /* @ A B C D E F G */ 17077298Sobrien U, U, U, U, U, U, U, U, /* H I J K L M N O */ 17177298Sobrien U, U, U, U, U, U, U, U, /* P Q R S T U V W */ 17277298Sobrien U, U, U, P, P, P, P, _, /* X Y Z [ \ ] ^ _ */ 17377298Sobrien P, XL, XL, XL, XL, XL, XL, L, /* ` a b c d e f g */ 17477298Sobrien L, L, L, L, L, L, L, L, /* h i j k l m n o */ 17577298Sobrien L, L, L, L, L, L, L, L, /* p q r s t u v w */ 17677298Sobrien L, L, L, P, P, P, P, C, /* x y z { | } ~ DEL */ 17777298Sobrien 17877298Sobrien /* high half of unsigned char is locale-specific, so all tests are 17977298Sobrien false in "C" locale */ 18077298Sobrien 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18177298Sobrien 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18277298Sobrien 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18377298Sobrien 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18477298Sobrien 18577298Sobrien 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18677298Sobrien 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18777298Sobrien 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18877298Sobrien 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18977298Sobrien}; 19077298Sobrien 19177298Sobrienconst unsigned char _sch_tolower[256] = 19277298Sobrien{ 19377298Sobrien 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19477298Sobrien 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 19577298Sobrien 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 19677298Sobrien 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 19777298Sobrien 64, 19877298Sobrien 19977298Sobrien 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 20077298Sobrien 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 20177298Sobrien 20277298Sobrien 91, 92, 93, 94, 95, 96, 20377298Sobrien 20477298Sobrien 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 20577298Sobrien 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 20677298Sobrien 20777298Sobrien 123,124,125,126,127, 20877298Sobrien 20977298Sobrien 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, 21077298Sobrien 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, 21177298Sobrien 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, 21277298Sobrien 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, 21377298Sobrien 21477298Sobrien 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, 21577298Sobrien 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, 21677298Sobrien 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239, 21777298Sobrien 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, 21877298Sobrien}; 21977298Sobrien 22077298Sobrienconst unsigned char _sch_toupper[256] = 22177298Sobrien{ 22277298Sobrien 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 22377298Sobrien 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 22477298Sobrien 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 22577298Sobrien 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 22677298Sobrien 64, 22777298Sobrien 22877298Sobrien 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 22977298Sobrien 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 23077298Sobrien 23177298Sobrien 91, 92, 93, 94, 95, 96, 23277298Sobrien 23377298Sobrien 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 23477298Sobrien 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 23577298Sobrien 23677298Sobrien 123,124,125,126,127, 23777298Sobrien 23877298Sobrien 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, 23977298Sobrien 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, 24077298Sobrien 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, 24177298Sobrien 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, 24277298Sobrien 24377298Sobrien 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, 24477298Sobrien 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, 24577298Sobrien 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239, 24677298Sobrien 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, 24777298Sobrien}; 24877298Sobrien 24977298Sobrien#else 250130561Sobrien# if HOST_CHARSET == HOST_CHARSET_EBCDIC 251130561Sobrien #error "FIXME: write tables for EBCDIC" 252130561Sobrien# else 253130561Sobrien #error "Unrecognized host character set" 254130561Sobrien# endif 255130561Sobrien#endif 256