1169695Skan/* <ctype.h> replacement macros.
2169695Skan
3169695Skan   Copyright (C) 2000, 2001, 2002, 2003, 2004,
4169695Skan   2005 Free Software Foundation, Inc.
5169695Skan   Contributed by Zack Weinberg <zackw@stanford.edu>.
6169695Skan
7169695SkanThis file is part of the libiberty library.
8169695SkanLibiberty is free software; you can redistribute it and/or
9169695Skanmodify it under the terms of the GNU Library General Public
10169695SkanLicense as published by the Free Software Foundation; either
11169695Skanversion 2 of the License, or (at your option) any later version.
12169695Skan
13169695SkanLibiberty is distributed in the hope that it will be useful,
14169695Skanbut WITHOUT ANY WARRANTY; without even the implied warranty of
15169695SkanMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16169695SkanLibrary General Public License for more details.
17169695Skan
18169695SkanYou should have received a copy of the GNU Library General Public
19169695SkanLicense along with libiberty; see the file COPYING.LIB.  If
20169695Skannot, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
21169695SkanBoston, MA 02110-1301, USA.  */
22169695Skan
23169695Skan/*
24169695Skan
25169695Skan@defvr Extension HOST_CHARSET
26169695SkanThis macro indicates the basic character set and encoding used by the
27169695Skanhost: more precisely, the encoding used for character constants in
28169695Skanpreprocessor @samp{#if} statements (the C "execution character set").
29169695SkanIt is defined by @file{safe-ctype.h}, and will be an integer constant
30169695Skanwith one of the following values:
31169695Skan
32169695Skan@ftable @code
33169695Skan@item HOST_CHARSET_UNKNOWN
34169695SkanThe host character set is unknown - that is, not one of the next two
35169695Skanpossibilities.
36169695Skan
37169695Skan@item HOST_CHARSET_ASCII
38169695SkanThe host character set is ASCII.
39169695Skan
40169695Skan@item HOST_CHARSET_EBCDIC
41169695SkanThe host character set is some variant of EBCDIC.  (Only one of the
42169695Skannineteen EBCDIC varying characters is tested; exercise caution.)
43169695Skan@end ftable
44169695Skan@end defvr
45169695Skan
46169695Skan@deffn  Extension ISALPHA  (@var{c})
47169695Skan@deffnx Extension ISALNUM  (@var{c})
48169695Skan@deffnx Extension ISBLANK  (@var{c})
49169695Skan@deffnx Extension ISCNTRL  (@var{c})
50169695Skan@deffnx Extension ISDIGIT  (@var{c})
51169695Skan@deffnx Extension ISGRAPH  (@var{c})
52169695Skan@deffnx Extension ISLOWER  (@var{c})
53169695Skan@deffnx Extension ISPRINT  (@var{c})
54169695Skan@deffnx Extension ISPUNCT  (@var{c})
55169695Skan@deffnx Extension ISSPACE  (@var{c})
56169695Skan@deffnx Extension ISUPPER  (@var{c})
57169695Skan@deffnx Extension ISXDIGIT (@var{c})
58169695Skan
59169695SkanThese twelve macros are defined by @file{safe-ctype.h}.  Each has the
60169695Skansame meaning as the corresponding macro (with name in lowercase)
61169695Skandefined by the standard header @file{ctype.h}.  For example,
62169695Skan@code{ISALPHA} returns true for alphabetic characters and false for
63169695Skanothers.  However, there are two differences between these macros and
64169695Skanthose provided by @file{ctype.h}:
65169695Skan
66169695Skan@itemize @bullet
67169695Skan@item These macros are guaranteed to have well-defined behavior for all
68169695Skanvalues representable by @code{signed char} and @code{unsigned char}, and
69169695Skanfor @code{EOF}.
70169695Skan
71169695Skan@item These macros ignore the current locale; they are true for these
72169695Skanfixed sets of characters:
73169695Skan@multitable {@code{XDIGIT}} {yada yada yada yada yada yada yada yada}
74169695Skan@item @code{ALPHA}  @tab @kbd{A-Za-z}
75169695Skan@item @code{ALNUM}  @tab @kbd{A-Za-z0-9}
76169695Skan@item @code{BLANK}  @tab @kbd{space tab}
77169695Skan@item @code{CNTRL}  @tab @code{!PRINT}
78169695Skan@item @code{DIGIT}  @tab @kbd{0-9}
79169695Skan@item @code{GRAPH}  @tab @code{ALNUM || PUNCT}
80169695Skan@item @code{LOWER}  @tab @kbd{a-z}
81169695Skan@item @code{PRINT}  @tab @code{GRAPH ||} @kbd{space}
82169695Skan@item @code{PUNCT}  @tab @kbd{`~!@@#$%^&*()_-=+[@{]@}\|;:'",<.>/?}
83169695Skan@item @code{SPACE}  @tab @kbd{space tab \n \r \f \v}
84169695Skan@item @code{UPPER}  @tab @kbd{A-Z}
85169695Skan@item @code{XDIGIT} @tab @kbd{0-9A-Fa-f}
86169695Skan@end multitable
87169695Skan
88169695SkanNote that, if the host character set is ASCII or a superset thereof,
89169695Skanall these macros will return false for all values of @code{char} outside
90169695Skanthe range of 7-bit ASCII.  In particular, both ISPRINT and ISCNTRL return
91169695Skanfalse for characters with numeric values from 128 to 255.
92169695Skan@end itemize
93169695Skan@end deffn
94169695Skan
95169695Skan@deffn  Extension ISIDNUM         (@var{c})
96169695Skan@deffnx Extension ISIDST          (@var{c})
97169695Skan@deffnx Extension IS_VSPACE       (@var{c})
98169695Skan@deffnx Extension IS_NVSPACE      (@var{c})
99169695Skan@deffnx Extension IS_SPACE_OR_NUL (@var{c})
100169695Skan@deffnx Extension IS_ISOBASIC     (@var{c})
101169695SkanThese six macros are defined by @file{safe-ctype.h} and provide
102169695Skanadditional character classes which are useful when doing lexical
103169695Skananalysis of C or similar languages.  They are true for the following
104169695Skansets of characters:
105169695Skan
106169695Skan@multitable {@code{SPACE_OR_NUL}} {yada yada yada yada yada yada yada yada}
107169695Skan@item @code{IDNUM}        @tab @kbd{A-Za-z0-9_}
108169695Skan@item @code{IDST}         @tab @kbd{A-Za-z_}
109169695Skan@item @code{VSPACE}       @tab @kbd{\r \n}
110169695Skan@item @code{NVSPACE}      @tab @kbd{space tab \f \v \0}
111169695Skan@item @code{SPACE_OR_NUL} @tab @code{VSPACE || NVSPACE}
112169695Skan@item @code{ISOBASIC}     @tab @code{VSPACE || NVSPACE || PRINT}
113169695Skan@end multitable
114169695Skan@end deffn
115169695Skan
116169695Skan*/
117169695Skan
118169695Skan#include "ansidecl.h"
119169695Skan#include <safe-ctype.h>
120169695Skan#include <stdio.h>  /* for EOF */
121169695Skan
/* Compile-time guard: stop the build unless the host's EOF (from
   <stdio.h>) is -1, which the error message states <safe-ctype.h>
   requires.  */
122169695Skan#if EOF != -1
123169695Skan #error "<safe-ctype.h> requires EOF == -1"
124169695Skan#endif
125169695Skan
/* Two-letter aliases for the _sch_is* category names.  Those names
   are not defined in this file (presumably they come from
   <safe-ctype.h> -- confirm there); the mask macros below OR them
   together, so they are used as bit flags.  The aliases exist only
   to keep the mask definitions short.  */
126169695Skan/* Shorthand */
127169695Skan#define bl _sch_isblank
128169695Skan#define cn _sch_iscntrl
129169695Skan#define di _sch_isdigit
130169695Skan#define is _sch_isidst
131169695Skan#define lo _sch_islower
132169695Skan#define nv _sch_isnvsp
133169695Skan#define pn _sch_ispunct
134169695Skan#define pr _sch_isprint
135169695Skan#define sp _sch_isspace
136169695Skan#define up _sch_isupper
137169695Skan#define vs _sch_isvsp
138169695Skan#define xd _sch_isxdigit
139169695Skan
/* One mask per kind of character.  Each mask is the OR of the
   category aliases that apply to that kind, cast to
   const unsigned short (the element type of _sch_istable).  The
   one- and two-character names are what the rows of _sch_istable
   are written in.  The first group covers printable characters
   (all include pr); the second group covers control characters and
   white space.  */
140169695Skan/* Masks.  */
141169695Skan#define L  (const unsigned short) (lo|is   |pr)	/* lower case letter */
142169695Skan#define XL (const unsigned short) (lo|is|xd|pr)	/* lowercase hex digit */
143169695Skan#define U  (const unsigned short) (up|is   |pr)	/* upper case letter */
144169695Skan#define XU (const unsigned short) (up|is|xd|pr)	/* uppercase hex digit */
145169695Skan#define D  (const unsigned short) (di   |xd|pr)	/* decimal digit */
146169695Skan#define P  (const unsigned short) (pn      |pr)	/* punctuation */
147169695Skan#define _  (const unsigned short) (pn|is   |pr)	/* underscore */
148169695Skan
149169695Skan#define C  (const unsigned short) (         cn)	/* control character */
150169695Skan#define Z  (const unsigned short) (nv      |cn)	/* NUL */
151169695Skan#define M  (const unsigned short) (nv|sp   |cn)	/* cursor movement: \f \v */
152169695Skan#define V  (const unsigned short) (vs|sp   |cn)	/* vertical space: \r \n */
153169695Skan#define T  (const unsigned short) (nv|sp|bl|cn)	/* tab */
154169695Skan#define S  (const unsigned short) (nv|sp|bl|pr)	/* space */
155169695Skan
156169695Skan/* Are we ASCII? */
/* The three tables that follow are laid out in ASCII code order, so
   they are compiled only when HOST_CHARSET is HOST_CHARSET_ASCII.
   Any other value reaches the #error directives in the #else branch
   at the end of this file.  */
157169695Skan#if HOST_CHARSET == HOST_CHARSET_ASCII
158169695Skan
/* Character classification table, indexed by character code 0..255.
   Each of the first 128 entries is one of the mask macros defined
   above; the trailing comment on each row names the eight ASCII
   characters that row covers, in order.  Entries 128..255 are all 0,
   so no category bit is set for those codes.  */
159169695Skanconst unsigned short _sch_istable[256] =
160169695Skan{
161169695Skan  Z,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
162169695Skan  C,  T,  V,  M,   M,  V,  C,  C,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
163169695Skan  C,  C,  C,  C,   C,  C,  C,  C,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
164169695Skan  C,  C,  C,  C,   C,  C,  C,  C,   /* CAN EM  SUB ESC  FS  GS  RS  US  */
165169695Skan  S,  P,  P,  P,   P,  P,  P,  P,   /* SP  !   "   #    $   %   &   '   */
166169695Skan  P,  P,  P,  P,   P,  P,  P,  P,   /* (   )   *   +    ,   -   .   /   */
167169695Skan  D,  D,  D,  D,   D,  D,  D,  D,   /* 0   1   2   3    4   5   6   7   */
168169695Skan  D,  D,  P,  P,   P,  P,  P,  P,   /* 8   9   :   ;    <   =   >   ?   */
169169695Skan  P, XU, XU, XU,  XU, XU, XU,  U,   /* @   A   B   C    D   E   F   G   */
170169695Skan  U,  U,  U,  U,   U,  U,  U,  U,   /* H   I   J   K    L   M   N   O   */
171169695Skan  U,  U,  U,  U,   U,  U,  U,  U,   /* P   Q   R   S    T   U   V   W   */
172169695Skan  U,  U,  U,  P,   P,  P,  P,  _,   /* X   Y   Z   [    \   ]   ^   _   */
173169695Skan  P, XL, XL, XL,  XL, XL, XL,  L,   /* `   a   b   c    d   e   f   g   */
174169695Skan  L,  L,  L,  L,   L,  L,  L,  L,   /* h   i   j   k    l   m   n   o   */
175169695Skan  L,  L,  L,  L,   L,  L,  L,  L,   /* p   q   r   s    t   u   v   w   */
176169695Skan  L,  L,  L,  P,   P,  P,  P,  C,   /* x   y   z   {    |   }   ~   DEL */
177169695Skan
178169695Skan  /* high half of unsigned char is locale-specific, so all tests are
179169695Skan     false in "C" locale */
180169695Skan  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
181169695Skan  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
182169695Skan  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
183169695Skan  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
184169695Skan
185169695Skan  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
186169695Skan  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
187169695Skan  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
188169695Skan  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
189169695Skan};
190169695Skan
/* Lower-case mapping table, indexed by character code 0..255.
   Every entry equals its own index except positions 65..90, which
   hold 'a'..'z' (the lower-case forms of ASCII 'A'..'Z').  Positions
   97..122 are also written as 'a'..'z', which is the identity
   mapping under ASCII.  */
191169695Skanconst unsigned char _sch_tolower[256] =
192169695Skan{
  /* Positions 0..64: unchanged.  */
193169695Skan   0,  1,  2,  3,   4,  5,  6,  7,   8,  9, 10, 11,  12, 13, 14, 15,
194169695Skan  16, 17, 18, 19,  20, 21, 22, 23,  24, 25, 26, 27,  28, 29, 30, 31,
195169695Skan  32, 33, 34, 35,  36, 37, 38, 39,  40, 41, 42, 43,  44, 45, 46, 47,
196169695Skan  48, 49, 50, 51,  52, 53, 54, 55,  56, 57, 58, 59,  60, 61, 62, 63,
197169695Skan  64,
198169695Skan
  /* Positions 65..90 (upper-case letters): mapped to lower case.  */
199169695Skan  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
200169695Skan  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
201169695Skan
  /* Positions 91..96: unchanged.  */
202169695Skan  91, 92, 93, 94, 95, 96,
203169695Skan
  /* Positions 97..122 (lower-case letters): already lower case.  */
204169695Skan  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
205169695Skan  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
206169695Skan
  /* Positions 123..255: unchanged.  */
207169695Skan 123,124,125,126,127,
208169695Skan
209169695Skan 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143,
210169695Skan 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159,
211169695Skan 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175,
212169695Skan 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191,
213169695Skan
214169695Skan 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207,
215169695Skan 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223,
216169695Skan 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239,
217169695Skan 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255,
218169695Skan};
219169695Skan
/* Upper-case mapping table, indexed by character code 0..255.
   Every entry equals its own index except positions 97..122, which
   hold 'A'..'Z' (the upper-case forms of ASCII 'a'..'z').  Positions
   65..90 are also written as 'A'..'Z', which is the identity mapping
   under ASCII.  */
220169695Skanconst unsigned char _sch_toupper[256] =
221169695Skan{
  /* Positions 0..64: unchanged.  */
222169695Skan   0,  1,  2,  3,   4,  5,  6,  7,   8,  9, 10, 11,  12, 13, 14, 15,
223169695Skan  16, 17, 18, 19,  20, 21, 22, 23,  24, 25, 26, 27,  28, 29, 30, 31,
224169695Skan  32, 33, 34, 35,  36, 37, 38, 39,  40, 41, 42, 43,  44, 45, 46, 47,
225169695Skan  48, 49, 50, 51,  52, 53, 54, 55,  56, 57, 58, 59,  60, 61, 62, 63,
226169695Skan  64,
227169695Skan
  /* Positions 65..90 (upper-case letters): already upper case.  */
228169695Skan  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
229169695Skan  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
230169695Skan
  /* Positions 91..96: unchanged.  */
231169695Skan  91, 92, 93, 94, 95, 96,
232169695Skan
  /* Positions 97..122 (lower-case letters): mapped to upper case.  */
233169695Skan  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
234169695Skan  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
235169695Skan
  /* Positions 123..255: unchanged.  */
236169695Skan 123,124,125,126,127,
237169695Skan
238169695Skan 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143,
239169695Skan 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159,
240169695Skan 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175,
241169695Skan 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191,
242169695Skan
243169695Skan 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207,
244169695Skan 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223,
245169695Skan 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239,
246169695Skan 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255,
247169695Skan};
248169695Skan
/* Non-ASCII hosts: no tables are provided, so the build is stopped
   with #error.  EBCDIC gets its own message because its tables are
   simply not written yet; any other HOST_CHARSET value is reported
   as unrecognized.  */
249169695Skan#else
250169695Skan# if HOST_CHARSET == HOST_CHARSET_EBCDIC
251169695Skan  #error "FIXME: write tables for EBCDIC"
252169695Skan# else
253169695Skan  #error "Unrecognized host character set"
254169695Skan# endif
255169695Skan#endif
256