/* <ctype.h> replacement macros.

   Copyright (C) 2000, 2001, 2002, 2003, 2004,
   2005 Free Software Foundation, Inc.
   Contributed by Zack Weinberg <zackw@stanford.edu>.

This file is part of the libiberty library.
Libiberty is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.

Libiberty is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Library General Public
License along with libiberty; see the file COPYING.LIB.  If
not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
Boston, MA 02110-1301, USA.  */

/*

@defvr Extension HOST_CHARSET
This macro indicates the basic character set and encoding used by the
host: more precisely, the encoding used for character constants in
preprocessor @samp{#if} statements (the C "execution character set").
It is defined by @file{safe-ctype.h}, and will be an integer constant
with one of the following values:

@ftable @code
@item HOST_CHARSET_UNKNOWN
The host character set is unknown - that is, not one of the next two
possibilities.

@item HOST_CHARSET_ASCII
The host character set is ASCII.

@item HOST_CHARSET_EBCDIC
The host character set is some variant of EBCDIC.  (Only one of the
nineteen EBCDIC varying characters is tested; exercise caution.)
@end ftable
@end defvr

@deffn  Extension ISALPHA  (@var{c})
@deffnx Extension ISALNUM  (@var{c})
@deffnx Extension ISBLANK  (@var{c})
@deffnx Extension ISCNTRL  (@var{c})
@deffnx Extension ISDIGIT  (@var{c})
@deffnx Extension ISGRAPH  (@var{c})
@deffnx Extension ISLOWER  (@var{c})
@deffnx Extension ISPRINT  (@var{c})
@deffnx Extension ISPUNCT  (@var{c})
@deffnx Extension ISSPACE  (@var{c})
@deffnx Extension ISUPPER  (@var{c})
@deffnx Extension ISXDIGIT (@var{c})

These twelve macros are defined by @file{safe-ctype.h}.  Each has the
same meaning as the corresponding macro (with name in lowercase)
defined by the standard header @file{ctype.h}.  For example,
@code{ISALPHA} returns true for alphabetic characters and false for
others.  However, there are two differences between these macros and
those provided by @file{ctype.h}:

@itemize @bullet
@item These macros are guaranteed to have well-defined behavior for all
values representable by @code{signed char} and @code{unsigned char}, and
for @code{EOF}.

@item These macros ignore the current locale; they are true for these
fixed sets of characters:
@multitable {@code{XDIGIT}} {yada yada yada yada yada yada yada yada}
@item @code{ALPHA}  @tab @kbd{A-Za-z}
@item @code{ALNUM}  @tab @kbd{A-Za-z0-9}
@item @code{BLANK}  @tab @kbd{space tab}
@item @code{CNTRL}  @tab @code{!PRINT}
@item @code{DIGIT}  @tab @kbd{0-9}
@item @code{GRAPH}  @tab @code{ALNUM || PUNCT}
@item @code{LOWER}  @tab @kbd{a-z}
@item @code{PRINT}  @tab @code{GRAPH ||} @kbd{space}
@item @code{PUNCT}  @tab @kbd{`~!@@#$%^&*()_-=+[@{]@}\|;:'",<.>/?}
@item @code{SPACE}  @tab @kbd{space tab \n \r \f \v}
@item @code{UPPER}  @tab @kbd{A-Z}
@item @code{XDIGIT} @tab @kbd{0-9A-Fa-f}
@end multitable

Note that, if the host character set is ASCII or a superset thereof,
all these macros will return false for all values of @code{char} outside
the range of 7-bit ASCII.  In particular, both ISPRINT and ISCNTRL return
false for characters with numeric values from 128 to 255.
@end itemize
@end deffn

@deffn  Extension ISIDNUM         (@var{c})
@deffnx Extension ISIDST          (@var{c})
@deffnx Extension IS_VSPACE       (@var{c})
@deffnx Extension IS_NVSPACE      (@var{c})
@deffnx Extension IS_SPACE_OR_NUL (@var{c})
@deffnx Extension IS_ISOBASIC     (@var{c})
These six macros are defined by @file{safe-ctype.h} and provide
additional character classes which are useful when doing lexical
analysis of C or similar languages.  They are true for the following
sets of characters:

@multitable {@code{SPACE_OR_NUL}} {yada yada yada yada yada yada yada yada}
@item @code{IDNUM}        @tab @kbd{A-Za-z0-9_}
@item @code{IDST}         @tab @kbd{A-Za-z_}
@item @code{VSPACE}       @tab @kbd{\r \n}
@item @code{NVSPACE}      @tab @kbd{space tab \f \v \0}
@item @code{SPACE_OR_NUL} @tab @code{VSPACE || NVSPACE}
@item @code{ISOBASIC}     @tab @code{VSPACE || NVSPACE || PRINT}
@end multitable
@end deffn

*/

#include "ansidecl.h"
#include <safe-ctype.h>
#include <stdio.h>  /* for EOF */

/* Refuse to build unless EOF is exactly -1.  The documentation above
   promises well-defined results for EOF; presumably the lookups in
   <safe-ctype.h> depend on this particular value (confirm against the
   header).  */
#if EOF != -1
 #error "<safe-ctype.h> requires EOF == -1"
#endif

/* Shorthand: two-letter aliases for the _sch_is* category bits, so that
   the mask definitions that follow stay short.  The _sch_is* names are
   not defined in this file; they are expected to come from
   <safe-ctype.h>.  */
#define bl _sch_isblank
#define cn _sch_iscntrl
#define di _sch_isdigit
#define is _sch_isidst
#define lo _sch_islower
#define nv _sch_isnvsp
#define pn _sch_ispunct
#define pr _sch_isprint
#define sp _sch_isspace
#define up _sch_isupper
#define vs _sch_isvsp
#define xd _sch_isxdigit

/* Masks.  Each mask is the OR of the category bits that apply to one
   kind of character, narrowed to the element type of _sch_istable; the
   masks are used as that table's entries.  The whole expansion is
   parenthesized so a mask behaves as a single operand wherever it is
   used.  */
#define L  ((const unsigned short) (lo|is   |pr))	/* lower case letter */
#define XL ((const unsigned short) (lo|is|xd|pr))	/* lowercase hex digit */
#define U  ((const unsigned short) (up|is   |pr))	/* upper case letter */
#define XU ((const unsigned short) (up|is|xd|pr))	/* uppercase hex digit */
#define D  ((const unsigned short) (di   |xd|pr))	/* decimal digit */
#define P  ((const unsigned short) (pn      |pr))	/* punctuation */
#define _  ((const unsigned short) (pn|is   |pr))	/* underscore */

#define C  ((const unsigned short) (         cn))	/* control character */
#define Z  ((const unsigned short) (nv      |cn))	/* NUL */
#define M  ((const unsigned short) (nv|sp   |cn))	/* cursor movement: \f \v */
#define V  ((const unsigned short) (vs|sp   |cn))	/* vertical space: \r \n */
#define T  ((const unsigned short) (nv|sp|bl|cn))	/* tab */
#define S  ((const unsigned short) (nv|sp|bl|pr))	/* space */

15677298Sobrien/* Are we ASCII? */
157130561Sobrien#if HOST_CHARSET == HOST_CHARSET_ASCII
15877298Sobrien
15977298Sobrienconst unsigned short _sch_istable[256] =
16077298Sobrien{
16177298Sobrien  Z,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
16277298Sobrien  C,  T,  V,  M,   M,  V,  C,  C,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
16377298Sobrien  C,  C,  C,  C,   C,  C,  C,  C,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
16477298Sobrien  C,  C,  C,  C,   C,  C,  C,  C,   /* CAN EM  SUB ESC  FS  GS  RS  US  */
16577298Sobrien  S,  P,  P,  P,   P,  P,  P,  P,   /* SP  !   "   #    $   %   &   '   */
16677298Sobrien  P,  P,  P,  P,   P,  P,  P,  P,   /* (   )   *   +    ,   -   .   /   */
16777298Sobrien  D,  D,  D,  D,   D,  D,  D,  D,   /* 0   1   2   3    4   5   6   7   */
16877298Sobrien  D,  D,  P,  P,   P,  P,  P,  P,   /* 8   9   :   ;    <   =   >   ?   */
16977298Sobrien  P, XU, XU, XU,  XU, XU, XU,  U,   /* @   A   B   C    D   E   F   G   */
17077298Sobrien  U,  U,  U,  U,   U,  U,  U,  U,   /* H   I   J   K    L   M   N   O   */
17177298Sobrien  U,  U,  U,  U,   U,  U,  U,  U,   /* P   Q   R   S    T   U   V   W   */
17277298Sobrien  U,  U,  U,  P,   P,  P,  P,  _,   /* X   Y   Z   [    \   ]   ^   _   */
17377298Sobrien  P, XL, XL, XL,  XL, XL, XL,  L,   /* `   a   b   c    d   e   f   g   */
17477298Sobrien  L,  L,  L,  L,   L,  L,  L,  L,   /* h   i   j   k    l   m   n   o   */
17577298Sobrien  L,  L,  L,  L,   L,  L,  L,  L,   /* p   q   r   s    t   u   v   w   */
17677298Sobrien  L,  L,  L,  P,   P,  P,  P,  C,   /* x   y   z   {    |   }   ~   DEL */
17777298Sobrien
17877298Sobrien  /* high half of unsigned char is locale-specific, so all tests are
17977298Sobrien     false in "C" locale */
18077298Sobrien  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
18177298Sobrien  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
18277298Sobrien  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
18377298Sobrien  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
18477298Sobrien
18577298Sobrien  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
18677298Sobrien  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
18777298Sobrien  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
18877298Sobrien  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
18977298Sobrien};
19077298Sobrien
/* Lower-casing table, indexed by unsigned char value.  The entries for
   'A'..'Z' (65..90) hold 'a'..'z'; every other entry holds its own
   index, so only upper-case ASCII letters are changed by a lookup.  */
const unsigned char _sch_tolower[256] =
{
   0,  1,  2,  3,   4,  5,  6,  7,   8,  9, 10, 11,  12, 13, 14, 15,
  16, 17, 18, 19,  20, 21, 22, 23,  24, 25, 26, 27,  28, 29, 30, 31,
  32, 33, 34, 35,  36, 37, 38, 39,  40, 41, 42, 43,  44, 45, 46, 47,
  48, 49, 50, 51,  52, 53, 54, 55,  56, 57, 58, 59,  60, 61, 62, 63,
  64,

  /* 65..90: upper-case letters map to lower case.  */
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',

  91, 92, 93, 94, 95, 96,

  /* 97..122: lower-case letters map to themselves.  */
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',

 123,124,125,126,127,

 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143,
 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159,
 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175,
 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191,

 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207,
 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223,
 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239,
 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255,
};

/* Upper-casing table, indexed by unsigned char value.  The entries for
   'a'..'z' (97..122) hold 'A'..'Z'; every other entry holds its own
   index, so only lower-case ASCII letters are changed by a lookup.  */
const unsigned char _sch_toupper[256] =
{
   0,  1,  2,  3,   4,  5,  6,  7,   8,  9, 10, 11,  12, 13, 14, 15,
  16, 17, 18, 19,  20, 21, 22, 23,  24, 25, 26, 27,  28, 29, 30, 31,
  32, 33, 34, 35,  36, 37, 38, 39,  40, 41, 42, 43,  44, 45, 46, 47,
  48, 49, 50, 51,  52, 53, 54, 55,  56, 57, 58, 59,  60, 61, 62, 63,
  64,

  /* 65..90: upper-case letters map to themselves.  */
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',

  91, 92, 93, 94, 95, 96,

  /* 97..122: lower-case letters map to upper case.  */
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',

 123,124,125,126,127,

 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143,
 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159,
 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175,
 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191,

 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207,
 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223,
 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239,
 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255,
};

24977298Sobrien#else
250130561Sobrien# if HOST_CHARSET == HOST_CHARSET_EBCDIC
251130561Sobrien  #error "FIXME: write tables for EBCDIC"
252130561Sobrien# else
253130561Sobrien  #error "Unrecognized host character set"
254130561Sobrien# endif
255130561Sobrien#endif
256