1// -*- C++ -*-
2/* Copyright (C) 2002
3   Free Software Foundation, Inc.
4     Written by Werner Lemberg <wl@gnu.org>
5
6This file is part of groff.
7
8groff is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 2, or (at your option) any later
11version.
12
13groff is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License along
19with groff; see the file COPYING.  If not, write to the Free Software
20Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
21
22#include "lib.h"
23#include "cset.h"
24#include "stringclass.h"
25
26#include "unicode.h"
27
28const char *check_unicode_name(const char *u)
29{
30  if (*u != 'u')
31    return 0;
32  const char *p = ++u;
33  for (;;) {
34    int val = 0;
35    const char *start = p;
36    for (;;) {
37      // only uppercase hex digits allowed
38      if (!csxdigit(*p))
39	return 0;
40      if (csdigit(*p))
41	val = val*0x10 + (*p-'0');
42      else if (csupper(*p))
43	val = val*0x10 + (*p-'A'+10);
44      else
45	return 0;
46      // biggest Unicode value is U+10FFFF
47      if (val > 0x10FFFF)
48	return 0;
49      p++;
50      if (*p == '\0' || *p == '_')
51	break;
52    }
53    // surrogates not allowed
54    if ((val >= 0xD800 && val <= 0xDBFF) || (val >= 0xDC00 && val <= 0xDFFF))
55      return 0;
56    if (val > 0xFFFF) {
57      if (*start == '0')	// no leading zeros allowed if > 0xFFFF
58	return 0;
59    }
60    else if (p - start != 4)	// otherwise, check for exactly 4 hex digits
61      return 0;
62    if (*p == '\0')
63      break;
64    p++;
65  }
66  return u;
67}
68