unicode.cpp revision 151498
1// -*- C++ -*- 2/* Copyright (C) 2002 3 Free Software Foundation, Inc. 4 Written by Werner Lemberg <wl@gnu.org> 5 6This file is part of groff. 7 8groff is free software; you can redistribute it and/or modify it under 9the terms of the GNU General Public License as published by the Free 10Software Foundation; either version 2, or (at your option) any later 11version. 12 13groff is distributed in the hope that it will be useful, but WITHOUT ANY 14WARRANTY; without even the implied warranty of MERCHANTABILITY or 15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16for more details. 17 18You should have received a copy of the GNU General Public License along 19with groff; see the file COPYING. If not, write to the Free Software 20Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */ 21 22#include "lib.h" 23#include "cset.h" 24#include "stringclass.h" 25 26#include "unicode.h" 27 28const char *check_unicode_name(const char *u) 29{ 30 if (*u != 'u') 31 return 0; 32 const char *p = ++u; 33 for (;;) { 34 int val = 0; 35 const char *start = p; 36 for (;;) { 37 // only uppercase hex digits allowed 38 if (!csxdigit(*p)) 39 return 0; 40 if (csdigit(*p)) 41 val = val*0x10 + (*p-'0'); 42 else if (csupper(*p)) 43 val = val*0x10 + (*p-'A'+10); 44 else 45 return 0; 46 // biggest Unicode value is U+10FFFF 47 if (val > 0x10FFFF) 48 return 0; 49 p++; 50 if (*p == '\0' || *p == '_') 51 break; 52 } 53 // surrogates not allowed 54 if ((val >= 0xD800 && val <= 0xDBFF) || (val >= 0xDC00 && val <= 0xDFFF)) 55 return 0; 56 if (val > 0xFFFF) { 57 if (*start == '0') // no leading zeros allowed if > 0xFFFF 58 return 0; 59 } 60 else if (p - start != 4) // otherwise, check for exactly 4 hex digits 61 return 0; 62 if (*p == '\0') 63 break; 64 p++; 65 } 66 return u; 67} 68