1289177Speter/* 2289177Speter * utf8proc.c: Wrappers for the utf8proc library 3289177Speter * 4289177Speter * ==================================================================== 5289177Speter * Licensed to the Apache Software Foundation (ASF) under one 6289177Speter * or more contributor license agreements. See the NOTICE file 7289177Speter * distributed with this work for additional information 8289177Speter * regarding copyright ownership. The ASF licenses this file 9289177Speter * to you under the Apache License, Version 2.0 (the 10289177Speter * "License"); you may not use this file except in compliance 11289177Speter * with the License. You may obtain a copy of the License at 12289177Speter * 13289177Speter * http://www.apache.org/licenses/LICENSE-2.0 14289177Speter * 15289177Speter * Unless required by applicable law or agreed to in writing, 16289177Speter * software distributed under the License is distributed on an 17289177Speter * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18289177Speter * KIND, either express or implied. See the License for the 19289177Speter * specific language governing permissions and limitations 20289177Speter * under the License. 21289177Speter * ==================================================================== 22289177Speter */ 23289177Speter 24289177Speter 25289177Speter 26289177Speter#include <apr_fnmatch.h> 27289177Speter 28289177Speter#include "private/svn_string_private.h" 29289177Speter#include "private/svn_utf_private.h" 30289177Speter#include "svn_private_config.h" 31289177Speter 32289177Speter#define UTF8PROC_INLINE 33289177Speter/* Somehow utf8proc thinks it is nice to use strlen as an argument name, 34289177Speter while this function is already defined via apr.h */ 35289177Speter#define strlen svn__strlen_var 36289177Speter#include "utf8proc/utf8proc.c" 37289177Speter#undef strlen 38289177Speter 39289177Speter 40289177Speter 41289177Speterconst char * 42289177Spetersvn_utf__utf8proc_compiled_version(void) 43289177Speter{ 44289177Speter static const char utf8proc_version[] = 45289177Speter APR_STRINGIFY(UTF8PROC_VERSION_MAJOR) "." 46289177Speter APR_STRINGIFY(UTF8PROC_VERSION_MINOR) "." 47289177Speter APR_STRINGIFY(UTF8PROC_VERSION_PATCH); 48289177Speter return utf8proc_version; 49289177Speter} 50289177Speter 51289177Speterconst char * 52289177Spetersvn_utf__utf8proc_runtime_version(void) 53289177Speter{ 54289177Speter /* Unused static function warning removal hack. */ 55289177Speter SVN_UNUSED(utf8proc_NFD); 56289177Speter SVN_UNUSED(utf8proc_NFC); 57289177Speter SVN_UNUSED(utf8proc_NFKD); 58289177Speter SVN_UNUSED(utf8proc_NFKC); 59289177Speter 60289177Speter return utf8proc_version(); 61289177Speter} 62289177Speter 63289177Speter 64289177Speter 65289177Speter/* Fill the given BUFFER with decomposed UCS-4 representation of the 66289177Speter * UTF-8 STRING. If LENGTH is SVN_UTF__UNKNOWN_LENGTH, assume STRING 67289177Speter * is NUL-terminated; otherwise look only at the first LENGTH bytes in 68289177Speter * STRING. Upon return, BUFFER->data points at an array of UCS-4 69289177Speter * characters, and return the length of the array. TRANSFORM_FLAGS 70289177Speter * define exactly how the decomposition is performed. 71289177Speter * 72289177Speter * A negative return value is an utf8proc error code and may indicate 73289177Speter * that STRING contains invalid UTF-8 or was so long that an overflow 74289177Speter * occurred. 75289177Speter */ 76289177Speterstatic ssize_t 77289177Speterunicode_decomposition(int transform_flags, 78289177Speter const char *string, apr_size_t length, 79289177Speter svn_membuf_t *buffer) 80289177Speter{ 81289177Speter const int nullterm = (length == SVN_UTF__UNKNOWN_LENGTH 82289177Speter ? UTF8PROC_NULLTERM : 0); 83289177Speter 84289177Speter for (;;) 85289177Speter { 86289177Speter apr_int32_t *const ucs4buf = buffer->data; 87289177Speter const ssize_t ucs4len = buffer->size / sizeof(*ucs4buf); 88289177Speter const ssize_t result = 89289177Speter utf8proc_decompose((const void*) string, length, ucs4buf, ucs4len, 90289177Speter UTF8PROC_DECOMPOSE | UTF8PROC_STABLE 91289177Speter | transform_flags | nullterm); 92289177Speter 93289177Speter if (result < 0 || result <= ucs4len) 94289177Speter return result; 95289177Speter 96289177Speter /* Increase the decomposition buffer size and retry */ 97289177Speter svn_membuf__ensure(buffer, result * sizeof(*ucs4buf)); 98289177Speter } 99289177Speter} 100289177Speter 101289177Speter/* Fill the given BUFFER with an NFD UCS-4 representation of the UTF-8 102289177Speter * STRING. If LENGTH is SVN_UTF__UNKNOWN_LENGTH, assume STRING is 103289177Speter * NUL-terminated; otherwise look only at the first LENGTH bytes in 104289177Speter * STRING. Upon return, BUFFER->data points at an array of UCS-4 105289177Speter * characters and *RESULT_LENGTH contains the length of the array. 106289177Speter * 107289177Speter * A returned error may indicate that STRING contains invalid UTF-8 or 108289177Speter * invalid Unicode codepoints. Any error message comes from utf8proc. 109289177Speter */ 110289177Speterstatic svn_error_t * 111289177Speterdecompose_normalized(apr_size_t *result_length, 112289177Speter const char *string, apr_size_t length, 113289177Speter svn_membuf_t *buffer) 114289177Speter{ 115289177Speter ssize_t result = unicode_decomposition(0, string, length, buffer); 116289177Speter if (result < 0) 117289177Speter return svn_error_create(SVN_ERR_UTF8PROC_ERROR, NULL, 118289177Speter gettext(utf8proc_errmsg(result))); 119289177Speter *result_length = result; 120289177Speter return SVN_NO_ERROR; 121289177Speter} 122289177Speter 123289177Speter/* Fill the given BUFFER with an NFC UTF-8 representation of the UTF-8 124289177Speter * STRING. If LENGTH is SVN_UTF__UNKNOWN_LENGTH, assume STRING is 125289177Speter * NUL-terminated; otherwise look only at the first LENGTH bytes in 126289177Speter * STRING. Upon return, BUFFER->data points at a NUL-terminated string 127289177Speter * of UTF-8 characters. 128289177Speter * 129289177Speter * A returned error may indicate that STRING contains invalid UTF-8 or 130289177Speter * invalid Unicode codepoints. Any error message comes from utf8proc. 131289177Speter */ 132289177Speterstatic svn_error_t * 133289177Speternormalize_cstring(apr_size_t *result_length, 134289177Speter const char *string, apr_size_t length, 135289177Speter svn_membuf_t *buffer) 136289177Speter{ 137289177Speter ssize_t result = unicode_decomposition(0, string, length, buffer); 138289177Speter if (result >= 0) 139289177Speter { 140289177Speter svn_membuf__resize(buffer, result * sizeof(apr_int32_t) + 1); 141289177Speter result = utf8proc_reencode(buffer->data, result, 142289177Speter UTF8PROC_COMPOSE | UTF8PROC_STABLE); 143289177Speter } 144289177Speter if (result < 0) 145289177Speter return svn_error_create(SVN_ERR_UTF8PROC_ERROR, NULL, 146289177Speter gettext(utf8proc_errmsg(result))); 147289177Speter *result_length = result; 148289177Speter return SVN_NO_ERROR; 149289177Speter} 150289177Speter 151289177Speter/* Compare two arrays of UCS-4 codes, BUFA of length LENA and BUFB of 152289177Speter * length LENB. Return 0 if they're equal, a negative value if BUFA is 153289177Speter * less than BUFB, otherwise a positive value. 154289177Speter * 155289177Speter * Yes, this is strcmp for known-length UCS-4 strings. 156289177Speter */ 157289177Speterstatic int 158289177Speterucs4cmp(const apr_int32_t *bufa, apr_size_t lena, 159289177Speter const apr_int32_t *bufb, apr_size_t lenb) 160289177Speter{ 161289177Speter const apr_size_t len = (lena < lenb ? lena : lenb); 162289177Speter apr_size_t i; 163289177Speter 164289177Speter for (i = 0; i < len; ++i) 165289177Speter { 166289177Speter const int diff = bufa[i] - bufb[i]; 167289177Speter if (diff) 168289177Speter return diff; 169289177Speter } 170289177Speter return (lena == lenb ? 0 : (lena < lenb ? -1 : 1)); 171289177Speter} 172289177Speter 173289177Spetersvn_error_t * 174289177Spetersvn_utf__normcmp(int *result, 175289177Speter const char *str1, apr_size_t len1, 176289177Speter const char *str2, apr_size_t len2, 177289177Speter svn_membuf_t *buf1, svn_membuf_t *buf2) 178289177Speter{ 179289177Speter apr_size_t buflen1; 180289177Speter apr_size_t buflen2; 181289177Speter 182289177Speter /* Shortcut-circuit the decision if at least one of the strings is empty. */ 183289177Speter const svn_boolean_t empty1 = 184289177Speter (0 == len1 || (len1 == SVN_UTF__UNKNOWN_LENGTH && !*str1)); 185289177Speter const svn_boolean_t empty2 = 186289177Speter (0 == len2 || (len2 == SVN_UTF__UNKNOWN_LENGTH && !*str2)); 187289177Speter if (empty1 || empty2) 188289177Speter { 189289177Speter *result = (empty1 == empty2 ? 0 : (empty1 ? -1 : 1)); 190289177Speter return SVN_NO_ERROR; 191289177Speter } 192289177Speter 193289177Speter SVN_ERR(decompose_normalized(&buflen1, str1, len1, buf1)); 194289177Speter SVN_ERR(decompose_normalized(&buflen2, str2, len2, buf2)); 195289177Speter *result = ucs4cmp(buf1->data, buflen1, buf2->data, buflen2); 196289177Speter return SVN_NO_ERROR; 197289177Speter} 198289177Speter 199289177Spetersvn_error_t* 200289177Spetersvn_utf__normalize(const char **result, 201289177Speter const char *str, apr_size_t len, 202289177Speter svn_membuf_t *buf) 203289177Speter{ 204289177Speter apr_size_t result_length; 205289177Speter SVN_ERR(normalize_cstring(&result_length, str, len, buf)); 206289177Speter *result = (const char*)(buf->data); 207289177Speter return SVN_NO_ERROR; 208289177Speter} 209289177Speter 210289177Speter/* Decode a single UCS-4 code point to UTF-8, appending the result to BUFFER. 211289177Speter * Assume BUFFER is already filled to *LENGTH and return the new size there. 212289177Speter * This function does *not* nul-terminate the stringbuf! 213289177Speter * 214289177Speter * A returned error indicates that the codepoint is invalid. 215289177Speter */ 216289177Speterstatic svn_error_t * 217289177Speterencode_ucs4(svn_membuf_t *buffer, apr_int32_t ucs4chr, apr_size_t *length) 218289177Speter{ 219289177Speter apr_size_t utf8len; 220289177Speter 221289177Speter if (buffer->size - *length < 4) 222289177Speter svn_membuf__resize(buffer, buffer->size + 4); 223289177Speter 224289177Speter utf8len = utf8proc_encode_char(ucs4chr, ((uint8_t*)buffer->data + *length)); 225289177Speter if (!utf8len) 226289177Speter return svn_error_createf(SVN_ERR_UTF8PROC_ERROR, NULL, 227289177Speter _("Invalid Unicode character U+%04lX"), 228289177Speter (long)ucs4chr); 229289177Speter *length += utf8len; 230289177Speter return SVN_NO_ERROR; 231289177Speter} 232289177Speter 233289177Spetersvn_error_t * 234289177Spetersvn_utf__encode_ucs4_string(svn_membuf_t *buffer, 235289177Speter const apr_int32_t *ucs4str, 236289177Speter apr_size_t length, 237289177Speter apr_size_t *result_length) 238289177Speter{ 239289177Speter *result_length = 0; 240289177Speter while (length-- > 0) 241289177Speter SVN_ERR(encode_ucs4(buffer, *ucs4str++, result_length)); 242289177Speter svn_membuf__resize(buffer, *result_length + 1); 243289177Speter ((char*)buffer->data)[*result_length] = '\0'; 244289177Speter return SVN_NO_ERROR; 245289177Speter} 246289177Speter 247289177Speter 248289177Spetersvn_error_t * 249289177Spetersvn_utf__glob(svn_boolean_t *match, 250289177Speter const char *pattern, apr_size_t pattern_len, 251289177Speter const char *string, apr_size_t string_len, 252289177Speter const char *escape, apr_size_t escape_len, 253289177Speter svn_boolean_t sql_like, 254289177Speter svn_membuf_t *pattern_buf, 255289177Speter svn_membuf_t *string_buf, 256289177Speter svn_membuf_t *temp_buf) 257289177Speter{ 258289177Speter apr_size_t patternbuf_len; 259289177Speter apr_size_t tempbuf_len; 260289177Speter 261289177Speter /* If we're in GLOB mode, we don't do custom escape chars. */ 262289177Speter if (escape && !sql_like) 263289177Speter return svn_error_create(SVN_ERR_UTF8_GLOB, NULL, 264289177Speter _("Cannot use a custom escape token" 265289177Speter " in glob matching mode")); 266289177Speter 267289177Speter /* Convert the patern to NFD UTF-8. We can't use the UCS-4 result 268289177Speter because apr_fnmatch can't handle it.*/ 269289177Speter SVN_ERR(decompose_normalized(&tempbuf_len, pattern, pattern_len, temp_buf)); 270289177Speter if (!sql_like) 271289177Speter SVN_ERR(svn_utf__encode_ucs4_string(pattern_buf, temp_buf->data, 272289177Speter tempbuf_len, &patternbuf_len)); 273289177Speter else 274289177Speter { 275289177Speter /* Convert a LIKE pattern to a GLOB pattern that apr_fnmatch can use. */ 276289177Speter const apr_int32_t *like = temp_buf->data; 277289177Speter apr_int32_t ucs4esc; 278289177Speter svn_boolean_t escaped; 279289177Speter apr_size_t i; 280289177Speter 281289177Speter if (!escape) 282289177Speter ucs4esc = -1; /* Definitely an invalid UCS-4 character. */ 283289177Speter else 284289177Speter { 285289177Speter const int nullterm = (escape_len == SVN_UTF__UNKNOWN_LENGTH 286289177Speter ? UTF8PROC_NULLTERM : 0); 287289177Speter ssize_t result = 288289177Speter utf8proc_decompose((const void*) escape, escape_len, &ucs4esc, 1, 289289177Speter UTF8PROC_DECOMPOSE | UTF8PROC_STABLE | nullterm); 290289177Speter if (result < 0) 291289177Speter return svn_error_create(SVN_ERR_UTF8PROC_ERROR, NULL, 292289177Speter gettext(utf8proc_errmsg(result))); 293289177Speter if (result == 0 || result > 1) 294289177Speter return svn_error_create(SVN_ERR_UTF8_GLOB, NULL, 295289177Speter _("Escape token must be one character")); 296289177Speter if ((ucs4esc & 0xFF) != ucs4esc) 297289177Speter return svn_error_createf(SVN_ERR_UTF8_GLOB, NULL, 298289177Speter _("Invalid escape character U+%04lX"), 299289177Speter (long)ucs4esc); 300289177Speter } 301289177Speter 302289177Speter patternbuf_len = 0; 303289177Speter svn_membuf__ensure(pattern_buf, tempbuf_len + 1); 304289177Speter for (i = 0, escaped = FALSE; i < tempbuf_len; ++i, ++like) 305289177Speter { 306289177Speter if (*like == ucs4esc && !escaped) 307289177Speter { 308289177Speter svn_membuf__resize(pattern_buf, patternbuf_len + 1); 309289177Speter ((char*)pattern_buf->data)[patternbuf_len++] = '\\'; 310289177Speter escaped = TRUE; 311289177Speter } 312289177Speter else if (escaped) 313289177Speter { 314289177Speter SVN_ERR(encode_ucs4(pattern_buf, *like, &patternbuf_len)); 315289177Speter escaped = FALSE; 316289177Speter } 317289177Speter else 318289177Speter { 319289177Speter if ((*like == '[' || *like == '\\') && !escaped) 320289177Speter { 321289177Speter /* Escape brackets and backslashes which are always 322289177Speter literals in LIKE patterns. */ 323289177Speter svn_membuf__resize(pattern_buf, patternbuf_len + 1); 324289177Speter ((char*)pattern_buf->data)[patternbuf_len++] = '\\'; 325289177Speter escaped = TRUE; 326289177Speter --i; --like; 327289177Speter continue; 328289177Speter } 329289177Speter 330289177Speter /* Replace LIKE wildcards with their GLOB equivalents. */ 331289177Speter if (*like == '%' || *like == '_') 332289177Speter { 333289177Speter const char wildcard = (*like == '%' ? '*' : '?'); 334289177Speter svn_membuf__resize(pattern_buf, patternbuf_len + 1); 335289177Speter ((char*)pattern_buf->data)[patternbuf_len++] = wildcard; 336289177Speter } 337289177Speter else 338289177Speter SVN_ERR(encode_ucs4(pattern_buf, *like, &patternbuf_len)); 339289177Speter } 340289177Speter } 341289177Speter svn_membuf__resize(pattern_buf, patternbuf_len + 1); 342289177Speter ((char*)pattern_buf->data)[patternbuf_len] = '\0'; 343289177Speter } 344289177Speter 345289177Speter /* Now normalize the string */ 346289177Speter SVN_ERR(decompose_normalized(&tempbuf_len, string, string_len, temp_buf)); 347289177Speter SVN_ERR(svn_utf__encode_ucs4_string(string_buf, temp_buf->data, 348289177Speter tempbuf_len, &tempbuf_len)); 349289177Speter 350289177Speter *match = !apr_fnmatch(pattern_buf->data, string_buf->data, 0); 351289177Speter return SVN_NO_ERROR; 352289177Speter} 353289177Speter 354289177Spetersvn_boolean_t 355289177Spetersvn_utf__is_normalized(const char *string, apr_pool_t *scratch_pool) 356289177Speter{ 357289177Speter svn_error_t *err; 358289177Speter svn_membuf_t buffer; 359289177Speter apr_size_t result_length; 360289177Speter const apr_size_t length = strlen(string); 361289177Speter svn_membuf__create(&buffer, length * sizeof(apr_int32_t), scratch_pool); 362289177Speter err = normalize_cstring(&result_length, string, length, &buffer); 363289177Speter if (err) 364289177Speter { 365289177Speter svn_error_clear(err); 366289177Speter return FALSE; 367289177Speter } 368289177Speter return (length == result_length && 0 == strcmp(string, buffer.data)); 369289177Speter} 370289177Speter 371289177Speterconst char * 372289177Spetersvn_utf__fuzzy_escape(const char *src, apr_size_t length, apr_pool_t *pool) 373289177Speter{ 374289177Speter /* Hexadecimal digits for code conversion. */ 375289177Speter static const char digits[] = "0123456789ABCDEF"; 376289177Speter 377289177Speter /* Flags used for Unicode decomposition. */ 378289177Speter static const int decomp_flags = ( 379289177Speter UTF8PROC_COMPAT | UTF8PROC_STABLE | UTF8PROC_LUMP 380289177Speter | UTF8PROC_NLF2LF | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK); 381289177Speter 382289177Speter svn_stringbuf_t *result; 383289177Speter svn_membuf_t buffer; 384289177Speter ssize_t decomp_length; 385289177Speter ssize_t len; 386289177Speter 387289177Speter /* Decompose to a non-reversible compatibility format. */ 388289177Speter svn_membuf__create(&buffer, length * sizeof(apr_int32_t), pool); 389289177Speter decomp_length = unicode_decomposition(decomp_flags, src, length, &buffer); 390289177Speter if (decomp_length < 0) 391289177Speter { 392289177Speter svn_membuf_t part; 393289177Speter apr_size_t done, prev; 394289177Speter 395289177Speter /* The only other error we can receive here indicates an integer 396289177Speter overflow due to the length of the input string. Not very 397289177Speter likely, but we certainly shouldn't continue in that case. */ 398289177Speter SVN_ERR_ASSERT_NO_RETURN(decomp_length == UTF8PROC_ERROR_INVALIDUTF8); 399289177Speter 400289177Speter /* Break the decomposition into parts that are valid UTF-8, and 401289177Speter bytes that are not. Represent the invalid bytes in the target 402289177Speter erray by their negative value. This works because utf8proc 403289177Speter will not generate Unicode code points with values larger than 404289177Speter U+10FFFF. */ 405289177Speter svn_membuf__create(&part, sizeof(apr_int32_t), pool); 406289177Speter decomp_length = 0; 407289177Speter done = prev = 0; 408289177Speter while (done < length) 409289177Speter { 410289177Speter apr_int32_t uc; 411289177Speter 412289177Speter while (done < length) 413289177Speter { 414289177Speter len = utf8proc_iterate((uint8_t*)src + done, length - done, &uc); 415289177Speter if (len < 0) 416289177Speter break; 417289177Speter done += len; 418289177Speter } 419289177Speter 420289177Speter /* Decompose the valid part */ 421289177Speter if (done > prev) 422289177Speter { 423289177Speter len = unicode_decomposition( 424289177Speter decomp_flags, src + prev, done - prev, &part); 425289177Speter SVN_ERR_ASSERT_NO_RETURN(len > 0); 426289177Speter svn_membuf__resize( 427289177Speter &buffer, (decomp_length + len) * sizeof(apr_int32_t)); 428289177Speter memcpy((apr_int32_t*)buffer.data + decomp_length, 429289177Speter part.data, len * sizeof(apr_int32_t)); 430289177Speter decomp_length += len; 431289177Speter prev = done; 432289177Speter } 433289177Speter 434289177Speter /* What follows could be a valid UTF-8 sequence, but not 435289177Speter a valid Unicode character. */ 436289177Speter if (done < length) 437289177Speter { 438289177Speter const char *last; 439289177Speter 440289177Speter /* Determine the length of the UTF-8 sequence */ 441289177Speter const char *const p = src + done; 442289177Speter len = utf8proc_utf8class[(uint8_t)*p]; 443289177Speter 444289177Speter /* Check if the multi-byte sequence is valid UTF-8. */ 445289177Speter if (len > 1 && len <= (apr_ssize_t)(length - done)) 446289177Speter last = svn_utf__last_valid(p, len); 447289177Speter else 448289177Speter last = NULL; 449289177Speter 450289177Speter /* Might not be a valid UTF-8 sequence at all */ 451289177Speter if (!last || (last && last - p < len)) 452289177Speter { 453289177Speter uc = -((apr_int32_t)(*p & 0xff)); 454289177Speter len = 1; 455289177Speter } 456289177Speter else 457289177Speter { 458289177Speter switch (len) 459289177Speter { 460289177Speter /* Decode the UTF-8 sequence without validation. */ 461289177Speter case 2: 462289177Speter uc = ((p[0] & 0x1f) << 6) + (p[1] & 0x3f); 463289177Speter break; 464289177Speter case 3: 465289177Speter uc = (((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) 466289177Speter + (p[2] & 0x3f)); 467289177Speter break; 468289177Speter case 4: 469289177Speter uc = (((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12) 470289177Speter + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f)); 471289177Speter break; 472289177Speter default: 473289177Speter SVN_ERR_ASSERT_NO_RETURN( 474289177Speter !"Unexpected invalid UTF-8 byte"); 475289177Speter } 476289177Speter 477289177Speter } 478289177Speter 479289177Speter svn_membuf__resize( 480289177Speter &buffer, (decomp_length + 1) * sizeof(apr_int32_t)); 481289177Speter ((apr_int32_t*)buffer.data)[decomp_length++] = uc; 482289177Speter done += len; 483289177Speter prev = done; 484289177Speter } 485289177Speter } 486289177Speter } 487289177Speter 488289177Speter /* Scan the result and deleting any combining diacriticals and 489289177Speter inserting placeholders where any non-ascii characters remain. */ 490289177Speter result = svn_stringbuf_create_ensure(decomp_length, pool); 491289177Speter for (len = 0; len < decomp_length; ++len) 492289177Speter { 493289177Speter const apr_int32_t cp = ((apr_int32_t*)buffer.data)[len]; 494289177Speter if (cp > 0 && cp < 127) 495289177Speter svn_stringbuf_appendbyte(result, (char)cp); 496289177Speter else if (cp == 0) 497289177Speter svn_stringbuf_appendcstr(result, "\\0"); 498289177Speter else if (cp < 0) 499289177Speter { 500289177Speter const apr_int32_t rcp = ((-cp) & 0xff); 501289177Speter svn_stringbuf_appendcstr(result, "?\\"); 502289177Speter svn_stringbuf_appendbyte(result, digits[(rcp & 0x00f0) >> 4]); 503289177Speter svn_stringbuf_appendbyte(result, digits[(rcp & 0x000f)]); 504289177Speter } 505289177Speter else 506289177Speter { 507289177Speter if (utf8proc_codepoint_valid(cp)) 508289177Speter { 509289177Speter const utf8proc_property_t *prop = utf8proc_get_property(cp); 510289177Speter if (prop->combining_class != 0) 511289177Speter continue; /* Combining mark; ignore */ 512289177Speter svn_stringbuf_appendcstr(result, "{U+"); 513289177Speter } 514289177Speter else 515289177Speter svn_stringbuf_appendcstr(result, "{U?"); 516289177Speter if (cp > 0xffff) 517289177Speter { 518289177Speter svn_stringbuf_appendbyte(result, digits[(cp & 0xf00000) >> 20]); 519289177Speter svn_stringbuf_appendbyte(result, digits[(cp & 0x0f0000) >> 16]); 520289177Speter } 521289177Speter svn_stringbuf_appendbyte(result, digits[(cp & 0xf000) >> 12]); 522289177Speter svn_stringbuf_appendbyte(result, digits[(cp & 0x0f00) >> 8]); 523289177Speter svn_stringbuf_appendbyte(result, digits[(cp & 0x00f0) >> 4]); 524289177Speter svn_stringbuf_appendbyte(result, digits[(cp & 0x000f)]); 525289177Speter svn_stringbuf_appendbyte(result, '}'); 526289177Speter } 527289177Speter } 528289177Speter 529289177Speter return result->data; 530289177Speter} 531