1@node unistr.h 2@chapter Elementary Unicode string functions @code{<unistr.h>} 3 4This include file declares elementary functions for Unicode strings. It is 5essentially the equivalent of what @code{<string.h>} is for C strings. 6 7@menu 8* Elementary string checks:: 9* Elementary string conversions:: 10* Elementary string functions:: 11* Elementary string functions with memory allocation:: 12* Elementary string functions on NUL terminated strings:: 13@end menu 14 15@node Elementary string checks 16@section Elementary string checks 17 18@cindex validity 19@cindex verification 20The following function is available to verify the integrity of a Unicode string. 21 22@deftypefun {const uint8_t *} u8_check (const uint8_t *@var{s}, size_t @var{n}) 23@deftypefunx {const uint16_t *} u16_check (const uint16_t *@var{s}, size_t @var{n}) 24@deftypefunx {const uint32_t *} u32_check (const uint32_t *@var{s}, size_t @var{n}) 25This function checks whether a Unicode string is well-formed. 26It returns NULL if valid, or a pointer to the first invalid unit otherwise. 27@end deftypefun 28 29@node Elementary string conversions 30@section Elementary string conversions 31 32@cindex converting 33The following functions perform conversions between the different forms of Unicode strings. 34 35@deftypefun {uint16_t *} u8_to_u16 (const uint8_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) 36Converts a UTF-8 string to a UTF-16 string. 37@end deftypefun 38 39@deftypefun {uint32_t *} u8_to_u32 (const uint8_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) 40Converts a UTF-8 string to a UTF-32 string. 41@end deftypefun 42 43@deftypefun {uint8_t *} u16_to_u8 (const uint16_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) 44Converts a UTF-16 string to a UTF-8 string. 
45@end deftypefun 46 47@deftypefun {uint32_t *} u16_to_u32 (const uint16_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) 48Converts a UTF-16 string to a UTF-32 string. 49@end deftypefun 50 51@deftypefun {uint8_t *} u32_to_u8 (const uint32_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) 52Converts a UTF-32 string to a UTF-8 string. 53@end deftypefun 54 55@deftypefun {uint16_t *} u32_to_u16 (const uint32_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) 56Converts a UTF-32 string to a UTF-16 string. 57@end deftypefun 58 59@node Elementary string functions 60@section Elementary string functions 61 62@cindex iterating 63The following functions inspect and return details about the first character 64in a Unicode string. 65 66@deftypefun int u8_mblen (const uint8_t *@var{s}, size_t @var{n}) 67@deftypefunx int u16_mblen (const uint16_t *@var{s}, size_t @var{n}) 68@deftypefunx int u32_mblen (const uint32_t *@var{s}, size_t @var{n}) 69Returns the length (number of units) of the first character in @var{s}, which 70is no longer than @var{n}. Returns 0 if it is the NUL character. Returns -1 71upon failure. 72 73This function is similar to @posixfunc{mblen}, except that it operates on a 74Unicode string and that @var{s} must not be NULL. 75@end deftypefun 76 77@deftypefun int u8_mbtouc_unsafe (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) 78@deftypefunx int u16_mbtouc_unsafe (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) 79@deftypefunx int u32_mbtouc_unsafe (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) 80Returns the length (number of units) of the first character in @var{s}, 81putting its @code{ucs4_t} representation in @code{*@var{puc}}. Upon failure, 82@code{*@var{puc}} is set to @code{0xfffd}, and an appropriate number of units 83is returned. 84 85The number of available units, @var{n}, must be > 0. 
86 87This function is similar to @posixfunc{mbtowc}, except that it operates on a 88Unicode string, @var{puc} and @var{s} must not be NULL, @var{n} must be > 0, 89and the NUL character is not treated specially. 90@end deftypefun 91 92@deftypefun int u8_mbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) 93@deftypefunx int u16_mbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) 94@deftypefunx int u32_mbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) 95This function is like @code{u8_mbtouc_unsafe}, except that it will detect an 96invalid UTF-8 character, even if the library is compiled without 97@option{--enable-safety}. 98@end deftypefun 99 100@deftypefun int u8_mbtoucr (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) 101@deftypefunx int u16_mbtoucr (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) 102@deftypefunx int u32_mbtoucr (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) 103Returns the length (number of units) of the first character in @var{s}, 104putting its @code{ucs4_t} representation in @code{*@var{puc}}. Upon failure, 105@code{*@var{puc}} is set to @code{0xfffd}, and -1 is returned for an invalid 106sequence of units, -2 is returned for an incomplete sequence of units. 107 108The number of available units, @var{n}, must be > 0. 109 110This function is similar to @code{u8_mbtouc}, except that the return value 111gives more details about the failure, similar to @posixfunc{mbrtowc}. 112@end deftypefun 113 114The following function stores a Unicode character as a Unicode string in 115memory. 116 117@deftypefun int u8_uctomb (uint8_t *@var{s}, ucs4_t @var{uc}, int @var{n}) 118@deftypefunx int u16_uctomb (uint16_t *@var{s}, ucs4_t @var{uc}, int @var{n}) 119@deftypefunx int u32_uctomb (uint32_t *@var{s}, ucs4_t @var{uc}, int @var{n}) 120Puts the multibyte character represented by @var{uc} in @var{s}, returning its 121length. 
Returns -1 upon failure, -2 if the number of available units, @var{n}, 122is too small. The latter case cannot occur if @var{n} >= 6/2/1, respectively. 123 124This function is similar to @posixfunc{wctomb}, except that it operates on a 125Unicode string, @var{s} must not be NULL, and the argument @var{n} must be 126specified. 127@end deftypefun 128 129@cindex copying 130The following functions copy Unicode strings in memory. 131 132@deftypefun {uint8_t *} u8_cpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) 133@deftypefunx {uint16_t *} u16_cpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) 134@deftypefunx {uint32_t *} u32_cpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) 135Copies @var{n} units from @var{src} to @var{dest}. 136 137This function is similar to @posixfunc{memcpy}, except that it operates on 138Unicode strings. 139@end deftypefun 140 141@deftypefun {uint8_t *} u8_move (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) 142@deftypefunx {uint16_t *} u16_move (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) 143@deftypefunx {uint32_t *} u32_move (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) 144Copies @var{n} units from @var{src} to @var{dest}, guaranteeing correct 145behavior for overlapping memory areas. 146 147This function is similar to @posixfunc{memmove}, except that it operates on 148Unicode strings. 149@end deftypefun 150 151The following function fills a Unicode string. 152 153@deftypefun {uint8_t *} u8_set (uint8_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) 154@deftypefunx {uint16_t *} u16_set (uint16_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) 155@deftypefunx {uint32_t *} u32_set (uint32_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) 156Sets the first @var{n} characters of @var{s} to @var{uc}. @var{uc} should be 157a character that occupies only 1 unit. 
158 159This function is similar to @posixfunc{memset}, except that it operates on 160Unicode strings. 161@end deftypefun 162 163@cindex comparing 164The following function compares two Unicode strings of the same length. 165 166@deftypefun int u8_cmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}, size_t @var{n}) 167@deftypefunx int u16_cmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}, size_t @var{n}) 168@deftypefunx int u32_cmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}, size_t @var{n}) 169Compares @var{s1} and @var{s2}, each of length @var{n}, lexicographically. 170Returns a negative value if @var{s1} compares smaller than @var{s2}, 171a positive value if @var{s1} compares larger than @var{s2}, or 0 if 172they compare equal. 173 174This function is similar to @posixfunc{memcmp}, except that it operates on 175Unicode strings. 176@end deftypefun 177 178The following function compares two Unicode strings of possibly different 179lengths. 180 181@deftypefun int u8_cmp2 (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}) 182@deftypefunx int u16_cmp2 (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}) 183@deftypefunx int u32_cmp2 (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}) 184Compares @var{s1} and @var{s2}, lexicographically. 185Returns a negative value if @var{s1} compares smaller than @var{s2}, 186a positive value if @var{s1} compares larger than @var{s2}, or 0 if 187they compare equal. 188 189This function is similar to the gnulib function @func{memcmp2}, except that it 190operates on Unicode strings. 191@end deftypefun 192 193@cindex searching, for a character 194The following function searches for a given Unicode character. 
195 196@deftypefun {uint8_t *} u8_chr (const uint8_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) 197@deftypefunx {uint16_t *} u16_chr (const uint16_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) 198@deftypefunx {uint32_t *} u32_chr (const uint32_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) 199Searches the string at @var{s} for @var{uc}. Returns a pointer to the first 200occurrence of @var{uc} in @var{s}, or NULL if @var{uc} does not occur in 201@var{s}. 202 203This function is similar to @posixfunc{memchr}, except that it operates on 204Unicode strings. 205@end deftypefun 206 207@cindex counting 208The following function counts the number of Unicode characters. 209 210@deftypefun size_t u8_mbsnlen (const uint8_t *@var{s}, size_t @var{n}) 211@deftypefunx size_t u16_mbsnlen (const uint16_t *@var{s}, size_t @var{n}) 212@deftypefunx size_t u32_mbsnlen (const uint32_t *@var{s}, size_t @var{n}) 213Counts and returns the number of Unicode characters in the @var{n} units 214from @var{s}. 215 216This function is similar to the gnulib function @func{mbsnlen}, except that 217it operates on Unicode strings. 218@end deftypefun 219 220@node Elementary string functions with memory allocation 221@section Elementary string functions with memory allocation 222 223@cindex duplicating 224The following function copies a Unicode string. 225 226@deftypefun {uint8_t *} u8_cpy_alloc (const uint8_t *@var{s}, size_t @var{n}) 227@deftypefunx {uint16_t *} u16_cpy_alloc (const uint16_t *@var{s}, size_t @var{n}) 228@deftypefunx {uint32_t *} u32_cpy_alloc (const uint32_t *@var{s}, size_t @var{n}) 229Makes a freshly allocated copy of @var{s}, of length @var{n}. 230@end deftypefun 231 232@node Elementary string functions on NUL terminated strings 233@section Elementary string functions on NUL terminated strings 234 235The following functions inspect and return details about the first character 236in a Unicode string. 
237 238@deftypefun int u8_strmblen (const uint8_t *@var{s}) 239@deftypefunx int u16_strmblen (const uint16_t *@var{s}) 240@deftypefunx int u32_strmblen (const uint32_t *@var{s}) 241Returns the length (number of units) of the first character in @var{s}. 242Returns 0 if it is the NUL character. Returns -1 upon failure. 243@end deftypefun 244 245@cindex iterating 246@deftypefun int u8_strmbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}) 247@deftypefunx int u16_strmbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}) 248@deftypefunx int u32_strmbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}) 249Returns the length (number of units) of the first character in @var{s}, 250putting its @code{ucs4_t} representation in @code{*@var{puc}}. Returns 0 251if it is the NUL character. Returns -1 upon failure. 252@end deftypefun 253 254@deftypefun {const uint8_t *} u8_next (ucs4_t *@var{puc}, const uint8_t *@var{s}) 255@deftypefunx {const uint16_t *} u16_next (ucs4_t *@var{puc}, const uint16_t *@var{s}) 256@deftypefunx {const uint32_t *} u32_next (ucs4_t *@var{puc}, const uint32_t *@var{s}) 257Forward iteration step. Advances the pointer past the next character, 258or returns NULL if the end of the string has been reached. Puts the 259character's @code{ucs4_t} representation in @code{*@var{puc}}. 260@end deftypefun 261 262The following function inspects and returns details about the previous 263character in a Unicode string. 264 265@deftypefun {const uint8_t *} u8_prev (ucs4_t *@var{puc}, const uint8_t *@var{s}, const uint8_t *@var{start}) 266@deftypefunx {const uint16_t *} u16_prev (ucs4_t *@var{puc}, const uint16_t *@var{s}, const uint16_t *@var{start}) 267@deftypefunx {const uint32_t *} u32_prev (ucs4_t *@var{puc}, const uint32_t *@var{s}, const uint32_t *@var{start}) 268Backward iteration step. Advances the pointer to point to the previous 269character, or returns NULL if the beginning of the string has been reached. 
270Puts the character's @code{ucs4_t} representation in @code{*@var{puc}}. 271@end deftypefun 272 273The following functions determine the length of a Unicode string. 274 275@deftypefun size_t u8_strlen (const uint8_t *@var{s}) 276@deftypefunx size_t u16_strlen (const uint16_t *@var{s}) 277@deftypefunx size_t u32_strlen (const uint32_t *@var{s}) 278Returns the number of units in @var{s}. 279 280This function is similar to @posixfunc{strlen} and @posixfunc{wcslen}, except 281that it operates on Unicode strings. 282@end deftypefun 283 284@deftypefun size_t u8_strnlen (const uint8_t *@var{s}, size_t @var{maxlen}) 285@deftypefunx size_t u16_strnlen (const uint16_t *@var{s}, size_t @var{maxlen}) 286@deftypefunx size_t u32_strnlen (const uint32_t *@var{s}, size_t @var{maxlen}) 287Returns the number of units in @var{s}, but at most @var{maxlen}. 288 289This function is similar to @posixfunc{strnlen} and @posixfunc{wcsnlen}, except 290that it operates on Unicode strings. 291@end deftypefun 292 293@cindex copying 294The following functions copy portions of Unicode strings in memory. 295 296@deftypefun {uint8_t *} u8_strcpy (uint8_t *@var{dest}, const uint8_t *@var{src}) 297@deftypefunx {uint16_t *} u16_strcpy (uint16_t *@var{dest}, const uint16_t *@var{src}) 298@deftypefunx {uint32_t *} u32_strcpy (uint32_t *@var{dest}, const uint32_t *@var{src}) 299Copies @var{src} to @var{dest}. 300 301This function is similar to @posixfunc{strcpy} and @posixfunc{wcscpy}, except 302that it operates on Unicode strings. 303@end deftypefun 304 305@deftypefun {uint8_t *} u8_stpcpy (uint8_t *@var{dest}, const uint8_t *@var{src}) 306@deftypefunx {uint16_t *} u16_stpcpy (uint16_t *@var{dest}, const uint16_t *@var{src}) 307@deftypefunx {uint32_t *} u32_stpcpy (uint32_t *@var{dest}, const uint32_t *@var{src}) 308Copies @var{src} to @var{dest}, returning the address of the terminating NUL 309in @var{dest}. 
310 311This function is similar to @posixfunc{stpcpy}, except that it operates on 312Unicode strings. 313@end deftypefun 314 315@deftypefun {uint8_t *} u8_strncpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) 316@deftypefunx {uint16_t *} u16_strncpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) 317@deftypefunx {uint32_t *} u32_strncpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) 318Copies no more than @var{n} units of @var{src} to @var{dest}. 319 320This function is similar to @posixfunc{strncpy} and @posixfunc{wcsncpy}, except 321that it operates on Unicode strings. 322@end deftypefun 323 324@deftypefun {uint8_t *} u8_stpncpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) 325@deftypefunx {uint16_t *} u16_stpncpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) 326@deftypefunx {uint32_t *} u32_stpncpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) 327Copies no more than @var{n} units of @var{src} to @var{dest}. Returns a 328pointer past the last non-NUL unit written into @var{dest}. In other words, 329if the units written into @var{dest} include a NUL, the return value is the 330address of the first such NUL unit, otherwise it is 331@code{@var{dest} + @var{n}}. 332 333This function is similar to @posixfunc{stpncpy}, except that it operates on 334Unicode strings. 335@end deftypefun 336 337@deftypefun {uint8_t *} u8_strcat (uint8_t *@var{dest}, const uint8_t *@var{src}) 338@deftypefunx {uint16_t *} u16_strcat (uint16_t *@var{dest}, const uint16_t *@var{src}) 339@deftypefunx {uint32_t *} u32_strcat (uint32_t *@var{dest}, const uint32_t *@var{src}) 340Appends @var{src} onto @var{dest}. 341 342This function is similar to @posixfunc{strcat} and @posixfunc{wcscat}, except 343that it operates on Unicode strings. 
344@end deftypefun 345 346@deftypefun {uint8_t *} u8_strncat (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) 347@deftypefunx {uint16_t *} u16_strncat (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) 348@deftypefunx {uint32_t *} u32_strncat (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) 349Appends no more than @var{n} units of @var{src} onto @var{dest}. 350 351This function is similar to @posixfunc{strncat} and @posixfunc{wcsncat}, except 352that it operates on Unicode strings. 353@end deftypefun 354 355@cindex comparing 356The following functions compare two Unicode strings. 357 358@deftypefun int u8_strcmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}) 359@deftypefunx int u16_strcmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}) 360@deftypefunx int u32_strcmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}) 361Compares @var{s1} and @var{s2}, lexicographically. 362Returns a negative value if @var{s1} compares smaller than @var{s2}, 363a positive value if @var{s1} compares larger than @var{s2}, or 0 if 364they compare equal. 365 366This function is similar to @posixfunc{strcmp} and @posixfunc{wcscmp}, except 367that it operates on Unicode strings. 368@end deftypefun 369 370@cindex comparing, with collation rules 371@deftypefun int u8_strcoll (const uint8_t *@var{s1}, const uint8_t *@var{s2}) 372@deftypefunx int u16_strcoll (const uint16_t *@var{s1}, const uint16_t *@var{s2}) 373@deftypefunx int u32_strcoll (const uint32_t *@var{s1}, const uint32_t *@var{s2}) 374Compares @var{s1} and @var{s2} using the collation rules of the current 375locale. 376Returns -1 if @var{s1} < @var{s2}, 0 if @var{s1} = @var{s2}, 1 if 377@var{s1} > @var{s2}. Upon failure, sets @code{errno} and returns any value. 378 379This function is similar to @posixfunc{strcoll} and @posixfunc{wcscoll}, except 380that it operates on Unicode strings. 
381 382Note that this function may consider different canonical normalizations 383of the same string as having a large distance. It is therefore better to 384use the function @code{u8_normcoll} instead of this one; see @ref{uninorm.h}. 385@end deftypefun 386 387@deftypefun int u8_strncmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}, size_t @var{n}) 388@deftypefunx int u16_strncmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}, size_t @var{n}) 389@deftypefunx int u32_strncmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}, size_t @var{n}) 390Compares no more than @var{n} units of @var{s1} and @var{s2}. 391 392This function is similar to @posixfunc{strncmp} and @posixfunc{wcsncmp}, except 393that it operates on Unicode strings. 394@end deftypefun 395 396@cindex duplicating 397The following function allocates a duplicate of a Unicode string. 398 399@deftypefun {uint8_t *} u8_strdup (const uint8_t *@var{s}) 400@deftypefunx {uint16_t *} u16_strdup (const uint16_t *@var{s}) 401@deftypefunx {uint32_t *} u32_strdup (const uint32_t *@var{s}) 402Duplicates @var{s}, returning an identical malloc'd string. 403 404This function is similar to @posixfunc{strdup} and @posixfunc{wcsdup}, except 405that it operates on Unicode strings. 406@end deftypefun 407 408@cindex searching, for a character 409The following functions search for a given Unicode character. 410 411@deftypefun {uint8_t *} u8_strchr (const uint8_t *@var{str}, ucs4_t @var{uc}) 412@deftypefunx {uint16_t *} u16_strchr (const uint16_t *@var{str}, ucs4_t @var{uc}) 413@deftypefunx {uint32_t *} u32_strchr (const uint32_t *@var{str}, ucs4_t @var{uc}) 414Finds the first occurrence of @var{uc} in @var{str}. 415 416This function is similar to @posixfunc{strchr} and @posixfunc{wcschr}, except 417that it operates on Unicode strings. 
418@end deftypefun 419 420@deftypefun {uint8_t *} u8_strrchr (const uint8_t *@var{str}, ucs4_t @var{uc}) 421@deftypefunx {uint16_t *} u16_strrchr (const uint16_t *@var{str}, ucs4_t @var{uc}) 422@deftypefunx {uint32_t *} u32_strrchr (const uint32_t *@var{str}, ucs4_t @var{uc}) 423Finds the last occurrence of @var{uc} in @var{str}. 424 425This function is similar to @posixfunc{strrchr} and @posixfunc{wcsrchr}, except 426that it operates on Unicode strings. 427@end deftypefun 428 429The following functions search for the first occurrence of some Unicode 430character in or outside a given set of Unicode characters. 431 432@deftypefun size_t u8_strcspn (const uint8_t *@var{str}, const uint8_t *@var{reject}) 433@deftypefunx size_t u16_strcspn (const uint16_t *@var{str}, const uint16_t *@var{reject}) 434@deftypefunx size_t u32_strcspn (const uint32_t *@var{str}, const uint32_t *@var{reject}) 435Returns the length of the initial segment of @var{str} which consists entirely 436of Unicode characters not in @var{reject}. 437 438This function is similar to @posixfunc{strcspn} and @posixfunc{wcscspn}, except 439that it operates on Unicode strings. 440@end deftypefun 441 442@deftypefun size_t u8_strspn (const uint8_t *@var{str}, const uint8_t *@var{accept}) 443@deftypefunx size_t u16_strspn (const uint16_t *@var{str}, const uint16_t *@var{accept}) 444@deftypefunx size_t u32_strspn (const uint32_t *@var{str}, const uint32_t *@var{accept}) 445Returns the length of the initial segment of @var{str} which consists entirely 446of Unicode characters in @var{accept}. 447 448This function is similar to @posixfunc{strspn} and @posixfunc{wcsspn}, except 449that it operates on Unicode strings. 
450@end deftypefun 451 452@deftypefun {uint8_t *} u8_strpbrk (const uint8_t *@var{str}, const uint8_t *@var{accept}) 453@deftypefunx {uint16_t *} u16_strpbrk (const uint16_t *@var{str}, const uint16_t *@var{accept}) 454@deftypefunx {uint32_t *} u32_strpbrk (const uint32_t *@var{str}, const uint32_t *@var{accept}) 455Finds the first occurrence in @var{str} of any character in @var{accept}. 456 457This function is similar to @posixfunc{strpbrk} and @posixfunc{wcspbrk}, except 458that it operates on Unicode strings. 459@end deftypefun 460 461@cindex searching, for a substring 462The following functions search whether a given Unicode string is a substring 463of another Unicode string. 464 465@deftypefun {uint8_t *} u8_strstr (const uint8_t *@var{haystack}, const uint8_t *@var{needle}) 466@deftypefunx {uint16_t *} u16_strstr (const uint16_t *@var{haystack}, const uint16_t *@var{needle}) 467@deftypefunx {uint32_t *} u32_strstr (const uint32_t *@var{haystack}, const uint32_t *@var{needle}) 468Finds the first occurrence of @var{needle} in @var{haystack}. 469 470This function is similar to @posixfunc{strstr} and @posixfunc{wcsstr}, except 471that it operates on Unicode strings. 472@end deftypefun 473 474@deftypefun bool u8_startswith (const uint8_t *@var{str}, const uint8_t *@var{prefix}) 475@deftypefunx bool u16_startswith (const uint16_t *@var{str}, const uint16_t *@var{prefix}) 476@deftypefunx bool u32_startswith (const uint32_t *@var{str}, const uint32_t *@var{prefix}) 477Tests whether @var{str} starts with @var{prefix}. 478@end deftypefun 479 480@deftypefun bool u8_endswith (const uint8_t *@var{str}, const uint8_t *@var{suffix}) 481@deftypefunx bool u16_endswith (const uint16_t *@var{str}, const uint16_t *@var{suffix}) 482@deftypefunx bool u32_endswith (const uint32_t *@var{str}, const uint32_t *@var{suffix}) 483Tests whether @var{str} ends with @var{suffix}. 484@end deftypefun 485 486The following function does one step in tokenizing a Unicode string. 
487 488@deftypefun {uint8_t *} u8_strtok (uint8_t *@var{str}, const uint8_t *@var{delim}, uint8_t **@var{ptr}) 489@deftypefunx {uint16_t *} u16_strtok (uint16_t *@var{str}, const uint16_t *@var{delim}, uint16_t **@var{ptr}) 490@deftypefunx {uint32_t *} u32_strtok (uint32_t *@var{str}, const uint32_t *@var{delim}, uint32_t **@var{ptr}) 491Divides @var{str} into tokens separated by characters in @var{delim}. 492 493This function is similar to @posixfunc{strtok_r} and @posixfunc{wcstok}, except 494that it operates on Unicode strings. Its interface is actually more similar to 495@code{wcstok} than to @code{strtok}. 496@end deftypefun 497