1/* 2 * Copyright (c) 2007 Mans Rullgard 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#ifndef AVUTIL_AVSTRING_H 22#define AVUTIL_AVSTRING_H 23 24#include <stddef.h> 25#include <stdint.h> 26#include "attributes.h" 27 28/** 29 * @addtogroup lavu_string 30 * @{ 31 */ 32 33/** 34 * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to 35 * the address of the first character in str after the prefix. 36 * 37 * @param str input string 38 * @param pfx prefix to test 39 * @param ptr updated if the prefix is matched inside str 40 * @return non-zero if the prefix matches, zero otherwise 41 */ 42int av_strstart(const char *str, const char *pfx, const char **ptr); 43 44/** 45 * Return non-zero if pfx is a prefix of str independent of case. If 46 * it is, *ptr is set to the address of the first character in str 47 * after the prefix. 48 * 49 * @param str input string 50 * @param pfx prefix to test 51 * @param ptr updated if the prefix is matched inside str 52 * @return non-zero if the prefix matches, zero otherwise 53 */ 54int av_stristart(const char *str, const char *pfx, const char **ptr); 55 56/** 57 * Locate the first case-independent occurrence in the string haystack 58 * of the string needle. A zero-length string needle is considered to 59 * match at the start of haystack. 60 * 61 * This function is a case-insensitive version of the standard strstr(). 62 * 63 * @param haystack string to search in 64 * @param needle string to search for 65 * @return pointer to the located match within haystack 66 * or a null pointer if no match 67 */ 68char *av_stristr(const char *haystack, const char *needle); 69 70/** 71 * Locate the first occurrence of the string needle in the string haystack 72 * where not more than hay_length characters are searched. A zero-length 73 * string needle is considered to match at the start of haystack. 74 * 75 * This function is a length-limited version of the standard strstr(). 76 * 77 * @param haystack string to search in 78 * @param needle string to search for 79 * @param hay_length length of string to search in 80 * @return pointer to the located match within haystack 81 * or a null pointer if no match 82 */ 83char *av_strnstr(const char *haystack, const char *needle, size_t hay_length); 84 85/** 86 * Copy the string src to dst, but no more than size - 1 bytes, and 87 * null-terminate dst. 88 * 89 * This function is the same as BSD strlcpy(). 90 * 91 * @param dst destination buffer 92 * @param src source string 93 * @param size size of destination buffer 94 * @return the length of src 95 * 96 * @warning since the return value is the length of src, src absolutely 97 * _must_ be a properly 0-terminated string, otherwise this will read beyond 98 * the end of the buffer and possibly crash. 99 */ 100size_t av_strlcpy(char *dst, const char *src, size_t size); 101 102/** 103 * Append the string src to the string dst, but to a total length of 104 * no more than size - 1 bytes, and null-terminate dst. 105 * 106 * This function is similar to BSD strlcat(), but differs when 107 * size <= strlen(dst). 108 * 109 * @param dst destination buffer 110 * @param src source string 111 * @param size size of destination buffer 112 * @return the total length of src and dst 113 * 114 * @warning since the return value use the length of src and dst, these 115 * absolutely _must_ be a properly 0-terminated strings, otherwise this 116 * will read beyond the end of the buffer and possibly crash. 117 */ 118size_t av_strlcat(char *dst, const char *src, size_t size); 119 120/** 121 * Append output to a string, according to a format. Never write out of 122 * the destination buffer, and always put a terminating 0 within 123 * the buffer. 124 * @param dst destination buffer (string to which the output is 125 * appended) 126 * @param size total size of the destination buffer 127 * @param fmt printf-compatible format string, specifying how the 128 * following parameters are used 129 * @return the length of the string that would have been generated 130 * if enough space had been available 131 */ 132size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4); 133 134/** 135 * Get the count of continuous non zero chars starting from the beginning. 136 * 137 * @param len maximum number of characters to check in the string, that 138 * is the maximum value which is returned by the function 139 */ 140static inline size_t av_strnlen(const char *s, size_t len) 141{ 142 size_t i; 143 for (i = 0; i < len && s[i]; i++) 144 ; 145 return i; 146} 147 148/** 149 * Print arguments following specified format into a large enough auto 150 * allocated buffer. It is similar to GNU asprintf(). 151 * @param fmt printf-compatible format string, specifying how the 152 * following parameters are used. 153 * @return the allocated string 154 * @note You have to free the string yourself with av_free(). 155 */ 156char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2); 157 158/** 159 * Convert a number to a av_malloced string. 160 */ 161char *av_d2str(double d); 162 163/** 164 * Unescape the given string until a non escaped terminating char, 165 * and return the token corresponding to the unescaped string. 166 * 167 * The normal \ and ' escaping is supported. Leading and trailing 168 * whitespaces are removed, unless they are escaped with '\' or are 169 * enclosed between ''. 170 * 171 * @param buf the buffer to parse, buf will be updated to point to the 172 * terminating char 173 * @param term a 0-terminated list of terminating chars 174 * @return the malloced unescaped string, which must be av_freed by 175 * the user, NULL in case of allocation failure 176 */ 177char *av_get_token(const char **buf, const char *term); 178 179/** 180 * Split the string into several tokens which can be accessed by 181 * successive calls to av_strtok(). 182 * 183 * A token is defined as a sequence of characters not belonging to the 184 * set specified in delim. 185 * 186 * On the first call to av_strtok(), s should point to the string to 187 * parse, and the value of saveptr is ignored. In subsequent calls, s 188 * should be NULL, and saveptr should be unchanged since the previous 189 * call. 190 * 191 * This function is similar to strtok_r() defined in POSIX.1. 192 * 193 * @param s the string to parse, may be NULL 194 * @param delim 0-terminated list of token delimiters, must be non-NULL 195 * @param saveptr user-provided pointer which points to stored 196 * information necessary for av_strtok() to continue scanning the same 197 * string. saveptr is updated to point to the next character after the 198 * first delimiter found, or to NULL if the string was terminated 199 * @return the found token, or NULL when no token is found 200 */ 201char *av_strtok(char *s, const char *delim, char **saveptr); 202 203/** 204 * Locale-independent conversion of ASCII isdigit. 205 */ 206int av_isdigit(int c); 207 208/** 209 * Locale-independent conversion of ASCII isgraph. 210 */ 211int av_isgraph(int c); 212 213/** 214 * Locale-independent conversion of ASCII isspace. 215 */ 216int av_isspace(int c); 217 218/** 219 * Locale-independent conversion of ASCII characters to uppercase. 220 */ 221static inline int av_toupper(int c) 222{ 223 if (c >= 'a' && c <= 'z') 224 c ^= 0x20; 225 return c; 226} 227 228/** 229 * Locale-independent conversion of ASCII characters to lowercase. 230 */ 231static inline int av_tolower(int c) 232{ 233 if (c >= 'A' && c <= 'Z') 234 c ^= 0x20; 235 return c; 236} 237 238/** 239 * Locale-independent conversion of ASCII isxdigit. 240 */ 241int av_isxdigit(int c); 242 243/** 244 * Locale-independent case-insensitive compare. 245 * @note This means only ASCII-range characters are case-insensitive 246 */ 247int av_strcasecmp(const char *a, const char *b); 248 249/** 250 * Locale-independent case-insensitive compare. 251 * @note This means only ASCII-range characters are case-insensitive 252 */ 253int av_strncasecmp(const char *a, const char *b, size_t n); 254 255 256/** 257 * Thread safe basename. 258 * @param path the path, on DOS both \ and / are considered separators. 259 * @return pointer to the basename substring. 260 */ 261const char *av_basename(const char *path); 262 263/** 264 * Thread safe dirname. 265 * @param path the path, on DOS both \ and / are considered separators. 266 * @return the path with the separator replaced by the string terminator or ".". 267 * @note the function may change the input string. 268 */ 269const char *av_dirname(char *path); 270 271enum AVEscapeMode { 272 AV_ESCAPE_MODE_AUTO, ///< Use auto-selected escaping mode. 273 AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping. 274 AV_ESCAPE_MODE_QUOTE, ///< Use single-quote escaping. 275}; 276 277/** 278 * Consider spaces special and escape them even in the middle of the 279 * string. 280 * 281 * This is equivalent to adding the whitespace characters to the special 282 * characters lists, except it is guaranteed to use the exact same list 283 * of whitespace characters as the rest of libavutil. 284 */ 285#define AV_ESCAPE_FLAG_WHITESPACE 0x01 286 287/** 288 * Escape only specified special characters. 289 * Without this flag, escape also any characters that may be considered 290 * special by av_get_token(), such as the single quote. 291 */ 292#define AV_ESCAPE_FLAG_STRICT 0x02 293 294/** 295 * Escape string in src, and put the escaped string in an allocated 296 * string in *dst, which must be freed with av_free(). 297 * 298 * @param dst pointer where an allocated string is put 299 * @param src string to escape, must be non-NULL 300 * @param special_chars string containing the special characters which 301 * need to be escaped, can be NULL 302 * @param mode escape mode to employ, see AV_ESCAPE_MODE_* macros. 303 * Any unknown value for mode will be considered equivalent to 304 * AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without 305 * notice. 306 * @param flags flags which control how to escape, see AV_ESCAPE_FLAG_ macros 307 * @return the length of the allocated string, or a negative error code in case of error 308 * @see av_bprint_escape() 309 */ 310int av_escape(char **dst, const char *src, const char *special_chars, 311 enum AVEscapeMode mode, int flags); 312 313#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES 1 ///< accept codepoints over 0x10FFFF 314#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS 2 ///< accept non-characters - 0xFFFE and 0xFFFF 315#define AV_UTF8_FLAG_ACCEPT_SURROGATES 4 ///< accept UTF-16 surrogates codes 316#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML 317 318#define AV_UTF8_FLAG_ACCEPT_ALL \ 319 AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES|AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS|AV_UTF8_FLAG_ACCEPT_SURROGATES 320 321/** 322 * Read and decode a single UTF-8 code point (character) from the 323 * buffer in *buf, and update *buf to point to the next byte to 324 * decode. 325 * 326 * In case of an invalid byte sequence, the pointer will be updated to 327 * the next byte after the invalid sequence and the function will 328 * return an error code. 329 * 330 * Depending on the specified flags, the function will also fail in 331 * case the decoded code point does not belong to a valid range. 332 * 333 * @note For speed-relevant code a carefully implemented use of 334 * GET_UTF8() may be preferred. 335 * 336 * @param codep pointer used to return the parsed code in case of success. 337 * The value in *codep is set even in case the range check fails. 338 * @param bufp pointer to the address the first byte of the sequence 339 * to decode, updated by the function to point to the 340 * byte next after the decoded sequence 341 * @param buf_end pointer to the end of the buffer, points to the next 342 * byte past the last in the buffer. This is used to 343 * avoid buffer overreads (in case of an unfinished 344 * UTF-8 sequence towards the end of the buffer). 345 * @param flags a collection of AV_UTF8_FLAG_* flags 346 * @return >= 0 in case a sequence was successfully read, a negative 347 * value in case of invalid sequence 348 */ 349int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end, 350 unsigned int flags); 351 352/** 353 * @} 354 */ 355 356#endif /* AVUTIL_AVSTRING_H */ 357