/*++ /* NAME /* header_token 3 /* SUMMARY /* mail header parser /* SYNOPSIS /* #include <header_token.h> /* /* typedef struct { /* .in +4 /* int type; /* const char *u.value; /* /* ... */ /* .in /* } HEADER_TOKEN; /* /* ssize_t header_token(token, token_len, token_buffer, ptr, /* specials, terminator) /* HEADER_TOKEN *token; /* ssize_t token_len; /* VSTRING *token_buffer; /* const char **ptr; /* const char *specials; /* int terminator; /* DESCRIPTION /* This module parses a mail header value (text after field-name:) /* into tokens. The parser understands RFC 822 linear white space, /* quoted-string, comment, control characters, and a set of /* user-specified special characters. /* /* A result token type is one of the following: /* .IP HEADER_TOK_QSTRING /* Quoted string as per RFC 822. /* .IP HEADER_TOK_TOKEN /* Token as per RFC 822, and the special characters supplied by the /* caller. /* .IP other /* The value of a control character or special character. /* .PP /* header_token() tokenizes the input and stops after a user-specified /* terminator (ignoring all tokens that exceed the capacity of /* the result storage), or when it runs out of space for the result. /* The terminator is not stored. The result value is the number of /* tokens stored, or -1 when the input was exhausted before any tokens /* were found. /* /* Arguments: /* .IP token /* Result array of HEADER_TOKEN structures. Token string values /* are pointers to null-terminated substrings in the token_buffer. /* .IP token_len /* Length of the array of HEADER_TOKEN structures. /* .IP token_buffer /* Storage for result token string values. /* .IP ptr /* Input/output read position. The input is a null-terminated string. /* .IP specials /* Special characters according to the relevant RFC, or a /* null pointer (default to the RFC 822 special characters). /* This must include the optional terminator if one is specified. /* .IP terminator /* The special character to stop after, or zero. 
/* BUGS
/*	Eight-bit characters are not given special treatment.
/* SEE ALSO
/*	RFC 822 (ARPA Internet Text Messages)
/* DIAGNOSTICS
/*	Fatal errors: memory allocation problem.
/* LICENSE
/* .ad
/* .fi
/*	The Secure Mailer license must be distributed with this software.
/* AUTHOR(S)
/*	Wietse Venema
/*	IBM T.J. Watson Research
/*	P.O. Box 704
/*	Yorktown Heights, NY 10598, USA
/*--*/

/*
 * NOTE(review): each include directive below has no header name in this
 * copy of the file (possibly stripped as markup during extraction). The
 * directives are left exactly as found; restore the names from the
 * upstream source before compiling.
 */

/* System library. */

#include
#include
#include

/* Utility library. */

#include
#include

/* Global library. */

#include
#include

/* Application-specific. */

/*
 * Silly little macros.
 *
 * STR() and LEN() are shorthands for vstring_str() and VSTRING_LEN().
 * CU_CHAR_PTR() casts a pointer to const unsigned char *, which is how the
 * parser reads its input (cp and ch below).
 */
#define STR(x) vstring_str(x)
#define LEN(x) VSTRING_LEN(x)
#define CU_CHAR_PTR(x) ((const unsigned char *) (x))

/* header_token - parse out the next item in a message header */

/*
 * Tokenizes the null-terminated text at *ptr and stores up to token_len
 * results in token[].
 *
 * Arguments:
 *
 * token            Result array. While parsing, each stored entry receives
 *                  its type and u.offset (a position in token_buffer);
 *                  u.value is filled in just before returning.
 *
 * token_len        Capacity of token[]. Every store is guarded by
 *                  tok_count < token_len, so input that is scanned after
 *                  the array is full is consumed without being stored.
 *
 * token_buffer     Reset on entry, then receives the null-terminated text
 *                  of every stored token.
 *
 * ptr              On entry, the start of the input. On a non-negative
 *                  return it is advanced to where scanning stopped. On a
 *                  -1 return it is left unchanged.
 *
 * user_specials    Characters that form single-character tokens and that
 *                  end an unquoted token. A null pointer selects
 *                  LEX_822_SPECIALS.
 *
 * user_terminator  Character to stop after, or zero for none. It is
 *                  consumed but not stored. It is only tested at the top
 *                  level: the comment and quoted-string scanners do not
 *                  look for it, and the unquoted-token scanner stops at it
 *                  only if it is whitespace, a control character, or
 *                  listed in user_specials.
 *
 * Returns the number of tokens stored, or -1 when the end of the input was
 * reached without storing any token.
 */
ssize_t header_token(HEADER_TOKEN *token, ssize_t token_len,
                     VSTRING *token_buffer, const char **ptr,
                     const char *user_specials, int user_terminator)
{
    ssize_t comment_level;              /* comment nesting depth */
    const unsigned char *cp;            /* read position in the input */
    ssize_t len;                        /* scratch length while unfolding */
    int ch;                             /* character being examined */
    ssize_t tok_count;                  /* number of tokens stored so far */
    ssize_t n;                          /* index for the final fix-up loop */

    /*
     * Initialize.
     */
    VSTRING_RESET(token_buffer);
    cp = CU_CHAR_PTR(*ptr);
    tok_count = 0;
    if (user_specials == 0)
        user_specials = LEX_822_SPECIALS;

    /*
     * Main parsing loop.
     *
     * XXX What was the reason to continue parsing when user_terminator is
     * specified? Perhaps this was needed at some intermediate stage of
     * development?
     *
     * Without a terminator the loop stops as soon as the result array is
     * full. With a terminator it keeps scanning (without storing) until the
     * terminator or the end of the input is seen.
     */
    while ((ch = *cp) != 0 && (user_terminator != 0 || tok_count < token_len)) {
        cp++;

        /*
         * Skip RFC 822 linear white space.
         */
        if (IS_SPACE_TAB_CR_LF(ch))
            continue;

        /*
         * Terminator. This test comes before the specials test below, so the
         * terminator itself never becomes a token.
         */
        if (ch == user_terminator)
            break;

        /*
         * Skip RFC 822 comment. A backslash hides the next character from
         * the parenthesis counting. An unterminated comment runs to the end
         * of the input. Nothing is stored for a comment.
         */
        if (ch == '(') {
            comment_level = 1;
            while ((ch = *cp) != 0) {
                cp++;
                if (ch == '(') {        /* comments can nest! */
                    comment_level++;
                } else if (ch == ')') {
                    if (--comment_level == 0)
                        break;
                } else if (ch == '\\') {
                    /* Do not step past the end of the input. */
                    if ((ch = *cp) == 0)
                        break;
                    cp++;
                }
            }
            continue;
        }

        /*
         * Copy quoted text according to RFC 822. The surrounding double
         * quotes are not stored. The start of the text is recorded as an
         * offset, not a pointer, because the buffer may be reallocated as
         * it grows (see the finalize step).
         */
        if (ch == '"') {
            if (tok_count < token_len) {
                token[tok_count].u.offset = LEN(token_buffer);
                token[tok_count].type = HEADER_TOK_QSTRING;
            }
            while ((ch = *cp) != 0) {
                cp++;
                if (ch == '"')
                    break;
                if (ch == '\n') {       /* unfold */
                    /*
                     * Drop the newline, plus any whitespace already stored
                     * immediately before it.
                     */
                    if (tok_count < token_len) {
                        len = LEN(token_buffer);
                        while (len > 0
                               && IS_SPACE_TAB_CR_LF(STR(token_buffer)[len - 1]))
                            len--;
                        if (len < LEN(token_buffer))
                            vstring_truncate(token_buffer, len);
                    }
                    continue;
                }
                if (ch == '\\') {
                    /*
                     * Store the character after the backslash instead of the
                     * backslash itself; stop if the input ends here.
                     */
                    if ((ch = *cp) == 0)
                        break;
                    cp++;
                }
                if (tok_count < token_len)
                    VSTRING_ADDCH(token_buffer, ch);
            }
            /* Terminate and count the token, also when the quote is unclosed. */
            if (tok_count < token_len) {
                VSTRING_ADDCH(token_buffer, 0);
                tok_count++;
            }
            continue;
        }

        /*
         * Control, or special. The token type is the character value itself
         * and the token text is that single character.
         */
        if (strchr(user_specials, ch) || ISCNTRL(ch)) {
            if (tok_count < token_len) {
                token[tok_count].u.offset = LEN(token_buffer);
                token[tok_count].type = ch;
                VSTRING_ADDCH(token_buffer, ch);
                VSTRING_ADDCH(token_buffer, 0);
                tok_count++;
            }
            continue;
        }

        /*
         * Token. Extends up to, but not including, the next whitespace,
         * control or special character, or the end of the input; that
         * character is left for the next iteration of the main loop. An
         * opening parenthesis or double quote is not tested for explicitly
         * here, so it ends the token only when one of those tests matches
         * it.
         */
        else {
            if (tok_count < token_len) {
                token[tok_count].u.offset = LEN(token_buffer);
                token[tok_count].type = HEADER_TOK_TOKEN;
                VSTRING_ADDCH(token_buffer, ch);
            }
            while ((ch = *cp) != 0 && !IS_SPACE_TAB_CR_LF(ch)
                   && !ISCNTRL(ch) && !strchr(user_specials, ch)) {
                cp++;
                if (tok_count < token_len)
                    VSTRING_ADDCH(token_buffer, ch);
            }
            if (tok_count < token_len) {
                VSTRING_ADDCH(token_buffer, 0);
                tok_count++;
            }
            continue;
        }
    }

    /*
     * Ignore a zero-length item after the last terminator.
     *
     * ch is zero here only when scanning stopped at the end of the input;
     * after a terminator, or after stopping because the result array is
     * full, ch is nonzero. Note that *ptr is not updated on this path.
     */
    if (tok_count == 0 && ch == 0)
        return (-1);

    /*
     * Finalize. Fill in the string pointer array, now that the token buffer
     * is no longer dynamically reallocated as it grows.
     */
    *ptr = (const char *) cp;
    for (n = 0; n < tok_count; n++)
        token[n].u.value = STR(token_buffer) + token[n].u.offset;

    /* With verbose logging enabled, show up to the first three tokens. */
    if (msg_verbose)
        msg_info("header_token: %s %s %s",
                 tok_count > 0 ? token[0].u.value : "",
                 tok_count > 1 ? token[1].u.value : "",
                 tok_count > 2 ? token[2].u.value : "");

    return (tok_count);
}