1/* (Text)Component - message component base class and plain text 2** 3** Copyright 2001 Dr. Zoidberg Enterprises. All rights reserved. 4*/ 5 6 7#include <String.h> 8#include <Mime.h> 9 10#include <ctype.h> 11#include <stdlib.h> 12#include <strings.h> 13 14class _EXPORT BMailComponent; 15class _EXPORT BTextMailComponent; 16 17#include <MailComponent.h> 18#include <MailAttachment.h> 19#include <MailContainer.h> 20#include <mail_util.h> 21 22#include <CharacterSet.h> 23#include <CharacterSetRoster.h> 24 25using namespace BPrivate ; 26 27struct CharsetConversionEntry 28{ 29 const char* charset; 30 uint32 flavor; 31}; 32 33extern const CharsetConversionEntry mail_charsets[]; 34 35 36const char* kHeaderCharsetString = "header-charset"; 37const char* kHeaderEncodingString = "header-encoding"; 38// Special field names in the headers which specify the character set (int32) 39// and encoding (int8) to use when converting the headers from UTF-8 to the 40// output e-mail format (rfc2047). Since they are numbers, not strings, the 41// extra fields won't be output. 42 43 44BMailComponent::BMailComponent(uint32 defaultCharSet) 45 : _charSetForTextDecoding (defaultCharSet) 46{ 47} 48 49 50BMailComponent::~BMailComponent() 51{ 52} 53 54 55uint32 56BMailComponent::ComponentType() 57{ 58 if (NULL != dynamic_cast<BAttributedMailAttachment*> (this)) 59 return B_MAIL_ATTRIBUTED_ATTACHMENT; 60 61 BMimeType type, super; 62 MIMEType(&type); 63 type.GetSupertype(&super); 64 65 //---------ATT-This code *desperately* needs to be improved 66 if (super == "multipart") { 67 if (type == "multipart/x-bfile") // Not likely, they have the MIME 68 return B_MAIL_ATTRIBUTED_ATTACHMENT; // of their data contents. 69 else 70 return B_MAIL_MULTIPART_CONTAINER; 71 } else if (!IsAttachment() && (super == "text" || type.Type() == NULL)) 72 return B_MAIL_PLAIN_TEXT_BODY; 73 else 74 return B_MAIL_SIMPLE_ATTACHMENT; 75} 76 77 78BMailComponent* 79BMailComponent::WhatIsThis() 80{ 81 switch (ComponentType()) { 82 case B_MAIL_SIMPLE_ATTACHMENT: 83 return new BSimpleMailAttachment; 84 case B_MAIL_ATTRIBUTED_ATTACHMENT: 85 return new BAttributedMailAttachment; 86 case B_MAIL_MULTIPART_CONTAINER: 87 return new BMIMEMultipartMailContainer (NULL, NULL, _charSetForTextDecoding); 88 case B_MAIL_PLAIN_TEXT_BODY: 89 default: 90 return new BTextMailComponent (NULL, _charSetForTextDecoding); 91 } 92} 93 94 95bool 96BMailComponent::IsAttachment() 97{ 98 const char* disposition = HeaderField("Content-Disposition"); 99 if ((disposition != NULL) 100 && (strncasecmp(disposition, "Attachment", strlen("Attachment")) == 0)) 101 return true; 102 103 BMessage header; 104 HeaderField("Content-Type", &header); 105 if (header.HasString("name")) 106 return true; 107 108 if (HeaderField("Content-Location", &header) == B_OK) 109 return true; 110 111 BMimeType type; 112 MIMEType(&type); 113 if (type == "multipart/x-bfile") 114 return true; 115 116 return false; 117} 118 119 120void 121BMailComponent::SetHeaderField(const char* key, const char* value, 122 uint32 charset, mail_encoding encoding, bool replace_existing) 123{ 124 if (replace_existing) 125 headers.RemoveName(key); 126 if (value != NULL && value[0] != 0) // Empty or NULL strings mean delete header. 127 headers.AddString(key, value); 128 129 // Latest setting of the character set and encoding to use when outputting 130 // the headers is the one which affects all the headers. There used to be 131 // separate settings for each item in the headers, but it never actually 132 // worked (can't store multiple items of different types in a BMessage). 133 if (charset != B_MAIL_NULL_CONVERSION 134 && headers.ReplaceInt32 (kHeaderCharsetString, charset) != B_OK) 135 headers.AddInt32(kHeaderCharsetString, charset); 136 if (encoding != null_encoding 137 && headers.ReplaceInt8 (kHeaderEncodingString, encoding) != B_OK) 138 headers.AddInt8(kHeaderEncodingString, encoding); 139} 140 141 142void 143BMailComponent::SetHeaderField(const char* key, BMessage* structure, 144 bool replace_existing) 145{ 146 int32 charset = B_MAIL_NULL_CONVERSION; 147 int8 encoding = null_encoding; 148 const char* unlabeled = "unlabeled"; 149 150 if (replace_existing) 151 headers.RemoveName(key); 152 153 BString value; 154 if (structure->HasString(unlabeled)) 155 value << structure->FindString(unlabeled) << "; "; 156 157 const char* name; 158 const char* sub_val; 159 type_code type; 160 for (int32 i = 0; structure->GetInfo(B_STRING_TYPE, i, 161#if !defined(HAIKU_TARGET_PLATFORM_DANO) 162 (char**) 163#endif 164 &name, &type) == B_OK; i++) { 165 166 if (strcasecmp(name, unlabeled) == 0) 167 continue; 168 169 structure->FindString(name, &sub_val); 170 value << name << '='; 171 if (BString(sub_val).FindFirst(' ') > 0) 172 value << '\"' << sub_val << "\"; "; 173 else 174 value << sub_val << "; "; 175 } 176 177 value.Truncate(value.Length() - 2); //-----Remove the last "; " 178 179 if (structure->HasInt32(kHeaderCharsetString)) 180 structure->FindInt32(kHeaderCharsetString, &charset); 181 if (structure->HasInt8(kHeaderEncodingString)) 182 structure->FindInt8(kHeaderEncodingString, &encoding); 183 184 SetHeaderField(key, value.String(), (uint32) charset, (mail_encoding) encoding); 185} 186 187 188const char* 189BMailComponent::HeaderField(const char* key, int32 index) const 190{ 191 const char* string = NULL; 192 193 headers.FindString(key, index, &string); 194 return string; 195} 196 197 198status_t 199BMailComponent::HeaderField(const char* key, BMessage* structure, 200 int32 index) const 201{ 202 BString string = HeaderField(key, index); 203 if (string == "") 204 return B_NAME_NOT_FOUND; 205 206 BString sub_cat; 207 BString end_piece; 208 int32 i = 0; 209 int32 end = 0; 210 211 // Break the header into parts, they're separated by semicolons, like this: 212 // Content-Type: multipart/mixed;boundary= "----=_NextPart_000_00AA_354DB459.5977A1CA" 213 // There's also white space and quotes to be removed, and even comments in 214 // parenthesis like this, which can appear anywhere white space is: (header comment) 215 216 while (end < string.Length()) { 217 end = string.FindFirst(';', i); 218 if (end < 0) 219 end = string.Length(); 220 221 string.CopyInto(sub_cat, i, end - i); 222 i = end + 1; 223 224 //-------Trim spaces off of beginning and end of text 225 for (int32 h = 0; h < sub_cat.Length(); h++) { 226 if (!isspace(sub_cat.ByteAt(h))) { 227 sub_cat.Remove(0, h); 228 break; 229 } 230 } 231 for (int32 h = sub_cat.Length() - 1; h >= 0; h--) { 232 if (!isspace(sub_cat.ByteAt(h))) { 233 sub_cat.Truncate(h + 1); 234 break; 235 } 236 } 237 //--------Split along '=' 238 int32 first_equal = sub_cat.FindFirst('='); 239 if (first_equal >= 0) { 240 sub_cat.CopyInto(end_piece, first_equal + 1, sub_cat.Length() - first_equal - 1); 241 sub_cat.Truncate(first_equal); 242 // Remove leading spaces from part after the equals sign. 243 while (isspace (end_piece.ByteAt(0))) 244 end_piece.Remove (0 /* index */, 1 /* number of chars */); 245 // Remove quote marks. 246 if (end_piece.ByteAt(0) == '\"') { 247 end_piece.Remove(0, 1); 248 end_piece.Truncate(end_piece.Length() - 1); 249 } 250 sub_cat.ToLower(); 251 structure->AddString(sub_cat.String(), end_piece.String()); 252 } else { 253 structure->AddString("unlabeled", sub_cat.String()); 254 } 255 } 256 257 return B_OK; 258} 259 260 261status_t 262BMailComponent::RemoveHeader(const char* key) 263{ 264 return headers.RemoveName(key); 265} 266 267 268const char* 269BMailComponent::HeaderAt(int32 index) const 270{ 271#if defined(HAIKU_TARGET_PLATFORM_DANO) 272 const 273#endif 274 char* name = NULL; 275 type_code type; 276 277 headers.GetInfo(B_STRING_TYPE, index, &name, &type); 278 return name; 279} 280 281 282status_t 283BMailComponent::GetDecodedData(BPositionIO*) 284{ 285 return B_OK; 286} 287 288 289status_t 290BMailComponent::SetDecodedData(BPositionIO*) 291{ 292 return B_OK; 293} 294 295 296status_t 297BMailComponent::SetToRFC822(BPositionIO* data, size_t /*length*/, bool /*parse_now*/) 298{ 299 headers.MakeEmpty(); 300 301 // Only parse the header here 302 return parse_header(headers, *data); 303} 304 305 306status_t 307BMailComponent::RenderToRFC822(BPositionIO* render_to) 308{ 309 int32 charset = B_ISO15_CONVERSION; 310 int8 encoding = quoted_printable; 311 const char* key; 312 const char* value; 313 char* allocd; 314 ssize_t amountWritten; 315 BString concat; 316 type_code stupidity_personified = B_STRING_TYPE; 317 int32 count = 0; 318 319 if (headers.HasInt32(kHeaderCharsetString)) 320 headers.FindInt32(kHeaderCharsetString, &charset); 321 if (headers.HasInt8(kHeaderEncodingString)) 322 headers.FindInt8(kHeaderEncodingString, &encoding); 323 324 for (int32 index = 0; headers.GetInfo(B_STRING_TYPE, index, 325#if !defined(HAIKU_TARGET_PLATFORM_DANO) 326 (char**) 327#endif 328 &key, &stupidity_personified, &count) == B_OK; index++) { 329 for (int32 g = 0; g < count; g++) { 330 headers.FindString(key, g, (const char**)&value); 331 allocd = (char*)malloc(strlen(value) + 1); 332 strcpy(allocd, value); 333 334 concat << key << ": "; 335 concat.CapitalizeEachWord(); 336 337 concat.Append(allocd, utf8_to_rfc2047(&allocd, strlen(value), 338 charset, encoding)); 339 free(allocd); 340 FoldLineAtWhiteSpaceAndAddCRLF(concat); 341 342 amountWritten = render_to->Write(concat.String(), concat.Length()); 343 if (amountWritten < 0) 344 return amountWritten; // IO error happened, usually disk full. 345 concat = ""; 346 } 347 } 348 349 render_to->Write("\r\n", 2); 350 351 return B_OK; 352} 353 354 355status_t 356BMailComponent::MIMEType(BMimeType* mime) 357{ 358 bool foundBestHeader; 359 const char* boundaryString; 360 unsigned int i; 361 BMessage msg; 362 const char* typeAsString = NULL; 363 char typeAsLowerCaseString[B_MIME_TYPE_LENGTH]; 364 365 // Find the best Content-Type header to use. There should really be just 366 // one, but evil spammers sneakily insert one for multipart (with no 367 // boundary string), then one for text/plain. We'll scan through them and 368 // only use the multipart one if there are no others, and it has a 369 // boundary. 370 371 foundBestHeader = false; 372 for (i = 0; msg.MakeEmpty(), HeaderField("Content-Type", &msg, i) == B_OK; i++) { 373 typeAsString = msg.FindString("unlabeled"); 374 if (typeAsString != NULL && strncasecmp(typeAsString, "multipart", 9) != 0) { 375 foundBestHeader = true; 376 break; 377 } 378 } 379 if (!foundBestHeader) { 380 for (i = 0; msg.MakeEmpty(), HeaderField("Content-Type", &msg, i) == B_OK; i++) { 381 typeAsString = msg.FindString("unlabeled"); 382 if (typeAsString != NULL && strncasecmp(typeAsString, "multipart", 9) == 0) { 383 boundaryString = msg.FindString("boundary"); 384 if (boundaryString != NULL && strlen(boundaryString) > 0) { 385 foundBestHeader = true; 386 break; 387 } 388 } 389 } 390 } 391 // At this point we have the good MIME type in typeAsString, but only if 392 // foundBestHeader is true. 393 394 if (!foundBestHeader) { 395 strcpy(typeAsLowerCaseString, "text/plain"); // Hope this is an OK default. 396 } else { 397 // Some extra processing to convert mixed or upper case MIME types into 398 // lower case, since the BeOS R5 BMimeType is case sensitive (but Haiku 399 // isn't). Also truncate the string if it is too long. 400 for (i = 0; i < sizeof(typeAsLowerCaseString) - 1 401 && typeAsString[i] != 0; i++) 402 typeAsLowerCaseString[i] = tolower(typeAsString[i]); 403 typeAsLowerCaseString[i] = 0; 404 405 // Some old e-mail programs saved the type as just "TEXT", which we need to 406 // convert to "text/plain" since the rest of the code looks for that. 407 if (strcmp(typeAsLowerCaseString, "text") == 0) 408 strcpy(typeAsLowerCaseString, "text/plain"); 409 } 410 mime->SetTo(typeAsLowerCaseString); 411 return B_OK; 412} 413 414 415void BMailComponent::_ReservedComponent1() {} 416void BMailComponent::_ReservedComponent2() {} 417void BMailComponent::_ReservedComponent3() {} 418void BMailComponent::_ReservedComponent4() {} 419void BMailComponent::_ReservedComponent5() {} 420 421 422//------------------------------------------------------------------------- 423// #pragma mark - 424 425 426BTextMailComponent::BTextMailComponent(const char* text, uint32 defaultCharSet) 427 : BMailComponent(defaultCharSet), 428 encoding(quoted_printable), 429 charset(B_ISO15_CONVERSION), 430 raw_data(NULL) 431{ 432 if (text != NULL) 433 SetText(text); 434 435 SetHeaderField("MIME-Version", "1.0"); 436} 437 438 439BTextMailComponent::~BTextMailComponent() 440{ 441} 442 443 444void 445BTextMailComponent::SetEncoding(mail_encoding encoding, int32 charset) 446{ 447 this->encoding = encoding; 448 this->charset = charset; 449} 450 451 452void 453BTextMailComponent::SetText(const char* text) 454{ 455 this->text.SetTo(text); 456 457 raw_data = NULL; 458} 459 460 461void 462BTextMailComponent::AppendText(const char* text) 463{ 464 ParseRaw(); 465 466 this->text << text; 467} 468 469 470const char* 471BTextMailComponent::Text() 472{ 473 ParseRaw(); 474 475 return text.String(); 476} 477 478 479BString* 480BTextMailComponent::BStringText() 481{ 482 ParseRaw(); 483 484 return &text; 485} 486 487 488void 489BTextMailComponent::Quote(const char* message, const char* quote_style) 490{ 491 ParseRaw(); 492 493 BString string; 494 string << '\n' << quote_style; 495 text.ReplaceAll("\n",string.String()); 496 497 string = message; 498 string << '\n'; 499 text.Prepend(string.String()); 500} 501 502 503status_t 504BTextMailComponent::GetDecodedData(BPositionIO* data) 505{ 506 ParseRaw(); 507 508 if (data == NULL) 509 return B_IO_ERROR; 510 511 BMimeType type; 512 BMimeType textAny("text"); 513 ssize_t written; 514 if (MIMEType(&type) == B_OK && textAny.Contains(&type)) 515 // Write out the string which has been both decoded from quoted 516 // printable or base64 etc, and then converted to UTF-8 from whatever 517 // character set the message specified. Do it for text/html, 518 // text/plain and all other text datatypes. Of course, if the message 519 // is HTML and specifies a META tag for a character set, it will now be 520 // wrong. But then we don't display HTML in BeMail, yet. 521 written = data->Write(text.String(), text.Length()); 522 else 523 // Just write out whatever the binary contents are, only decoded from 524 // the quoted printable etc format. 525 written = data->Write(decoded.String(), decoded.Length()); 526 527 return written >= 0 ? B_OK : written; 528} 529 530 531status_t 532BTextMailComponent::SetDecodedData(BPositionIO* data) 533{ 534 char buffer[255]; 535 size_t buf_len; 536 537 while ((buf_len = data->Read(buffer, 254)) > 0) { 538 buffer[buf_len] = 0; 539 this->text << buffer; 540 } 541 542 raw_data = NULL; 543 544 return B_OK; 545} 546 547 548status_t 549BTextMailComponent::SetToRFC822(BPositionIO* data, size_t length, bool parseNow) 550{ 551 off_t position = data->Position(); 552 BMailComponent::SetToRFC822(data, length); 553 554 // Some malformed MIME headers can have the header running into the 555 // boundary of the next MIME chunk, resulting in a negative length. 556 length -= data->Position() - position; 557 if ((ssize_t) length < 0) 558 length = 0; 559 560 raw_data = data; 561 raw_length = length; 562 raw_offset = data->Position(); 563 564 if (parseNow) { 565 // copies the data stream and sets the raw_data variable to NULL 566 return ParseRaw(); 567 } 568 569 return B_OK; 570} 571 572 573status_t 574BTextMailComponent::ParseRaw() 575{ 576 if (raw_data == NULL) 577 return B_OK; 578 579 raw_data->Seek(raw_offset, SEEK_SET); 580 581 BMessage content_type; 582 HeaderField("Content-Type", &content_type); 583 584 charset = _charSetForTextDecoding; 585 if (charset == B_MAIL_NULL_CONVERSION && content_type.HasString("charset")) { 586 const char* charset_string = content_type.FindString("charset"); 587 if (strcasecmp(charset_string, "us-ascii") == 0) { 588 charset = B_MAIL_US_ASCII_CONVERSION; 589 } else if (strcasecmp(charset_string, "utf-8") == 0) { 590 charset = B_MAIL_UTF8_CONVERSION; 591 } else { 592 const BCharacterSet* cs = BCharacterSetRoster::FindCharacterSetByName(charset_string); 593 if (cs != NULL) { 594 charset = cs->GetConversionID(); 595 } 596 } 597 } 598 599 encoding = encoding_for_cte(HeaderField("Content-Transfer-Encoding")); 600 601 char* buffer = (char*)malloc(raw_length + 1); 602 if (buffer == NULL) 603 return B_NO_MEMORY; 604 605 int32 bytes; 606 if ((bytes = raw_data->Read(buffer, raw_length)) < 0) 607 return B_IO_ERROR; 608 609 char* string = decoded.LockBuffer(bytes + 1); 610 bytes = decode(encoding, string, buffer, bytes, 0); 611 free(buffer); 612 buffer = NULL; 613 614 // Change line ends from \r\n to just \n. Though this won't work properly 615 // for UTF-16 because \r takes up two bytes rather than one. 616 char* dest; 617 char* src; 618 char* end = string + bytes; 619 for (dest = src = string; src < end; src++) { 620 if (*src != '\r') 621 *dest++ = *src; 622 } 623 decoded.UnlockBuffer(dest - string); 624 bytes = decoded.Length(); // Might have shrunk a bit. 625 626 // If the character set wasn't specified, try to guess. ISO-2022-JP 627 // contains the escape sequences ESC $ B or ESC $ @ to turn on 2 byte 628 // Japanese, and ESC ( J to switch to Roman, or sometimes ESC ( B for 629 // ASCII. We'll just try looking for the two switch to Japanese sequences. 630 631 if (charset == B_MAIL_NULL_CONVERSION) { 632 if (decoded.FindFirst ("\e$B") >= 0 || decoded.FindFirst ("\e$@") >= 0) 633 charset = B_JIS_CONVERSION; 634 else // Just assume the usual Latin-9 character set. 635 charset = B_ISO15_CONVERSION; 636 } 637 638 int32 state = 0; 639 int32 destLength = bytes * 3 /* in case it grows */ + 1 /* +1 so it isn't zero which crashes */; 640 string = text.LockBuffer(destLength); 641 mail_convert_to_utf8(charset, decoded.String(), &bytes, string, 642 &destLength, &state); 643 if (destLength > 0) 644 text.UnlockBuffer(destLength); 645 else { 646 text.UnlockBuffer(0); 647 text.SetTo(decoded); 648 } 649 650 raw_data = NULL; 651 return B_OK; 652} 653 654 655status_t 656BTextMailComponent::RenderToRFC822(BPositionIO* render_to) 657{ 658 status_t status = ParseRaw(); 659 if (status < B_OK) 660 return status; 661 662 BMimeType type; 663 MIMEType(&type); 664 BString content_type; 665 content_type << type.Type(); // Preserve MIME type (e.g. text/html 666 667 for (uint32 i = 0; mail_charsets[i].charset != NULL; i++) { 668 if (mail_charsets[i].flavor == charset) { 669 content_type << "; charset=\"" << mail_charsets[i].charset << "\""; 670 break; 671 } 672 } 673 674 SetHeaderField("Content-Type", content_type.String()); 675 676 const char* transfer_encoding = NULL; 677 switch (encoding) { 678 case base64: 679 transfer_encoding = "base64"; 680 break; 681 case quoted_printable: 682 transfer_encoding = "quoted-printable"; 683 break; 684 case eight_bit: 685 transfer_encoding = "8bit"; 686 break; 687 case seven_bit: 688 default: 689 transfer_encoding = "7bit"; 690 break; 691 } 692 693 SetHeaderField("Content-Transfer-Encoding", transfer_encoding); 694 695 BMailComponent::RenderToRFC822(render_to); 696 697 BString modified = this->text; 698 BString alt; 699 700 int32 len = this->text.Length(); 701 if (len > 0) { 702 int32 dest_len = len * 5; 703 // Shift-JIS can have a 3 byte escape sequence and a 2 byte code for 704 // each character (which could just be 2 bytes in UTF-8, or even 1 byte 705 // if it's regular ASCII), so it can get quite a bit larger than the 706 // original text. Multiplying by 5 should make more than enough space. 707 char* raw = alt.LockBuffer(dest_len); 708 int32 state = 0; 709 mail_convert_from_utf8(charset, this->text.String(), &len, raw, 710 &dest_len, &state); 711 alt.UnlockBuffer(dest_len); 712 713 raw = modified.LockBuffer((alt.Length() * 3) + 1); 714 switch (encoding) { 715 case base64: 716 len = encode_base64(raw, alt.String(), alt.Length(), false); 717 raw[len] = 0; 718 break; 719 case quoted_printable: 720 len = encode_qp(raw, alt.String(), alt.Length(), false); 721 raw[len] = 0; 722 break; 723 case eight_bit: 724 case seven_bit: 725 default: 726 len = alt.Length(); 727 strcpy(raw, alt.String()); 728 } 729 modified.UnlockBuffer(len); 730 731 if (encoding != base64) // encode_base64 already does CRLF line endings. 732 modified.ReplaceAll("\n","\r\n"); 733 734 // There seem to be a possibility of NULL bytes in the text, so lets 735 // filter them out, shouldn't be any after the encoding stage. 736 737 char* string = modified.LockBuffer(modified.Length()); 738 for (int32 i = modified.Length(); i-- > 0;) { 739 if (string[i] != '\0') 740 continue; 741 742 puts("BTextMailComponent::RenderToRFC822: NULL byte in text!!"); 743 string[i] = ' '; 744 } 745 modified.UnlockBuffer(); 746 747 // word wrapping is already done by BeMail (user-configurable) 748 // and it does it *MUCH* nicer. 749 750// //------Desperate bid to wrap lines 751// int32 curr_line_length = 0; 752// int32 last_space = 0; 753// 754// for (int32 i = 0; i < modified.Length(); i++) { 755// if (isspace(modified.ByteAt(i))) 756// last_space = i; 757// 758// if ((modified.ByteAt(i) == '\r') && (modified.ByteAt(i+1) == '\n')) 759// curr_line_length = 0; 760// else 761// curr_line_length++; 762// 763// if (curr_line_length > 80) { 764// if (last_space >= 0) { 765// modified.Insert("\r\n",last_space); 766// last_space = -1; 767// curr_line_length = 0; 768// } 769// } 770// } 771 } 772 modified << "\r\n"; 773 774 render_to->Write(modified.String(), modified.Length()); 775 776 return B_OK; 777} 778 779 780void BTextMailComponent::_ReservedText1() {} 781void BTextMailComponent::_ReservedText2() {} 782