1/* 2 * Copyright 2001-2009, Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Philippe Houdoin 7 * Simon Gauvin 8 * Michael Pfeiffer 9 */ 10 11#include <stdio.h> 12#include <string.h> 13#include <math.h> 14 15#include <Debug.h> 16#include <StorageKit.h> 17#include <TranslationKit.h> 18#include <support/UTF8.h> 19 20#include "PDFWriter.h" 21#include "Link.h" 22#include "Bookmark.h" 23#include "DrawShape.h" 24#include "XReferences.h" 25#include "Log.h" 26#include "Report.h" 27#include "pdflib.h" 28 29 30typedef struct { 31 uint16 from; 32 uint16 to; 33 int16 length; 34 uint16 *unicodes; 35} unicode_to_encoding; 36 37typedef struct { 38 uint16 unicode; 39 uint16 cid; 40} unicode_to_cid; 41 42typedef struct { 43 uint16 length; 44 unicode_to_cid *table; 45} cid_table; 46 47#ifdef UNICODE5_FROM 48# error check code! 49#endif 50 51#define ELEMS(v, e) sizeof(v) / sizeof(e) 52 53// Adobe Glyph List 54#include "enc_range.h" 55#include "unicode0.h" 56#include "unicode1.h" 57#include "unicode2.h" 58#include "unicode3.h" 59#include "unicode4.h" 60 61 62static unicode_to_encoding encodings[] = { 63 {UNICODE0_FROM, UNICODE0_TO, ELEMS(unicode0, uint16), unicode0}, 64 {UNICODE1_FROM, UNICODE1_TO, ELEMS(unicode1, uint16), unicode1}, 65 {UNICODE2_FROM, UNICODE2_TO, ELEMS(unicode2, uint16), unicode2}, 66 {UNICODE3_FROM, UNICODE3_TO, ELEMS(unicode3, uint16), unicode3}, 67 {UNICODE4_FROM, UNICODE4_TO, ELEMS(unicode4, uint16), unicode4} 68}; 69 70// unicode to cid 71#include "japanese.h" 72#include "gb1.h" 73#include "cns1.h" 74#include "korean.h" 75 76 77static cid_table cid_tables[] = { 78 {ELEMS(japanese, unicode_to_cid), japanese}, 79 {ELEMS(CNS1, unicode_to_cid), CNS1}, 80 {ELEMS(GB1, unicode_to_cid), GB1}, 81 {ELEMS(korean, unicode_to_cid), korean} 82}; 83 84static const char* encoding_names[] = { 85 "macroman", 86 // TrueType 87 "ttenc0", 88 "ttenc1", 89 "ttenc2", 90 "ttenc3", 91 "ttenc4", 92 // Type 1 93 "t1enc0", 94 "t1enc1", 95 "t1enc2", 96 "t1enc3", 97 "t1enc4", 98 // CJK 99 "UniJIS-UCS2-H", 100 "UniCNS-UCS2-H", 101 "UniGB-UCS2-H", 102 "UniKS-UCS2-H" 103}; 104 105 106// #pragma mark - 107 108 109static bool 110find_encoding(uint16 unicode, uint8 &encoding, uint16 &index) 111{ 112 for (unsigned int i = 0; i < ELEMS(encodings, unicode_to_encoding); i++) { 113 if (encodings[i].from <= unicode && unicode <= encodings[i].to) { 114 int16 bottom = 0; 115 int16 top = encodings[i].length-1; 116 uint16* codes = encodings[i].unicodes; 117 while (top >= bottom) { 118 int16 m = (top + bottom) / 2; 119 if (unicode < codes[m]) { 120 top = m-1; 121 } else if (unicode > codes[m]) { 122 bottom = m+1; 123 } else { 124 index = m; 125 encoding = i; 126 return true; 127 } 128 } 129 return false; 130 } 131 } 132 return false; 133} 134 135 136static bool 137find_in_cid_tables(uint16 unicode, font_encoding &encoding, uint16 &index, 138 font_encoding* order) 139{ 140 for (unsigned int i = 0; i < ELEMS(cid_tables, cid_table); i++) { 141 encoding = order[i]; 142 if (encoding == invalid_encoding) break; 143 int index = encoding - first_cjk_encoding; 144 int32 bottom = 0; 145 int32 top = cid_tables[index].length-1; 146 unicode_to_cid *table = cid_tables[index].table; 147 while (top >= bottom) { 148 int32 m = (top + bottom) / 2; 149 if (unicode < table[m].unicode) { 150 top = m-1; 151 } else if (unicode > table[m].unicode) { 152 bottom = m+1; 153 } else { 154 index = table[m].cid; 155 return true; 156 } 157 } 158 } 159 return false; 160} 161 162 163void 164PDFWriter::MakeUserDefinedEncoding(uint16 unicode, uint8 &enc, uint8 &index) 165{ 166 if (fUserDefinedEncodings.Get(unicode, enc, index)) { 167 BString s("user"); 168 s << (int)enc; 169 PDF_encoding_set_char(fPdf, s.String(), (int)index, NULL, (int)unicode); 170 } 171} 172 173 174void 175PDFWriter::RecordFont(const char* family, const char* style, float size) 176{ 177 const int32 n = fUsedFonts.CountItems(); 178 for (int32 i = 0; i < n; i ++) { 179 if (fUsedFonts.ItemAt(i)->Equals(family, style, size)) return; 180 } 181 182 UsedFont* font; 183 font = new UsedFont(family, style, size); 184 fUsedFonts.AddItem(font); 185 186 REPORT(kInfo, -1, "Used font: \"%s\" \"%s\" %f", family, style, size); 187} 188 189 190void 191PDFWriter::GetFontName(BFont *font, char *fontname) 192{ 193 font_family family; 194 font_style style; 195 196 font->GetFamilyAndStyle(&family, &style); 197 strcat(strcat(strcpy(fontname, family), "-"), style); 198 199 RecordFont(family, style, font->Size()); 200} 201 202 203void 204PDFWriter::GetFontName(BFont *font, char *fontname, bool &embed, 205 font_encoding encoding) 206{ 207 GetFontName(font, fontname); 208 209 switch (encoding) { 210 case japanese_encoding: 211 strcpy(fontname, "HeiseiMin-W3"); return; 212 case chinese_cns1_encoding: 213 strcpy(fontname, "MHei-Medium"); return; 214 case chinese_gb1_encoding: 215 strcpy(fontname, "STSong-Light"); return; 216 case korean_encoding: 217 strcpy(fontname, "HYGoThic-Medium"); return; 218 default:; 219 } 220} 221 222 223int 224PDFWriter::FindFont(char* fontName, bool embed, font_encoding encoding) 225{ 226 static Font* cache = NULL; 227 if (cache && cache->encoding == encoding 228 && strcmp(cache->name.String(), fontName) == 0) 229 return cache->font; 230 231 REPORT(kDebug, fPage, "FindFont %s", fontName); 232 Font *f = NULL; 233 const int n = fFontCache.CountItems(); 234 for (int i = 0; i < n; i++) { 235 f = fFontCache.ItemAt(i); 236 if (f->encoding == encoding && strcmp(f->name.String(), fontName) == 0) { 237 cache = f; 238 return f->font; 239 } 240 } 241 242 if (embed) embed = EmbedFont(fontName); 243 244 BString s; 245 const char* encoding_name; 246 if (encoding < user_defined_encoding_start) { 247 encoding_name = encoding_names[encoding]; 248 } else { 249 s = "user"; 250 s << (int)(encoding - user_defined_encoding_start); 251 encoding_name = s.String(); 252 } 253 REPORT(kDebug, fPage, "Create new font, %sembed, encoding %s", 254 embed ? "" : "do not ", encoding_name); 255 int font = PDF_findfont(fPdf, fontName, encoding_name, embed); 256 if (font != -1) { 257 REPORT(kDebug, fPage, "font created"); 258 cache = new Font(fontName, font, encoding); 259 fFontCache.AddItem(cache); 260 } else { 261 REPORT(kError, fPage, "Could not create font '%s': %s", fontName, 262 PDF_get_errmsg(fPdf)); 263 } 264 return font; 265} 266 267 268void 269PDFWriter::ToUtf8(uint32 encoding, const char *string, BString &utf8) 270{ 271 int32 len = strlen(string); 272 int32 srcLen = len, destLen = 255; 273 int32 state = 0; 274 char buffer[256]; 275 int32 srcStart = 0; 276 277 do { 278 convert_to_utf8(encoding, &string[srcStart], &srcLen, buffer, &destLen, 279 &state); 280 srcStart += srcLen; 281 len -= srcLen; 282 srcLen = len; 283 284 utf8.Append(buffer, destLen); 285 destLen = 255; 286 } while (len > 0); 287}; 288 289 290void 291PDFWriter::ToUnicode(const char *string, BString &unicode) 292{ 293 int32 len = strlen(string); 294 int32 srcLen = len, destLen = 255; 295 int32 state = 0; 296 char buffer[256]; 297 int32 srcStart = 0; 298 int i = 0; 299 300 unicode = ""; 301 if (len == 0) return; 302 303 do { 304 convert_from_utf8(B_UNICODE_CONVERSION, &string[srcStart], &srcLen, 305 buffer, &destLen, &state); 306 srcStart += srcLen; 307 len -= srcLen; 308 srcLen = len; 309 310 char *b = unicode.LockBuffer(i + destLen); 311 memcpy(&b[i], buffer, destLen); 312 unicode.UnlockBuffer(i + destLen); 313 i += destLen; 314 destLen = 255; 315 } while (len > 0); 316} 317 318 319void 320PDFWriter::ToPDFUnicode(const char *string, BString &unicode) 321{ 322 // PDFlib requires BOM at begin and two 0 at end of string 323 char marker[3] = { 0xfe, 0xff, 0}; // byte order marker 324 BString s; 325 ToUnicode(string, s); 326 unicode << marker; 327 int32 len = s.Length()+2; 328 char* buf = unicode.LockBuffer(len + 2); 329 // reserve space for two additional '\0' 330 memcpy(&buf[2], s.String(), s.Length()); 331 buf[len] = buf[len+1] = 0; 332 unicode.UnlockBuffer(len + 2); 333} 334 335 336uint16 337PDFWriter::CodePointSize(const char* s) 338{ 339 uint16 i = 1; 340 for (s++; !BeginsChar(*s); s++) i++; 341 return i; 342} 343 344 345void 346PDFWriter::RecordDests(const char* s) 347{ 348 ::RecordDests record(fXRefDests, &fTextLine, fPage); 349 fXRefs->Matches(s, &record, true); 350} 351 352 353void 354PDFWriter::DrawChar(uint16 unicode, const char* utf8, int16 size) 355{ 356 // try to convert from utf8 to MacRoman encoding schema... 357 int32 srcLen = size; 358 int32 destLen = 1; 359 char dest[3] = "\0\0"; 360 int32 state = 0; 361 bool embed = true; 362 font_encoding encoding = macroman_encoding; 363 char fontName[B_FONT_FAMILY_LENGTH+B_FONT_STYLE_LENGTH+1]; 364 365 if (convert_from_utf8(B_MAC_ROMAN_CONVERSION, utf8, &srcLen, dest, &destLen, 366 &state, 0) != B_OK || dest[0] == 0) { 367 // could not convert to MacRoman 368 font_encoding fenc; 369 uint16 index = 0; 370 uint8 enc; 371 372 GetFontName(&fState->beFont, fontName); 373 embed = EmbedFont(fontName); 374 375 REPORT(kDebug, -1, "find_encoding unicode %d\n", (int)unicode); 376 if (find_encoding(unicode, enc, index)) { 377 // is code point in the Adobe Glyph List? 378 // Note if rendering the glyphs only would be desired, we could 379 // always use the second method below (MakeUserDefinedEncoding), 380 // but extracting text from the generated PDF would be almost 381 // impossible (OCR!) 382 REPORT(kDebug, -1, "encoding for %x -> %d %d", unicode, (int)enc, 383 (int)index); 384 // use one of the user pre-defined encodings 385 if (fState->beFont.FileFormat() == B_TRUETYPE_WINDOWS) { 386 encoding = font_encoding(enc + tt_encoding0); 387 } else { 388 encoding = font_encoding(enc + t1_encoding0); 389 } 390 *dest = index; 391 } else if (embed) { 392 // if the font is embedded, create a user defined encoding at runtime 393 uint8 index; 394 MakeUserDefinedEncoding(unicode, enc, index); 395 *dest = index; 396 encoding = font_encoding(user_defined_encoding_start + enc); 397 } else if (find_in_cid_tables(unicode, fenc, index, fFontSearchOrder)) { 398 // font is not embedded use one of the CJK fonts for substitution 399 REPORT(kDebug, -1, "cid table %d index = %d", (int)fenc, (int)index); 400 dest[0] = unicode / 256; 401 dest[1] = unicode % 256; 402 destLen = 2; 403 encoding = fenc; 404 embed = false; 405 } else { 406 static bool found = false; 407 REPORT(kDebug, -1, "encoding for %x not found!", (int)unicode); 408 if (!found) { 409 found = true; 410 REPORT(kError, fPage, "Could not find an encoding for character " 411 "with unicode %d! Message is not repeated for other unicode " 412 "values.", (int)unicode); 413 } 414 *dest = 0; // paint a box (is 0 a box in MacRoman) or 415 return; // simply skip character 416 } 417 } else { 418 REPORT(kDebug, -1, "macroman srcLen=%d destLen=%d dest= %d %d!", srcLen, 419 destLen, (int)dest[0], (int)dest[1]); 420 } 421 422 // Note we have to build the user defined encoding before it is used in 423 // PDF_find_font! 424 if (!MakesPDF()) return; 425 426 int font; 427 428 GetFontName(&fState->beFont, fontName, embed, encoding); 429 font = FindFont(fontName, embed, encoding); 430 if (font < 0) { 431 REPORT(kWarning, fPage, "**** PDF_findfont(%s) failed, back to default " 432 "font", fontName); 433 font = PDF_findfont(fPdf, "Helvetica", "macroman", 0); 434 } 435 436 fState->font = font; 437 438 uint16 face = fState->beFont.Face(); 439 PDF_set_parameter(fPdf, "underline", (face & B_UNDERSCORE_FACE) != 0 440 ? "true" : "false"); 441 PDF_set_parameter(fPdf, "strikeout", (face & B_STRIKEOUT_FACE) != 0 442 ? "true" : "false"); 443 PDF_set_value(fPdf, "textrendering", (face & B_OUTLINED_FACE) != 0 ? 1 : 0); 444 445 PDF_setfont(fPdf, fState->font, scale(fState->beFont.Size())); 446 447 const float x = tx(fState->penX); 448 const float y = ty(fState->penY); 449 const float rotation = fState->beFont.Rotation(); 450 const bool rotate = rotation != 0.0; 451 452 if (rotate) { 453 PDF_save(fPdf); 454 PDF_translate(fPdf, x, y); 455 PDF_rotate(fPdf, rotation); 456 PDF_set_text_pos(fPdf, 0, 0); 457 } else 458 PDF_set_text_pos(fPdf, x, y); 459 460 PDF_show2(fPdf, dest, destLen); 461 462 if (rotate) { 463 PDF_restore(fPdf); 464 } 465} 466 467 468void 469PDFWriter::ClipChar(BFont* font, const char* unicode, const char* utf8, 470 int16 size, float width) 471{ 472 BShape glyph; 473 bool hasGlyph[1]; 474 font->GetHasGlyphs(utf8, 1, hasGlyph); 475 if (hasGlyph[0]) { 476 BShape *glyphs[1]; 477 glyphs[0] = &glyph; 478 font->GetGlyphShapes(utf8, 1, glyphs); 479 } else { 480 REPORT(kWarning, fPage, "glyph for %*.*s not found!", size, size, utf8); 481 // create a rectangle instead 482 font_height height; 483 fState->beFont.GetHeight(&height); 484 BRect r(0, 0, width, height.ascent); 485 float w = r.Width() < r.Height() ? r.Width()*0.1 : r.Height()*0.1; 486 BRect o = r; o.InsetBy(w, w); 487 w *= 2.0; 488 BRect i = r; i.InsetBy(w, w); 489 490 o.OffsetBy(0, -height.ascent); 491 i.OffsetBy(0, -height.ascent); 492 493 glyph.MoveTo(BPoint(o.left, o.top)); 494 glyph.LineTo(BPoint(o.right, o.top)); 495 glyph.LineTo(BPoint(o.right, o.bottom)); 496 glyph.LineTo(BPoint(o.left, o.bottom)); 497 glyph.Close(); 498 499 glyph.MoveTo(BPoint(i.left, i.top)); 500 glyph.LineTo(BPoint(i.left, i.bottom)); 501 glyph.LineTo(BPoint(i.right, i.bottom)); 502 glyph.LineTo(BPoint(i.right, i.top)); 503 glyph.Close(); 504 } 505 506 BPoint p(fState->penX, fState->penY); 507 PushInternalState(); SetOrigin(p); 508 { 509 DrawShape iterator(this, false); 510 iterator.Iterate(&glyph); 511 } 512 PopInternalState(); 513} 514 515 516void 517PDFWriter::DrawString(char *string, float escapementNoSpace, 518 float escapementSpace) 519{ 520 REPORT(kDebug, fPage, "DrawString string=\"%s\", escapementNoSpace=%f, " 521 "escapementSpace=%f, at %f, %f", string, escapementNoSpace, 522 escapementSpace, fState->penX, fState->penY); 523 524 if (IsDrawing()) { 525 // text color is always the high color and not the pattern! 526 SetColor(fState->foregroundColor); 527 } 528 // convert string to UTF8 529 BString utf8; 530 if (fState->beFont.Encoding() == B_UNICODE_UTF8) { 531 utf8 = string; 532 } else { 533 ToUtf8(fState->beFont.Encoding()-1, string, utf8); 534 } 535 536 // convert string in UTF8 to unicode UCS2 537 BString unicode; 538 ToUnicode(utf8.String(), unicode); 539 // need font object to calculate width of utf8 code point 540 BFont font = fState->beFont; 541 font.SetEncoding(B_UNICODE_UTF8); 542 // constants to calculate position of next character 543 const double rotation = DEGREE2RAD(fState->beFont.Rotation()); 544 const bool rotate = rotation != 0.0; 545 const double cos1 = rotate ? cos(rotation) : 1; 546 const double sin1 = rotate ? -sin(rotation) : 0; 547 548 BPoint start(fState->penX, fState->penY); 549 550 BeginTransparency(); 551 // If !MakesPDF() all the effort below just for the bounding box! 552 // draw each character 553 const char *c = utf8.String(); 554 const unsigned char *u = (unsigned char*)unicode.String(); 555 for (int i = 0; i < unicode.Length(); i += 2) { 556 int s = CodePointSize((char*)c); 557 558 float w = font.StringWidth(c, s); 559 560 if (MakesPDF() && IsClipping()) { 561 ClipChar(&font, (char*)u, c, s, w); 562 } else { 563 DrawChar(u[0]*256+u[1], c, s); 564 } 565 566 // position of next character 567 if (*(unsigned char*)c <= 0x20) { // should test if c is a white-space! 568 w += escapementSpace; 569 } else { 570 w += escapementNoSpace; 571 } 572 573 fState->penX += w * cos1; 574 fState->penY += w * sin1; 575 576 // next character 577 c += s; u += 2; 578 } 579 EndTransparency(); 580 581 // text line processing (for non rotated text only!) 582 BPoint end(fState->penX, fState->penY); 583 BRect bounds; 584 font_height height; 585 586 font.GetHeight(&height); 587 588 bounds.left = start.x; 589 bounds.right = end.x; 590 bounds.top = start.y - height.ascent; 591 bounds.bottom = end.y + height.descent; 592 593 TextSegment* segment = new TextSegment(utf8.String(), start, escapementSpace, 594 escapementNoSpace, &bounds, &font, pdfSystem()); 595 596 fTextLine.Add(segment); 597} 598 599 600bool 601PDFWriter::EmbedFont(const char* name) 602{ 603 static FontFile* cache = NULL; 604 if (cache && strcmp(cache->Name(), name) == 0) return cache->Embed(); 605 606 const int n = fFonts->Length(); 607 for (int i = 0; i < n; i++) { 608 FontFile* f = fFonts->At(i); 609 if (strcmp(f->Name(), name) == 0) { 610 cache = f; 611 return f->Embed(); 612 } 613 } 614 return false; 615} 616