1/*
2 * Copyright 2001-2009, Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 *		Philippe Houdoin
7 *		Simon Gauvin
8 *		Michael Pfeiffer
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include <math.h>
14
15#include <Debug.h>
16#include <StorageKit.h>
17#include <TranslationKit.h>
18#include <support/UTF8.h>
19
20#include "PDFWriter.h"
21#include "Link.h"
22#include "Bookmark.h"
23#include "DrawShape.h"
24#include "XReferences.h"
25#include "Log.h"
26#include "Report.h"
27#include "pdflib.h"
28
29
30typedef struct {
31	uint16 from;
32	uint16 to;
33	int16  length;
34	uint16 *unicodes;
35} unicode_to_encoding;
36
37typedef struct {
38	uint16 unicode;
39	uint16 cid;
40} unicode_to_cid;
41
42typedef struct {
43	uint16         length;
44	unicode_to_cid *table;
45} cid_table;
46
// Abort the build if UNICODE5_FROM is already defined at this point. The
// "encodings" table below only lists the ranges UNICODE0..UNICODE4, so this
// presumably flags that a further range was added and the code has to be
// extended.
// NOTE(review): the check runs before "enc_range.h" is included in this file;
// confirm the macro is visible here when it is defined.
#ifdef UNICODE5_FROM
#	error check code!
#endif
50
// Number of elements of type "e" in array "v". The expansion is fully
// parenthesized so that the macro can be used inside larger expressions
// (e.g. "x / ELEMS(v, e)") without changing its value.
#define ELEMS(v, e) (sizeof(v) / sizeof(e))
52
53// Adobe Glyph List
54#include "enc_range.h"
55#include "unicode0.h"
56#include "unicode1.h"
57#include "unicode2.h"
58#include "unicode3.h"
59#include "unicode4.h"
60
61
// Table of the unicode ranges searched by find_encoding(). The position of
// an entry in this array is the "encoding" value find_encoding() reports;
// DrawChar() adds it to tt_encoding0 / t1_encoding0, so the order of the
// entries must not be changed.
static unicode_to_encoding encodings[] = {
	{UNICODE0_FROM, UNICODE0_TO, ELEMS(unicode0, uint16), unicode0},
	{UNICODE1_FROM, UNICODE1_TO, ELEMS(unicode1, uint16), unicode1},
	{UNICODE2_FROM, UNICODE2_TO, ELEMS(unicode2, uint16), unicode2},
	{UNICODE3_FROM, UNICODE3_TO, ELEMS(unicode3, uint16), unicode3},
	{UNICODE4_FROM, UNICODE4_TO, ELEMS(unicode4, uint16), unicode4}
};
69
70// unicode to cid
71#include "japanese.h"
72#include "gb1.h"
73#include "cns1.h"
74#include "korean.h"
75
76
// Unicode -> CID tables of the CJK encodings. find_in_cid_tables() indexes
// this array with (encoding - first_cjk_encoding).
// NOTE(review): the entry order presumably has to match the order of the CJK
// values in font_encoding (japanese, CNS1, GB1, korean) - confirm against the
// enum declaration.
static cid_table cid_tables[] = {
	{ELEMS(japanese, unicode_to_cid), japanese},
	{ELEMS(CNS1,     unicode_to_cid), CNS1},
	{ELEMS(GB1,      unicode_to_cid), GB1},
	{ELEMS(korean,   unicode_to_cid), korean}
};
83
// Encoding names passed to PDF_findfont(). FindFont() indexes this array
// directly with the font_encoding value (for every encoding below
// user_defined_encoding_start), so the order of the names is significant.
static const char* encoding_names[] = {
	"macroman",
	// TrueType
	"ttenc0",
	"ttenc1",
	"ttenc2",
	"ttenc3",
	"ttenc4",
	// Type 1
	"t1enc0",
	"t1enc1",
	"t1enc2",
	"t1enc3",
	"t1enc4",
	// CJK
	"UniJIS-UCS2-H",
	"UniCNS-UCS2-H",
	"UniGB-UCS2-H",
	"UniKS-UCS2-H"
};
104
105
106//	#pragma mark -
107
108
109static bool
110find_encoding(uint16 unicode, uint8 &encoding, uint16 &index)
111{
112	for (unsigned int i = 0; i < ELEMS(encodings, unicode_to_encoding); i++) {
113		if (encodings[i].from <= unicode && unicode <= encodings[i].to) {
114			int16 bottom = 0;
115			int16 top = encodings[i].length-1;
116			uint16* codes = encodings[i].unicodes;
117			while (top >= bottom) {
118			    int16 m = (top + bottom) / 2;
119				if (unicode < codes[m]) {
120					top = m-1;
121				} else if (unicode > codes[m]) {
122					bottom = m+1;
123				} else {
124					index = m;
125					encoding = i;
126					return true;
127				}
128			}
129		    return false;
130		}
131	}
132	return false;
133}
134
135
136static bool
137find_in_cid_tables(uint16 unicode, font_encoding &encoding, uint16 &index,
138	font_encoding* order)
139{
140	for (unsigned int i = 0; i < ELEMS(cid_tables, cid_table); i++) {
141		encoding = order[i];
142		if (encoding == invalid_encoding) break;
143		int index = encoding - first_cjk_encoding;
144		int32 bottom = 0;
145		int32 top = cid_tables[index].length-1;
146		unicode_to_cid *table = cid_tables[index].table;
147		while (top >= bottom) {
148		    int32 m = (top + bottom) / 2;
149			if (unicode < table[m].unicode) {
150				top = m-1;
151			} else if (unicode > table[m].unicode) {
152				bottom = m+1;
153			} else {
154				index = table[m].cid;
155				return true;
156			}
157		}
158	}
159	return false;
160}
161
162
163void
164PDFWriter::MakeUserDefinedEncoding(uint16 unicode, uint8 &enc, uint8 &index)
165{
166	if (fUserDefinedEncodings.Get(unicode, enc, index)) {
167		BString s("user");
168		s << (int)enc;
169		PDF_encoding_set_char(fPdf, s.String(), (int)index, NULL, (int)unicode);
170	}
171}
172
173
174void
175PDFWriter::RecordFont(const char* family, const char* style, float size)
176{
177	const int32 n = fUsedFonts.CountItems();
178	for (int32 i = 0; i < n; i ++) {
179		if (fUsedFonts.ItemAt(i)->Equals(family, style, size)) return;
180	}
181
182	UsedFont* font;
183	font = new UsedFont(family, style, size);
184	fUsedFonts.AddItem(font);
185
186	REPORT(kInfo, -1, "Used font: \"%s\" \"%s\" %f", family, style, size);
187}
188
189
190void
191PDFWriter::GetFontName(BFont *font, char *fontname)
192{
193	font_family family;
194	font_style  style;
195
196	font->GetFamilyAndStyle(&family, &style);
197	strcat(strcat(strcpy(fontname, family), "-"), style);
198
199	RecordFont(family, style, font->Size());
200}
201
202
203void
204PDFWriter::GetFontName(BFont *font, char *fontname, bool &embed,
205	font_encoding encoding)
206{
207	GetFontName(font, fontname);
208
209	switch (encoding) {
210		case japanese_encoding:
211			strcpy(fontname, "HeiseiMin-W3"); return;
212		case chinese_cns1_encoding:
213			strcpy(fontname, "MHei-Medium"); return;
214		case chinese_gb1_encoding:
215			strcpy(fontname, "STSong-Light"); return;
216		case korean_encoding:
217			strcpy(fontname, "HYGoThic-Medium"); return;
218		default:;
219	}
220}
221
222
223int
224PDFWriter::FindFont(char* fontName, bool embed, font_encoding encoding)
225{
226	static Font* cache = NULL;
227	if (cache && cache->encoding == encoding
228		&& strcmp(cache->name.String(), fontName) == 0)
229		return cache->font;
230
231	REPORT(kDebug, fPage, "FindFont %s", fontName);
232	Font *f = NULL;
233	const int n = fFontCache.CountItems();
234	for (int i = 0; i < n; i++) {
235		f = fFontCache.ItemAt(i);
236		if (f->encoding == encoding && strcmp(f->name.String(), fontName) == 0) {
237			cache = f;
238			return f->font;
239		}
240	}
241
242	if (embed) embed = EmbedFont(fontName);
243
244	BString s;
245	const char* encoding_name;
246	if (encoding < user_defined_encoding_start) {
247		encoding_name = encoding_names[encoding];
248	} else {
249		s = "user";
250		s << (int)(encoding - user_defined_encoding_start);
251		encoding_name = s.String();
252	}
253	REPORT(kDebug, fPage, "Create new font, %sembed, encoding %s",
254		embed ? "" : "do not ", encoding_name);
255	int font = PDF_findfont(fPdf, fontName, encoding_name, embed);
256	if (font != -1) {
257		REPORT(kDebug, fPage, "font created");
258		cache = new Font(fontName, font, encoding);
259		fFontCache.AddItem(cache);
260	} else {
261		REPORT(kError, fPage, "Could not create font '%s': %s", fontName,
262			PDF_get_errmsg(fPdf));
263	}
264	return font;
265}
266
267
268void
269PDFWriter::ToUtf8(uint32 encoding, const char *string, BString &utf8)
270{
271	int32 len = strlen(string);
272	int32 srcLen = len, destLen = 255;
273	int32 state = 0;
274	char buffer[256];
275	int32 srcStart = 0;
276
277	do {
278		convert_to_utf8(encoding, &string[srcStart], &srcLen, buffer, &destLen,
279			&state);
280		srcStart += srcLen;
281		len -= srcLen;
282		srcLen = len;
283
284		utf8.Append(buffer, destLen);
285		destLen = 255;
286	} while (len > 0);
287};
288
289
290void
291PDFWriter::ToUnicode(const char *string, BString &unicode)
292{
293	int32 len = strlen(string);
294	int32 srcLen = len, destLen = 255;
295	int32 state = 0;
296	char buffer[256];
297	int32 srcStart = 0;
298	int i = 0;
299
300	unicode = "";
301	if (len == 0) return;
302
303	do {
304		convert_from_utf8(B_UNICODE_CONVERSION, &string[srcStart], &srcLen,
305			buffer, &destLen, &state);
306		srcStart += srcLen;
307		len -= srcLen;
308		srcLen = len;
309
310		char *b = unicode.LockBuffer(i + destLen);
311		memcpy(&b[i], buffer, destLen);
312		unicode.UnlockBuffer(i + destLen);
313		i += destLen;
314		destLen = 255;
315	} while (len > 0);
316}
317
318
319void
320PDFWriter::ToPDFUnicode(const char *string, BString &unicode)
321{
322	// PDFlib requires BOM at begin and two 0 at end of string
323	char marker[3] = { 0xfe, 0xff, 0}; // byte order marker
324	BString s;
325	ToUnicode(string, s);
326	unicode << marker;
327	int32 len = s.Length()+2;
328	char* buf = unicode.LockBuffer(len + 2);
329		// reserve space for two additional '\0'
330	memcpy(&buf[2], s.String(), s.Length());
331	buf[len] = buf[len+1] = 0;
332	unicode.UnlockBuffer(len + 2);
333}
334
335
336uint16
337PDFWriter::CodePointSize(const char* s)
338{
339	uint16 i = 1;
340	for (s++; !BeginsChar(*s); s++) i++;
341	return i;
342}
343
344
345void
346PDFWriter::RecordDests(const char* s)
347{
348	::RecordDests record(fXRefDests, &fTextLine, fPage);
349	fXRefs->Matches(s, &record, true);
350}
351
352
353void
354PDFWriter::DrawChar(uint16 unicode, const char* utf8, int16 size)
355{
356	// try to convert from utf8 to MacRoman encoding schema...
357	int32 srcLen  = size;
358	int32 destLen = 1;
359	char dest[3] = "\0\0";
360	int32 state = 0;
361	bool embed = true;
362	font_encoding encoding = macroman_encoding;
363	char fontName[B_FONT_FAMILY_LENGTH+B_FONT_STYLE_LENGTH+1];
364
365	if (convert_from_utf8(B_MAC_ROMAN_CONVERSION, utf8, &srcLen, dest, &destLen,
366			&state, 0) != B_OK || dest[0] == 0) {
367		// could not convert to MacRoman
368		font_encoding fenc;
369		uint16 index = 0;
370		uint8 enc;
371
372		GetFontName(&fState->beFont, fontName);
373		embed = EmbedFont(fontName);
374
375		REPORT(kDebug, -1, "find_encoding unicode %d\n", (int)unicode);
376		if (find_encoding(unicode, enc, index)) {
377			// is code point in the Adobe Glyph List?
378			// Note if rendering the glyphs only would be desired, we could
379			// always use the second method below (MakeUserDefinedEncoding),
380			// but extracting text from the generated PDF would be almost
381			// impossible (OCR!)
382			REPORT(kDebug, -1, "encoding for %x -> %d %d", unicode, (int)enc,
383				(int)index);
384			// use one of the user pre-defined encodings
385			if (fState->beFont.FileFormat() == B_TRUETYPE_WINDOWS) {
386				encoding = font_encoding(enc + tt_encoding0);
387			} else {
388				encoding = font_encoding(enc + t1_encoding0);
389			}
390			*dest = index;
391		} else if (embed) {
392			// if the font is embedded, create a user defined encoding at runtime
393			uint8 index;
394			MakeUserDefinedEncoding(unicode, enc, index);
395			*dest = index;
396			encoding = font_encoding(user_defined_encoding_start + enc);
397		} else if (find_in_cid_tables(unicode, fenc, index, fFontSearchOrder)) {
398			// font is not embedded use one of the CJK fonts for substitution
399			REPORT(kDebug, -1, "cid table %d index = %d", (int)fenc, (int)index);
400			dest[0] = unicode / 256;
401			dest[1] = unicode % 256;
402			destLen = 2;
403			encoding = fenc;
404			embed = false;
405		} else {
406			static bool found = false;
407			REPORT(kDebug, -1, "encoding for %x not found!", (int)unicode);
408			if (!found) {
409				found = true;
410				REPORT(kError, fPage, "Could not find an encoding for character "
411					"with unicode %d! Message is not repeated for other unicode "
412					"values.", (int)unicode);
413			}
414			*dest = 0; // paint a box (is 0 a box in MacRoman) or
415			return; // simply skip character
416		}
417	} else {
418		REPORT(kDebug, -1, "macroman srcLen=%d destLen=%d dest= %d %d!", srcLen,
419			destLen, (int)dest[0], (int)dest[1]);
420	}
421
422	// Note we have to build the user defined encoding before it is used in
423	// PDF_find_font!
424	if (!MakesPDF()) return;
425
426	int		font;
427
428	GetFontName(&fState->beFont, fontName, embed, encoding);
429	font = FindFont(fontName, embed, encoding);
430	if (font < 0) {
431		REPORT(kWarning, fPage, "**** PDF_findfont(%s) failed, back to default "
432			"font", fontName);
433		font = PDF_findfont(fPdf, "Helvetica", "macroman", 0);
434	}
435
436	fState->font = font;
437
438	uint16 face = fState->beFont.Face();
439	PDF_set_parameter(fPdf, "underline", (face & B_UNDERSCORE_FACE) != 0
440		? "true" : "false");
441	PDF_set_parameter(fPdf, "strikeout", (face & B_STRIKEOUT_FACE) != 0
442		? "true" : "false");
443	PDF_set_value(fPdf, "textrendering", (face & B_OUTLINED_FACE) != 0 ? 1 : 0);
444
445	PDF_setfont(fPdf, fState->font, scale(fState->beFont.Size()));
446
447	const float x = tx(fState->penX);
448	const float y = ty(fState->penY);
449	const float rotation = fState->beFont.Rotation();
450	const bool rotate = rotation != 0.0;
451
452	if (rotate) {
453		PDF_save(fPdf);
454		PDF_translate(fPdf, x, y);
455		PDF_rotate(fPdf, rotation);
456	    PDF_set_text_pos(fPdf, 0, 0);
457	} else
458	    PDF_set_text_pos(fPdf, x, y);
459
460	PDF_show2(fPdf, dest, destLen);
461
462	if (rotate) {
463		PDF_restore(fPdf);
464	}
465}
466
467
468void
469PDFWriter::ClipChar(BFont* font, const char* unicode, const char* utf8,
470	int16 size, float width)
471{
472	BShape glyph;
473	bool hasGlyph[1];
474	font->GetHasGlyphs(utf8, 1, hasGlyph);
475	if (hasGlyph[0]) {
476		BShape *glyphs[1];
477		glyphs[0] = &glyph;
478		font->GetGlyphShapes(utf8, 1, glyphs);
479	} else {
480		REPORT(kWarning, fPage, "glyph for %*.*s not found!", size, size, utf8);
481		// create a rectangle instead
482		font_height height;
483		fState->beFont.GetHeight(&height);
484		BRect r(0, 0, width, height.ascent);
485		float w = r.Width() < r.Height() ? r.Width()*0.1 : r.Height()*0.1;
486		BRect o = r; o.InsetBy(w, w);
487		w *= 2.0;
488		BRect i = r; i.InsetBy(w, w);
489
490		o.OffsetBy(0, -height.ascent);
491		i.OffsetBy(0, -height.ascent);
492
493		glyph.MoveTo(BPoint(o.left,  o.top));
494		glyph.LineTo(BPoint(o.right, o.top));
495		glyph.LineTo(BPoint(o.right, o.bottom));
496		glyph.LineTo(BPoint(o.left,  o.bottom));
497		glyph.Close();
498
499		glyph.MoveTo(BPoint(i.left,  i.top));
500		glyph.LineTo(BPoint(i.left,  i.bottom));
501		glyph.LineTo(BPoint(i.right, i.bottom));
502		glyph.LineTo(BPoint(i.right, i.top));
503		glyph.Close();
504	}
505
506	BPoint p(fState->penX, fState->penY);
507	PushInternalState(); SetOrigin(p);
508	{
509		DrawShape iterator(this, false);
510		iterator.Iterate(&glyph);
511	}
512	PopInternalState();
513}
514
515
516void
517PDFWriter::DrawString(char *string, float escapementNoSpace,
518	float escapementSpace)
519{
520	REPORT(kDebug, fPage, "DrawString string=\"%s\", escapementNoSpace=%f, "
521		"escapementSpace=%f, at %f, %f", string, escapementNoSpace,
522		escapementSpace, fState->penX, fState->penY);
523
524	if (IsDrawing()) {
525		// text color is always the high color and not the pattern!
526		SetColor(fState->foregroundColor);
527	}
528	// convert string to UTF8
529	BString utf8;
530	if (fState->beFont.Encoding() == B_UNICODE_UTF8) {
531		utf8 = string;
532	} else {
533		ToUtf8(fState->beFont.Encoding()-1, string, utf8);
534	}
535
536	// convert string in UTF8 to unicode UCS2
537	BString unicode;
538	ToUnicode(utf8.String(), unicode);
539	// need font object to calculate width of utf8 code point
540	BFont font = fState->beFont;
541	font.SetEncoding(B_UNICODE_UTF8);
542	// constants to calculate position of next character
543	const double rotation = DEGREE2RAD(fState->beFont.Rotation());
544	const bool rotate = rotation != 0.0;
545	const double cos1 = rotate ? cos(rotation) : 1;
546	const double sin1 = rotate ? -sin(rotation) : 0;
547
548	BPoint start(fState->penX, fState->penY);
549
550	BeginTransparency();
551	// If !MakesPDF() all the effort below just for the bounding box!
552	// draw each character
553	const char *c = utf8.String();
554	const unsigned char *u = (unsigned char*)unicode.String();
555	for (int i = 0; i < unicode.Length(); i += 2) {
556		int s = CodePointSize((char*)c);
557
558		float w = font.StringWidth(c, s);
559
560		if (MakesPDF() && IsClipping()) {
561			ClipChar(&font, (char*)u, c, s, w);
562		} else {
563			DrawChar(u[0]*256+u[1], c, s);
564		}
565
566		// position of next character
567		if (*(unsigned char*)c <= 0x20) { // should test if c is a white-space!
568			w += escapementSpace;
569		} else {
570			w += escapementNoSpace;
571		}
572
573		fState->penX += w * cos1;
574		fState->penY += w * sin1;
575
576		// next character
577		c += s; u += 2;
578	}
579	EndTransparency();
580
581	// text line processing (for non rotated text only!)
582	BPoint end(fState->penX, fState->penY);
583	BRect bounds;
584	font_height height;
585
586	font.GetHeight(&height);
587
588	bounds.left = start.x;
589	bounds.right = end.x;
590	bounds.top = start.y - height.ascent;
591	bounds.bottom = end.y   + height.descent;
592
593	TextSegment* segment = new TextSegment(utf8.String(), start, escapementSpace,
594		escapementNoSpace, &bounds, &font, pdfSystem());
595
596	fTextLine.Add(segment);
597}
598
599
600bool
601PDFWriter::EmbedFont(const char* name)
602{
603	static FontFile* cache = NULL;
604	if (cache && strcmp(cache->Name(), name) == 0) return cache->Embed();
605
606	const int n = fFonts->Length();
607	for (int i = 0; i < n; i++) {
608		FontFile* f = fFonts->At(i);
609		if (strcmp(f->Name(), name) == 0) {
610			cache = f;
611			return f->Embed();
612		}
613	}
614	return false;
615}
616