1/*
 * Copyright 2004-2010, Axel Dörfler, axeld@pinc-software.de.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7#include "RTF.h"
8
9#include <ctype.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13
14#include <DataIO.h>
15
16
// Debug tracing: when TRACE_RTF is defined (uncomment the line below),
// TRACE() forwards its arguments to printf(); otherwise it expands to an
// empty statement.
//#define TRACE_RTF
#ifdef TRACE_RTF
#	define TRACE(x...) printf(x)
#else
#	define TRACE(x...) ;
#endif
23
24
// Control words that Group::DetermineDestination() maps to OTHER_DESTINATION.
// The table is searched with bsearch() using strcmp() ordering, so the
// entries must be kept sorted.
static const char *kDestinationControlWords[] = {
	"aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor", "atndate",
	"atnicn", "atnid", "atnparent", "atnref", "atntime", "atrfend",
	"atrfstart", "author", "background", "bkmkend", "buptim", "colortbl",
	"comment", "creatim", "do", "doccomm", "docvar", "fonttbl", "footer",
	"footerf", "footerl", "footerr", "footnote", "ftncn", "ftnsep",
	"ftnsepc", "header", "headerf", "headerl", "headerr", "info",
	"keywords", "operator", "pict", "printim", "private1", "revtim",
	"rxe", "stylesheet", "subject", "tc", "title", "txe", "xe",
};
35
// Forward declarations of the low-level stream helpers; they carry the
// default arguments (end of file is an error, numbers are decimal).
static char read_char(BDataIO &stream, bool endOfFileAllowed = false);
static int32 parse_integer(char first, BDataIO &stream, char &_last, int32 base = 10);
38
39
40using namespace RTF;
41
42
43static char
44read_char(BDataIO &stream, bool endOfFileAllowed)
45{
46	char c;
47	ssize_t bytesRead = stream.Read(&c, 1);
48
49	if (bytesRead < B_OK)
50		throw (status_t)bytesRead;
51
52	if (bytesRead == 0 && !endOfFileAllowed)
53		throw (status_t)B_ERROR;
54
55	return c;
56}
57
58
59static int32
60parse_integer(char first, BDataIO &stream, char &_last, int32 base)
61{
62	const char *kDigits = "0123456789abcdef";
63	int32 integer = 0;
64	int32 count = 0;
65
66	char digit = first;
67
68	if (digit == '\0')
69		digit = read_char(stream);
70
71	while (true) {
72		int32 pos = 0;
73		for (; pos < base; pos++) {
74			if (kDigits[pos] == tolower(digit)) {
75				integer = integer * base + pos;
76				count++;
77				break;
78			}
79		}
80		if (pos == base) {
81			_last = digit;
82			goto out;
83		}
84
85		digit = read_char(stream);
86	}
87
88out:
89	if (count == 0)
90		throw (status_t)B_BAD_TYPE;
91
92	return integer;
93}
94
95
// Comparison callback for bsearch() over an array of C strings: "array"
// points at the array slot being examined, "key" is the string searched for.
static int
string_array_compare(const char *key, const char **array)
{
	const char *candidate = *array;
	return strcmp(key, candidate);
}
101
102
103static void
104dump(Element &element, int32 level = 0)
105{
106	printf("%03" B_PRId32 " (%p):", level, &element);
107	for (int32 i = 0; i < level; i++)
108		printf("  ");
109
110	if (RTF::Header *header = dynamic_cast<RTF::Header *>(&element)) {
111		printf("<RTF header, major version %" B_PRId32 ">\n", header->Version());
112	} else if (RTF::Command *command = dynamic_cast<RTF::Command *>(&element)) {
113		printf("<Command: %s", command->Name());
114		if (command->HasOption())
115			printf(", Option %" B_PRId32, command->Option());
116		puts(">");
117	} else if (RTF::Text *text = dynamic_cast<RTF::Text *>(&element)) {
118		printf("<Text>");
119		puts(text->String());
120	} else if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element))
121		printf("<Group \"%s\">\n", group->Name());
122
123	if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) {
124		for (uint32 i = 0; i < group->CountElements(); i++)
125			dump(*group->ElementAt(i), level + 1);
126	}
127}
128
129
130//	#pragma mark -
131
132
// Creates a parser reading from "stream" through the fStream member; the
// stream is not touched until Identify() or Parse() is called.
// NOTE(review): 65536 and false are presumably the buffer size and the
// ownership flag of fStream - confirm against its declaration.
Parser::Parser(BPositionIO &stream)
	:
	fStream(&stream, 65536, false),
	fIdentified(false)
{
}
139
140
141status_t
142Parser::Identify()
143{
144	char header[5];
145	if (fStream.Read(header, sizeof(header)) < (ssize_t)sizeof(header))
146		return B_IO_ERROR;
147
148	if (strncmp(header, "{\\rtf", 5))
149		return B_BAD_TYPE;
150
151	fIdentified = true;
152	return B_OK;
153}
154
155
156status_t
157Parser::Parse(Header &header)
158{
159	if (!fIdentified && Identify() != B_OK)
160		return B_BAD_TYPE;
161
162	try {
163		int32 openBrackets = 1;
164
165		// since we already preparsed parts of the RTF header, the header
166		// is handled here directly
167		char last;
168		header.Parse('\0', fStream, last);
169
170		Group *parent = &header;
171		char c = last;
172
173		while (true) {
174			Element *element = NULL;
175
176			// we'll just ignore the end of the stream
177			if (parent == NULL)
178				return B_OK;
179
180			switch (c) {
181				case '{':
182					openBrackets++;
183					parent->AddElement(element = new Group());
184					parent = static_cast<Group *>(element);
185					break;
186
187				case '\\':
188					parent->AddElement(element = new Command());
189					break;
190
191				case '}':
192					openBrackets--;
193					parent->DetermineDestination();
194					parent = parent->Parent();
195					// supposed to fall through
196				case '\n':
197				case '\r':
198				{
199					ssize_t bytesRead = fStream.Read(&c, 1);
200					if (bytesRead < B_OK)
201						throw (status_t)bytesRead;
202					else if (bytesRead != 1) {
203						// this is the only valid exit status
204						if (openBrackets == 0)
205							return B_OK;
206
207						throw (status_t)B_ERROR;
208					}
209					continue;
210				}
211
212				default:
213					parent->AddElement(element = new Text());
214					break;
215			}
216
217			if (element == NULL)
218				throw (status_t)B_ERROR;
219
220			element->Parse(c, fStream, last);
221			c = last;
222		}
223	} catch (status_t status) {
224		return status;
225	}
226
227	return B_OK;
228}
229
230
231//	#pragma mark -
232
233
// A new element does not belong to any group yet.
Element::Element()
	:
	fParent(NULL)
{
}
239
240
// Nothing to release at this level.
Element::~Element()
{
}
244
245
// Records "parent" as the group this element belongs to; called by
// Group::AddElement().
void
Element::SetParent(Group *parent)
{
	fParent = parent;
}
251
252
// Returns the group this element was added to, or NULL if there is none.
Group *
Element::Parent() const
{
	return fParent;
}
258
259
// Generic elements never separate definitions; Text overrides this for ";".
bool
Element::IsDefinitionDelimiter()
{
	return false;
}
265
266
// Prints this element (and, for groups, its children) to stdout, indented
// by "level".
void
Element::PrintToStream(int32 level)
{
	dump(*this, level);
}
272
273
274//	#pragma mark -
275
276
// Groups are text destinations until DetermineDestination() says otherwise.
Group::Group()
	:
	fDestination(TEXT_DESTINATION)
{
}
282
283
284Group::~Group()
285{
286	Element *element;
287	while ((element = (Element *)fElements.RemoveItem((int32)0)) != NULL) {
288		delete element;
289	}
290}
291
292
293void
294Group::Parse(char first, BDataIO &stream, char &last)
295{
296	if (first == '\0')
297		first = read_char(stream);
298
299	if (first != '{')
300		throw (status_t)B_BAD_TYPE;
301
302	last = read_char(stream);
303}
304
305
306status_t
307Group::AddElement(Element *element)
308{
309	if (element == NULL)
310		return B_BAD_VALUE;
311
312	if (fElements.AddItem(element)) {
313		element->SetParent(this);
314		return B_OK;
315	}
316
317	return B_NO_MEMORY;
318}
319
320
// Returns the number of direct children of this group.
uint32
Group::CountElements() const
{
	return (uint32)fElements.CountItems();
}
326
327
// Returns the child at "index". Several loops in this file rely on it
// yielding NULL once "index" is past the last child.
Element *
Group::ElementAt(uint32 index) const
{
	return static_cast<Element *>(fElements.ItemAt(index));
}
333
334
335Element *
336Group::FindDefinitionStart(int32 index, int32 *_startIndex) const
337{
338	if (index < 0)
339		return NULL;
340
341	Element *element;
342	int32 number = 0;
343	for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
344		if (number == index) {
345			if (_startIndex)
346				*_startIndex = i;
347			return element;
348		}
349
350		if (element->IsDefinitionDelimiter())
351			number++;
352	}
353
354	return NULL;
355}
356
357
358Command *
359Group::FindDefinition(const char *name, int32 index) const
360{
361	int32 startIndex;
362	Element *element = FindDefinitionStart(index, &startIndex);
363	if (element == NULL)
364		return NULL;
365
366	for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) {
367		if (element->IsDefinitionDelimiter())
368			break;
369
370		if (Command *command = dynamic_cast<Command *>(element)) {
371			if (command != NULL && !strcmp(name, command->Name()))
372				return command;
373		}
374	}
375
376	return NULL;
377}
378
379
380Group *
381Group::FindGroup(const char *name) const
382{
383	Element *element;
384	for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
385		Group *group = dynamic_cast<Group *>(element);
386		if (group == NULL)
387			continue;
388
389		Command *command = dynamic_cast<Command *>(group->ElementAt(0));
390		if (command != NULL && !strcmp(name, command->Name()))
391			return group;
392	}
393
394	return NULL;
395}
396
397
398const char *
399Group::Name() const
400{
401	Command *command = dynamic_cast<Command *>(ElementAt(0));
402	if (command != NULL)
403		return command->Name();
404
405	return NULL;
406}
407
408
409void
410Group::DetermineDestination()
411{
412	const char *name = Name();
413	if (name == NULL)
414		return;
415
416	if (!strcmp(name, "*")) {
417		fDestination = COMMENT_DESTINATION;
418		return;
419	}
420
421	// binary search for destination control words
422
423	if (bsearch(name, kDestinationControlWords,
424			sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]),
425			sizeof(kDestinationControlWords[0]),
426			(int (*)(const void *, const void *))string_array_compare) != NULL)
427		fDestination = OTHER_DESTINATION;
428}
429
430
// Returns the destination last set by the constructor or
// DetermineDestination().
group_destination
Group::Destination() const
{
	return fDestination;
}
436
437
438//	#pragma mark -
439
440
// The version stays 0 until Parse() has read it from the stream.
Header::Header()
	:
	fVersion(0)
{
}
446
447
// Nothing to release at this level.
Header::~Header()
{
}
451
452
453void
454Header::Parse(char first, BDataIO &stream, char &last)
455{
456	// The stream has been peeked into by the parser already, and
457	// only the version follows in the stream -- let's pick it up
458
459	fVersion = parse_integer(first, stream, last);
460
461	// recreate "rtf" command to name this group
462
463	Command *command = new Command();
464	command->SetName("rtf");
465	command->SetOption(fVersion);
466
467	AddElement(command);
468}
469
470
// Returns the version number read by Parse(), or 0 before parsing.
int32
Header::Version() const
{
	return fVersion;
}
476
477
478const char *
479Header::Charset() const
480{
481	Command *command = dynamic_cast<Command *>(ElementAt(1));
482	if (command == NULL)
483		return NULL;
484
485	return command->Name();
486}
487
488
489rgb_color
490Header::Color(int32 index)
491{
492	rgb_color color = {0, 0, 0, 255};
493
494	Group *colorTable = FindGroup("colortbl");
495
496	if (colorTable != NULL) {
497		if (Command *gun = colorTable->FindDefinition("red", index))
498			color.red = gun->Option();
499		if (Command *gun = colorTable->FindDefinition("green", index))
500			color.green = gun->Option();
501		if (Command *gun = colorTable->FindDefinition("blue", index))
502			color.blue = gun->Option();
503	}
504
505	return color;
506}
507
508
509//	#pragma mark -
510
511
// Creates a text element; fText is default-constructed.
Text::Text()
{
}
515
516
// Resets the stored string via SetTo(NULL) before the element goes away.
Text::~Text()
{
	SetTo(NULL);
}
521
522
// A text consisting of a single ";" separates definitions; Parse() stores
// such a semicolon as a text of its own.
bool
Text::IsDefinitionDelimiter()
{
	return fText == ";";
}
528
529
530void
531Text::Parse(char first, BDataIO &stream, char &last)
532{
533	char c = first;
534	if (c == '\0')
535		c = read_char(stream);
536
537	if (c == ';') {
538		// definition delimiter
539		fText.SetTo(";");
540		last = read_char(stream);
541		return;
542	}
543
544	const size_t kBufferSteps = 1;
545	size_t maxSize = kBufferSteps;
546	char *text = fText.LockBuffer(maxSize);
547	if (text == NULL)
548		throw (status_t)B_NO_MEMORY;
549
550	size_t position = 0;
551
552	while (true) {
553		if (c == '\\' || c == '}' || c == '{' || c == ';' || c == '\n' || c == '\r')
554			break;
555
556		if (position >= maxSize) {
557			fText.UnlockBuffer(position);
558			text = fText.LockBuffer(maxSize += kBufferSteps);
559			if (text == NULL)
560				throw (status_t)B_NO_MEMORY;
561		}
562
563		text[position++] = c;
564
565		c = read_char(stream);
566	}
567	fText.UnlockBuffer(position);
568
569	// ToDo: add support for different charsets - right now, only ASCII is supported!
570	//	To achieve this, we should just translate everything into UTF-8 here
571
572	last = c;
573}
574
575
576status_t
577Text::SetTo(const char *text)
578{
579	return fText.SetTo(text) != NULL ? B_OK : B_NO_MEMORY;
580}
581
582
// Returns the stored text as a C string.
const char *
Text::String() const
{
	return fText.String();
}
588
589
// Returns the length of the stored text as reported by fText.
uint32
Text::Length() const
{
	return fText.Length();
}
595
596
597//	#pragma mark -
598
599
// A new command has no name and no option; fOption holds -1 while unset.
Command::Command()
	:
	fName(NULL),
	fHasOption(false),
	fOption(-1)
{
}
607
608
// Nothing to release at this level.
Command::~Command()
{
}
612
613
614void
615Command::Parse(char first, BDataIO &stream, char &last)
616{
617	if (first == '\0')
618		first = read_char(stream);
619
620	if (first != '\\')
621		throw (status_t)B_BAD_TYPE;
622
623	// get name
624	char name[kCommandLength];
625	size_t length = 0;
626	char c;
627	while (isalpha(c = read_char(stream))) {
628		name[length++] = c;
629		if (length >= kCommandLength - 1)
630			throw (status_t)B_BAD_TYPE;
631	}
632
633	if (length == 0) {
634		if (c == '\n' || c == '\r') {
635			// we're a hard return
636			fName.SetTo("par");
637		} else
638			fName.SetTo(c, 1);
639
640		// read over character
641		c = read_char(stream);
642	} else
643		fName.SetTo(name, length);
644
645	TRACE("command: %s\n", fName.String());
646
647	// parse numeric option
648
649	if (c == '-')
650		c = read_char(stream);
651
652	last = c;
653
654	if (fName == "'") {
655		// hexadecimal
656		char bytes[2];
657		bytes[0] = read_char(stream);
658		bytes[1] = '\0';
659		BMemoryIO memory(bytes, 2);
660
661		SetOption(parse_integer(c, memory, last, 16));
662		last = read_char(stream);
663	} else {
664		// decimal
665		if (isdigit(c))
666			SetOption(parse_integer(c, stream, last));
667
668		// a space delimiter is eaten up by the command
669		if (isspace(last))
670			last = read_char(stream);
671	}
672
673	if (HasOption())
674		TRACE("  option: %ld\n", fOption);
675}
676
677
678status_t
679Command::SetName(const char *name)
680{
681	return fName.SetTo(name) != NULL ? B_OK : B_NO_MEMORY;
682}
683
684
// Returns the name of the command as a C string.
const char *
Command::Name()
{
	return fName.String();
}
690
691
// Removes the option, restoring the state set up by the constructor.
void
Command::UnsetOption()
{
	fHasOption = false;
	fOption = -1;
}
698
699
// Stores "option" as the numeric parameter and marks it as present.
void
Command::SetOption(int32 option)
{
	fOption = option;
	fHasOption = true;
}
706
707
// Returns whether an option has been set (and not unset since).
bool
Command::HasOption() const
{
	return fHasOption;
}
713
714
// Returns the option value; this is -1 while no option is set, so check
// HasOption() to tell that apart from a real value.
int32
Command::Option() const
{
	return fOption;
}
720
721
722//	#pragma mark -
723
724
// Creates an iterator over the tree below "start"; see SetTo().
Iterator::Iterator(Element &start, group_destination destination)
{
	SetTo(start, destination);
}
729
730
// Makes the iterator traverse the tree below "start" and restarts it.
// Next() only descends into groups whose destination matches "destination"
// (or into all groups for ALL_DESTINATIONS).
void
Iterator::SetTo(Element &start, group_destination destination)
{
	fStart = &start;
	fDestination = destination;

	Rewind();
}
739
740
// Restarts the traversal: afterwards the stack holds only the start element.
void
Iterator::Rewind()
{
	fStack.MakeEmpty();
	fStack.Push(fStart);
}
747
748
// Returns whether Next() still has an element to deliver.
bool
Iterator::HasNext() const
{
	return !fStack.IsEmpty();
}
754
755
756Element *
757Iterator::Next()
758{
759	Element *element;
760
761	if (!fStack.Pop(&element))
762		return NULL;
763
764	Group *group = dynamic_cast<Group *>(element);
765	if (group != NULL
766		&& (fDestination == ALL_DESTINATIONS
767			|| fDestination == group->Destination())) {
768		// put this group's children on the stack in
769		// reverse order, so that we iterate over
770		// the tree in in-order
771
772		for (int32 i = group->CountElements(); i-- > 0;) {
773			fStack.Push(group->ElementAt(i));
774		}
775	}
776
777	return element;
778}
779
780
781//	#pragma mark -
782
783
// Creates a worker for the tree rooted at "start". fSkip is not set here;
// Dispatch() resets it before every group.
Worker::Worker(RTF::Header &start)
	:
	fStart(start)
{
}
789
790
// Nothing to release at this level.
Worker::~Worker()
{
}
794
795
796void
797Worker::Dispatch(Element *element)
798{
799	if (RTF::Group *group = dynamic_cast<RTF::Group *>(element)) {
800		fSkip = false;
801		Group(group);
802
803		if (fSkip)
804			return;
805
806		for (int32 i = 0; (element = group->ElementAt(i)) != NULL; i++)
807			Dispatch(element);
808
809		GroupEnd(group);
810	} else if (RTF::Command *command = dynamic_cast<RTF::Command *>(element)) {
811		Command(command);
812	} else if (RTF::Text *text = dynamic_cast<RTF::Text *>(element)) {
813		Text(text);
814	}
815}
816
817
// Runs the worker over the whole tree, starting at the header.
void
Worker::Work()
{
	Dispatch(&fStart);
}
823
824
// Hook called by Dispatch() for every group before its children are
// dispatched; calling Skip() from here leaves the group out. Does nothing
// by default (presumably meant to be overridden - confirm in RTF.h).
void
Worker::Group(RTF::Group *group)
{
}
829
830
// Hook called by Dispatch() after all children of a group have been
// dispatched; not called for skipped groups. Does nothing by default.
void
Worker::GroupEnd(RTF::Group *group)
{
}
835
836
// Hook called by Dispatch() for every command. Does nothing by default.
void
Worker::Command(RTF::Command *command)
{
}
841
842
// Hook called by Dispatch() for every text element. Does nothing by default.
void
Worker::Text(RTF::Text *text)
{
}
847
848
// Returns the header this worker was created for.
RTF::Header &
Worker::Start()
{
	return fStart;
}
854
855
// Asks Dispatch() to leave out the group whose Group() hook is currently
// running; the flag is reset before every group.
void
Worker::Skip()
{
	fSkip = true;
}
861
862
// Stops the work by throwing "status". Nothing in the Worker code of this
// file catches it, so it propagates to whoever called Work().
void
Worker::Abort(status_t status)
{
	throw status;
}
868
869