1# -*- tcl -*-
2# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net>
3
4# Parser for docidx formatted input. The result is a struct::tree
5# repesenting the contents of the document in a structured form.
6
7# - root = index, attributes for title and label.
8# - children of the root = keys of the index, attribute for keyword.
9# - children of the keys = manpage and url references for the key,
10#                          attributes for reference and label.
11#
12# The order of the keywords under root, and of the references under
13# their keyword reflects the order of the information in the parsed
14# document.
15
16# Attributes in the nodes, except root provide location information,
17# i.e. refering from there in the input the information is coming from
18# (human-readable output: line/col for end of token, offset start/end
19# for range covered by token.
20
21# # ## ### ##### ######## ############# #####################
22## Requirements
23
24package require Tcl 8.4                  ; # Required runtime.
25package require doctools::idx::structure ; # Parse Tcl script, like subst.
26package require doctools::msgcat         ; # Error message L10N
27package require doctools::tcl::parse     ; # Parse Tcl script, like subst.
28package require fileutil                 ; # Easy loading of files.
29package require logger                   ; # User feedback.
30package require snit                     ; # OO system.
31package require struct::list             ; # Assign
32package require struct::tree             ; # Internal syntax tree
33
34# # ## ### ##### ######## ############# #####################
35##
36
37logger::initNamespace ::doctools::idx::parse
38snit::type            ::doctools::idx::parse {
39    # # ## ### ##### ######## #############
40    ## Public API
41
42    typemethod file {path} {
43	log::debug [list $type file]
44	return [$type text [fileutil::cat $path] $path]
45    }
46
47    typemethod text {text {path {}}} {
48	log::debug [list $type text]
49
50	set ourfile $path
51
52	array set vars [array get ourvars]
53	array set _file {}
54	ClearErrors
55
56	set t [struct::tree AST]
57
58	Process $t $text [$t rootname] vars _file
59	StopOnErrors
60
61	ReshapeTree $t
62	StopOnErrors
63
64	set serial [Serialize $t]
65	StopOnErrors
66
67	$t destroy
68	return $serial
69    }
70
71    # # ## ### ##### ######## #############
72    ## Manage symbol table (vset variables).
73
74    typemethod vars {} {
75	return [array get ourvars]
76    }
77
78    typemethod {var set} {name value} {
79	set ourvars($name) $value
80	return
81    }
82
83    typemethod {var load} {dict} {
84	array set ourvars $dict
85	return
86    }
87
88    typemethod {var unset} {args} {
89	if {![llength $args]} { lappend args * }
90	foreach pattern $args {
91	    array unset ourvars $pattern
92	}
93	return
94    }
95
96    # # ## ### ##### ######## #############
97    ## Manage search paths for include files.
98
99    typemethod includes {} {
100	return $ourincpaths
101    }
102
103    typemethod {include set} {paths} {
104	set ourincpaths [lsort -uniq $paths]
105	return
106    }
107
108    typemethod {include add} {path} {
109	lappend ourincpaths $path
110	set     ourincpaths [lsort -uniq $ourincpaths]
111	return
112    }
113
114    typemethod {include remove} {path} {
115	set pos [lsearch $ourincpaths $path]
116	if {$pos < 0} return
117	set  ourincpaths [lreplace $ourincpaths $pos $pos]
118	return
119    }
120
121    typemethod {include clear} {} {
122	set ourincpaths {}
123	return
124    }
125
126    # # ## ### ##### ######## #############
127
128    proc Process {t text root vv fv} {
129	upvar 1 $vv vars $fv _file
130
131	DropChildren $t $root
132
133	# Phase 1. Generate the basic syntax tree
134
135	if {[catch {
136	    doctools::tcl::parse text $t $text $root
137	} msg]} {
138	    if {![string match {doctools::tcl::parse *} $::errorCode]} {
139		# Not a parse error, rethrow.
140		return \
141		    -code      error \
142		    -errorcode $::errorCode \
143		    -errorinfo $::errorInfo \
144		    $msg
145	    }
146
147	    # Parse error, low-level syntax breakdown, extract the
148	    # machine-info from the errorCode, and report internally.
149	    # See the documentation of doctools::tcl::parse for the
150	    # definition of the format.
151	    struct::list assign $::errorCode _ msg pos line col
152	    # msg in {eof, char}
153	    ReportAt $_file($root) [list $pos $pos] $line $col docidx/$msg/syntax {}
154	    return 0
155	}
156
157	#doctools::parse::tcl::ShowTreeX $t {Raw Result}
158
159	# Phase 2. Check for errors.
160
161	CheckBasicConstraints  $t $root      _file
162	ResolveVarsAndIncludes $t $root vars _file
163	return 1
164    }
165
166    proc CheckBasicConstraints {t root fv} {
167	::variable ourfile
168	upvar 1 $fv _file
169
170	# Bottom-up walk through the nodes starting at the current
171	# root.
172
173	$t walk $root -type dfs -order pre n {
174	    # Ignore the root node itself. Except for one thing: The
175	    # path information is remembered for the root as well.
176
177	    set _file($n) $ourfile
178	    #puts "_file($n) = $ourfile"
179	    if {$n eq $root} continue
180
181	    switch -exact [$t get $n type] {
182		Text {
183		    # Texts at the top level are irrelevant and
184		    # removed. They have to contain only whitespace as
185		    # well.
186		    if {[$t depth $n] == 1} {
187			if {[regexp {[^[:blank:]\n]} [$t get $n text]]} {
188			    Error $t $n docidx/plaintext
189			}
190			MarkDrop $n
191		    }
192		}
193		Word {
194		    # Word nodes we ignore. They are just argument
195		    # aggregators. They will be gone later, when
196		    # reduce arguments to their text form.
197		}
198		Command {
199		    set cmdname [$t get $n text]
200		    set parens  [$t parent $n]
201
202		    if {$parens eq $root} {
203			set parentt {}
204		    } else {
205			set parentt [$t get $parens type]
206		    }
207		    set nested 0
208
209		    if {($parentt eq "Command") || ($parentt eq "Word")} {
210			# Commands can be children/arguments of other
211			# commands only in very restricted
212			# circumstances => rb, lb, vset/1.
213			set nested 1
214			if {![Nestable $t $n $cmdname errcmdname] && [Legal $cmdname]} {
215			    # Report only legal un-nestable commands.
216			    # Illegal commands get their own report,
217			    # see below.
218			    MakeErrorMsg $t $n docidx/cmd/nested $errcmdname
219			}
220		    }
221
222		    if {![Legal $cmdname]} {
223			# Deletion is safe because we are walking
224			# bottom up. If nested we drop only the
225			# children and replace this node with a fake.
226			if {$nested} {
227			    MakeErrorMsg $t $n docidx/cmd/illegal $cmdname
228			} else {
229			    Error $t $n docidx/cmd/illegal $cmdname
230			    MarkDrop $n
231			}
232
233			continue
234		    }
235
236		    # Check arguments of the legal commands only.
237		    ArgInfo $cmdname min max
238		    set argc [llength [$t children $n]]
239
240		    if {$argc < $min} {
241			MakeErrorMsg $t $n docidx/cmd/wrongargs $cmdname $min
242		    } elseif {$argc > $max} {
243			MakeErrorMsg $t $n docidx/cmd/toomanyargs $cmdname $max
244		    }
245
246		    # Convert the quoting commands for bracket into
247		    # equivalent text nodes, and remove comments.
248		    if {$cmdname eq "lb"} {
249			MakeText $t $n "\["
250		    } elseif {$cmdname eq "rb"} {
251			MakeText $t $n "\]"
252		    } elseif {$cmdname eq "comment"} {
253			# Remove comments or replace with error node (nested).
254			if {$nested} {
255			    MakeError $t $n
256			} else {
257			    MarkDrop $n
258			}
259		    }
260		}
261	    }
262	}
263
264	# Kill the nodes marked for removal now that the walker is not
265	# accessing them any longer.
266	PerformDrop $t
267
268	#doctools::parse::tcl::ShowTreeX $t {Basic Constraints}
269	return
270    }
271
272    proc ResolveVarsAndIncludes {t root vv fv} {
273	upvar 1 $vv vars $fv _file
274
275	# Now resolve include and vset uses ... This has to be done at
276	# the same time, as each include may (re)define variables.
277
278	# Bottom-up walk. Children before parent, and from the left =>
279	# Nested vset uses are resolved in the proper order.
280
281	$t walk $root -type dfs -order post n {
282	    # Ignore the root node itself.
283	    if {$n eq $root} continue
284
285	    set ntype [$t get $n type]
286
287	    switch -exact -- $ntype {
288		Text - Error {
289		    # Ignore these nodes.
290		}
291		Word {
292		    # Children have to be fully converted to Text, or,
293		    # in case of trouble, Error. Aggregate the
294		    # information.
295		    CollapseWord $t $n
296		}
297		Command {
298		    set cmdname [$t get $n text]
299
300		    switch -exact -- $cmdname {
301			vset {
302			    set argv [$t children $n]
303			    switch -exact -- [llength $argv] {
304				1 {
305				    VariableUse $t $n [lindex $argv 0]
306				}
307				2 {
308				    struct::list assign $argv var val
309				    VariableDefine $t $n $var $val
310				}
311			    }
312			    # vset commands at the structural toplevel are
313			    # irrelevant and removed.
314			    if {[$t depth $n] == 1} {
315				MarkDrop $n
316			    }
317			}
318			include {
319			    # Pulls vars, _file from this scope
320			    ProcessInclude $t $n [lindex [$t children $n] 0]
321			}
322			default {
323			    # For all other commands move the argument
324			    # information into an attribute. Errors in
325			    # the argument cause the command to conert
326			    # into an error.
327			    CollapseArguments $t $n
328			}
329		    }
330		}
331	    }
332	}
333
334	# Kill the nodes marked for removal now that the walker is
335	# not accessing them any longer.
336	PerformDrop $t
337
338	#doctools::parse::tcl::ShowTreeX $t {Vars/Includes Resolved}
339	return
340    }
341
342    proc ReshapeTree {t} {
343	upvar 1 _file _file
344
345	# We are assuming that there are no illegal commands in the
346	# tree, and further that all of lb, rb, vset, comment, and
347	# include are gone as well, per the operation of the previous
348	# phases (-> CheckBasicConstraints, ResolveVarsAndIncludes).
349	# The only commands which can occur here are
350	#
351	#     index_begin, index_end, key, manpage, url
352
353	# Grammar:
354	#     INDEX := index_begin KEYS index_end
355	#     KEYS  := { key ITEMS }
356	#     ITEMS := { manpage | url }
357
358	# Hand coded LL(1) parser with explicit state machine. No
359	# stack required for this grammar.
360
361	set root     [$t rootname]
362	set children [$t children $root]
363	lappend children $root
364
365	$t set $root text <EOF>
366	$t set $root range {0 0}
367	$t set $root line  1
368	$t set $root col   0
369
370	set at    {}
371	set state INDEX
372
373	foreach n $children {
374	    set cmdname [$t get $n text]
375	    #puts <$n>|$cmdname|$state|
376
377	    # We store the location of the last node in the root, for
378	    # use when an unexpected eof triggers an error.
379	    if {$n ne $root} {
380		$t set $root range [$t get $n range]
381		$t set $root line  [$t get $n line]
382		$t set $root col   [$t get $n col]
383	    }
384
385	    # LL(1) parser table. State/Nexttoken determine action and
386	    # next state.
387	    switch -exact -- [list $state $cmdname] {
388		{INDEX index_begin} {
389		    # Pull arguments of the proper index_begin up into
390		    # the root. Drop the expected node.
391		    $t set $root argv [$t get $n argv]
392		    $t delete $n
393		    # Starting series of keywwords and their
394		    # references. Destination is root, not that it
395		    # matters, and we remember the state.
396		    set at    $root
397		    set state KEYS
398		}
399		{KEYS key} {
400		    # Starting series of references in a keyword.
401		    # Destination for movement is this keyword, and we
402		    # remember the state.
403		    set at    $n
404		    set state ITEMS
405		}
406		{ITEMS index_end} -
407		{KEYS index_end} {
408		    # End of the document reached, with proper closing
409		    # of keys and references. Drop the node, and jump to
410		    # the end state
411		    set state EOF
412		    $t delete $n
413		}
414		{ITEMS manpage} -
415		{ITEMS url} {
416		    # Move references to their keyword.
417		    $t move $at end $n
418		}
419		{ITEMS key} {
420		    # Move destination of references forward.
421		    set at $n
422		}
423		{EOF <EOF>} {
424		    # Good, really reached the end. Nothing to be
425		    # done.
426		}
427		{INDEX index_end} -
428		{INDEX key} -
429		{INDEX manpage} -
430		{INDEX url} -
431		{INDEX <EOF>} {
432		    Error $t $n docidx/index_begin/missing
433		    if {$n ne $root} {
434			$t delete $n
435		    }
436		}
437		{KEYS index_begin} -
438		{KEYS manpage} -
439		{KEYS url} {
440		    Error $t $n docidx/key/missing
441		    if {$n ne $root} {
442			$t delete $n
443		    }
444		}
445		{EOF index_begin} -
446		{EOF index_end} -
447		{EOF key} -
448		{EOF manpage} -
449		{EOF url} -
450		{ITEMS index_begin} {
451		    # TODO ?! Split this, and add message which command was expected.
452		    # Unexpected and wrong. The node is dropped.
453		    Error $t $n docidx/$cmdname/syntax
454		    $t delete $n
455		}
456		{KEYS <EOF>} -
457		{ITEMS <EOF>} {
458		    Error $t $n docidx/index_end/missing
459		}
460	    }
461	}
462
463	$t unset $root text
464	$t unset $root range
465	$t unset $root line
466	$t unset $root col
467
468	#doctools::parse::tcl::ShowTreeX $t Shaped/Structure
469	return
470    }
471
472    proc Serialize {t} {
473	upvar 1 _file _file
474	# We assume here that the tree is already in the correct
475	# shape/structure, i.e. of at most depth 2, a root, optionally
476	# a series of children for the keywords, and each keyword with
477	# an optional series of children for the items, i.e. manpage
478	# and url references.
479
480	# We now extract the basic information about the index from
481	# the tree, do some higher level checking on the references,
482	# and return the serialization of the index generated from the
483	# extracted data.
484
485	set error 0
486	set root [$t rootname]
487
488	# Root delivers index label and title.
489	struct::list assign [$t get $root argv] label title
490
491	array set k {}
492	array set r {}
493
494	# Each keyword in the tree
495	foreach key [$t children $root] {
496	    set kw [lindex [$t get $key argv] 0]
497	    set k($kw) {}
498
499	    # Each reference in a key.
500	    foreach item [$t children $key] {
501		struct::list assign [$t get $item argv] id rlabel
502		set rtype [$t get $item text]
503		set decl  [list $rtype $rlabel]
504
505		lappend k($kw) $id
506
507		# Checking that all uses of a reference use the same
508		# type and label.
509		if {[info exists r($id)]} {
510		    if {$r($id) ne $decl} {
511			struct::list assign $r($id) otype olabel
512			MakeErrorMsg $t $item docidx/ref/redef \
513			    $id $otype $olabel $rtype $rlabel
514			set error 1
515		    }
516		    continue
517		}
518		set r($id) $decl
519	    }
520	}
521
522	if {$error} return
523	# Caller will handle the errors.
524
525	## ### ### ### ######### ######### #########
526	## The part below is identical to the serialization backend of
527	## command 'doctools::idx::structure merge'.
528
529	# Now construct the result, from the inside out, with proper
530	# sorting at all levels.
531
532	set keywords {}
533	foreach kw [lsort -dict [array names k]] {
534	    # Sort references in a keyword by their _labels_.
535	    set tmp {}
536	    foreach rid $k($kw) { lappend tmp [list $rid [lindex $r($rid) 1]] }
537	    set refs {}
538	    foreach item [lsort -dict -index 1 $tmp] {
539		lappend refs [lindex $item 0]
540	    }
541	    lappend keywords $kw $refs
542	}
543
544	set references {}
545	foreach rid [lsort -dict [array names r]] {
546	    lappend references $rid $r($rid)
547	}
548
549	set serial [list doctools::idx \
550			[list \
551			     label      $label \
552			     keywords   $keywords \
553			     references $references \
554			     title      $title]]
555
556
557	# Caller verify, ensure contract
558	#::doctools::idx::structure verify-as-canonical $serial
559	return $serial
560    }
561
562    # # ## ### ##### ######## #############
563
564    proc CollapseArguments {t n} {
565	#puts __CA($n)
566
567	set ok 1
568	set argv {}
569	foreach ch [$t children $n] {
570	    lappend argv [$t get $ch text]
571	    if {[$t get $ch type] eq "Error"} {
572		set ok 0
573		break
574	    }
575	}
576	if {$ok} {
577	    $t set $n argv $argv
578	    DropChildren $t $n
579	} else {
580	    MakeError $t $n
581	}
582	return
583    }
584
585    proc CollapseWord {t n} {
586	#puts __CW($n)
587
588	set ok 1
589	set text {}
590	foreach ch [$t children $n] {
591	    append text [$t get $ch text]
592	    if {[$t get $ch type] eq "Error"} {
593		set ok 0
594		break
595	    }
596	}
597	if {$ok} {
598	    MakeText $t $n $text
599	} else {
600	    MakeError $t $n
601	}
602	return
603    }
604
605    proc VariableUse {t n var} {
606	upvar 1 vars vars _file _file
607
608	# vset/1 - the command returns text information to the
609	# caller. Extract the argument data.
610
611	set vartype [$t get $var type]
612	set varname [$t get $var text]
613
614	# Remove the now superfluous argument nodes.
615	DropChildren $t $n
616
617	if {$vartype eq "Error"} {
618	    # First we check if the command is in trouble because it
619	    # has a bogus argument. If so we convert it into an error
620	    # node to signal even higher commands, and ignore it. We
621	    # do not report an error, as the actual problem was
622	    # reported already.
623
624	    MakeError $t $n
625	} elseif {![info exists vars($varname)]} {
626	    # Secondly we check if the referenced variable is
627	    # known. If not it is trouble, and we report it.
628
629	    MakeErrorMsg $t $n docidx/vset/varname/unknown $varname
630	} elseif {[$t depth $n] == 1} {
631	    # Commands at the structural toplevel are irrelevant and
632	    # removed (see caller). They have to checked again however
633	    # to see if the use introduced non-whitespace where it
634	    # should not be.
635
636	    if {[regexp {[^[:blank:]\n]} $vars($varname)]} {
637		Error $t $n docidx/plaintext
638	    }
639	} else {
640	    MakeText $t $n $vars($varname)
641	}
642    }
643
644    proc VariableDefine {t n var val} {
645	upvar 1 vars vars
646
647	# vset/2 - the command links a variable to a value. Extract
648	# the argument data.
649
650	set vartype [$t get $var type]
651	set valtype [$t get $val type]
652	set varname [$t get $var text]
653	set value   [$t get $val text]
654
655	# Remove the now superfluous argument nodes.
656	DropChildren $t $n
657
658	if {($vartype eq "Error") || ($valtype eq "Error")} {
659	    # First we check if the command is in trouble because it
660	    # has one or more bogus arguments. If so we convert it
661	    # into an error node to signal even higher commands, and
662	    # ignore it. We do not report an error, as the actual
663	    # problem was reported already.
664
665	    MakeError $t $n
666	    return
667	}
668
669	# And save the change to the symbol table we are lugging
670	# around during the processing.
671
672	set vars($varname) $value
673	return
674    }
675
676    proc ProcessInclude {t n path} {
677	upvar 1 vars vars _file _file
678	::variable ourfile
679
680	# include - the command returns file content and inserts it in
681	# the place of the command.  First extract the argument data
682
683	set pathtype [$t get $path type]
684	set pathname [$t get $path text]
685
686	# Remove the now superfluous argument nodes.
687	DropChildren $t $n
688
689	# Check for problems stemming from other trouble.
690	if {$pathtype eq "Error"} {
691	    # First we check if the command is in trouble because it
692	    # has a bogus argument. If so convert it into an error
693	    # node to signal even higher commands, and ignore it. We
694	    # do not report an error, as the actual problem was
695	    # reported already.
696
697	    MakeError $t $n
698	    return
699	}
700
701	if {![GetFile $ourfile $pathname text fullpath error emsg]} {
702	    switch -exact -- $error {
703		notfound { Error $t $n docidx/include/path/notfound $pathname       }
704		notread  { Error $t $n docidx/include/read-failed   $fullpath $emsg }
705	    }
706	    MarkDrop $n
707	    return
708	}
709
710	# Parse the file. This also resolves variables further.
711
712	set currenterrors [GetErrors]
713	set currentpath $ourfile
714	ClearErrors
715
716	# WIBNI :: Remember the path as relative to the current path.
717	set ourfile $fullpath
718	if {![Process $t $text $n vars _file]} {
719
720	    set newerrors [GetErrors]
721	    SetErrors $currenterrors
722	    set ourfile $currentpath
723	    Error $t $n docidx/include/syntax $fullpath $newerrors
724	    MarkDrop $n
725	    return
726	}
727
728	if {![$t numchildren $n]} {
729	    # Inclusion did not generate additional content, we can
730	    # ignore the command completely.
731	    MarkDrop $n
732	    return
733	}
734
735	# Create marker nodes which show the file entry/exit
736	# transitions. Disabled, makes shaping tree structure too
737	# complex. And checking the syntax as well, if we wish to have
738	# only proper complete structures in an include file. Need
739	# proper LR parser for that (is not LL(1)), or maybe even
740	# something like earley-aycock for full handling of an
741	# ambigous grammar.
742	if 0 {
743	    set fstart [$t insert $n 0]
744	    set fstop  [$t insert $n end]
745
746	    $t set $fstart type Command
747	    $t set $fstop  type Command
748
749	    $t set $fstart text include_begin
750	    $t set $fstop  text include_end
751
752	    $t set $fstart path $fullpath
753	    $t set $fstop  path $fullpath
754	}
755	# Remove the include command itself, merging its children
756	# into the place it occupied in its parent.
757	$t cut $n
758	return
759    }
760
761    # # ## ### ##### ######## #############
762
763    ## Note: The import plugin for docidx rewrites the 'GetFile'
764    ##       command below to make use of an alias provided by the
765    ##       plugin manager. This re-enables the ability of this class
766    ##       to handle include files which would otherwise be gone due
767    ##       to the necessary file operations (exists, isfile,
768    ##       readable, open, read) be disallowed by the safe
769    ##       environment the plugin operates in.
770    ##
771    ## Any changes to GetFile have to reviewed for their impact on
772    ## doctools::idx::import::docidx, and possibly ported over.
773
774    proc GetFile {currentfile path dv pv ev mv} {
775	upvar 1 $dv data $pv fullpath $ev error $mv emessage
776	set data     {}
777	set error    {}
778	set emessage {}
779
780	# Find the file, or not.
781	set fullpath [Locate $path]
782	if {$fullpath eq {}} {
783	    set fullpath $path
784	    set error notfound
785	    return 0
786	}
787
788	# Read contents, or not.
789	if {[catch {
790	    set data [fileutil::cat $fullpath]
791	} msg]} {
792	    set error notread
793	    set emessage $msg
794	    return 0
795	}
796
797	return 1
798    }
799
800    proc Locate {path} {
801	upvar 1 currentfile currentfile
802
803	if {$currentfile ne {}} {
804	    set pathstosearch \
805		[linsert $ourincpaths 0 \
806		     [file dirname [file normalize $currentfile]]]
807	} else {
808	    set pathstosearch $ourincpaths
809	}
810
811	foreach base $pathstosearch {
812	    set try [file join $base $path]
813	    if {![file exists $try]} continue
814	    return $try
815	}
816	# Nothing found
817	return {}
818    }
819
820    # # ## ### ##### ######## #############
821    ## Management of nodes to kill
822
823    proc MarkDrop {n} {
824	::variable ourtokill
825	lappend ourtokill $n
826	#puts %%mark4kill=$n|[info level -1]
827	return
828    }
829
830    proc DropChildren {t n} {
831	foreach child [$t children $n] {
832	    MarkDrop $child
833	}
834	return
835    }
836
837    proc PerformDrop {t} {
838	::variable ourtokill
839	#puts __PD($t)=<[join $ourtokill ,]>
840	foreach n $ourtokill {
841	    #puts x($n/[$t exists $n])
842	    if {![$t exists $n]} continue
843	    #puts ^^DEL($n)
844	    $t delete $n
845	}
846	set ourtokill {}
847	return
848    }
849
850    # # ## ### ##### ######## #############
851    ## Command predicates
852
853    proc Nestable {t n cmdname cv} {
854	upvar 1 $cv outname
855	set outname $cmdname
856	switch -exact -- $cmdname {
857	    lb - rb { return 1 }
858	    vset {
859		if {[$t numchildren $n] == 1} {
860		    return 1
861		}
862		append outname /2
863	    }
864	}
865	return 0
866    }
867
868    proc Legal {cmdname} {
869	::variable ourcmds
870	#parray ourcmds
871	return [info exists ourcmds($cmdname)]
872    }
873
874    proc ArgInfo {cmdname minv maxv} {
875	::variable ourcmds
876	upvar 1 $minv min $maxv max
877	foreach {min max} $ourcmds($cmdname) break
878	return
879    }
880
881    # # ## ### ##### ######## #############
882    ## Higher level error handling, node conversion.
883
884    proc MakeError {t n} {
885	#puts %%error=$n|[info level -1]
886	$t set $n type Error
887	DropChildren $t $n
888	return
889    }
890
891    proc MakeErrorMsg {t n msg args} {
892	upvar 1 _file _file
893	#puts %%error=$n|[info level -1]
894	Report $t $n $msg $args
895	$t set $n type Error
896	DropChildren $t $n
897	return
898    }
899
900    proc MakeText {t n text} {
901	#puts %%text=$n|[info level -1]
902	$t set $n type Text
903	$t set $n text $text
904	DropChildren $t $n
905	return
906    }
907
908    # # ## ### ##### ######## #############
909    ## Error reporting
910
911    proc Error {t n text args} {
912	upvar 1 _file _file
913	Report $t $n $text $args
914    }
915
916    proc Report {t n text details} {
917	upvar 1 _file _file
918	ReportAt $_file($n) [$t get $n range] [$t get $n line] [$t get $n col] $text $details
919	return
920    }
921
922    proc ReportAt {file range line col text details} {
923	::variable ourerrors
924	#puts !![list $file $range $line $col $text $details]/[info level -1]
925	lappend ourerrors [list $file $range $line $col $text $details]
926	return
927    }
928
929    # # ## ### ##### ######## #############
930    ## Error Management
931
932    proc ClearErrors {} {
933	::variable ourerrors {}
934	return
935    }
936
937    proc GetErrors {} {
938	::variable ourerrors
939	return $ourerrors
940    }
941
942    proc SetErrors {t} {
943	::variable ourerrors $t
944	return
945    }
946
947    # # ## ### ##### ######## #############
948    ## Error Response
949
950    proc StopOnErrors {} {
951	::variable ourerrors
952	if {![llength $ourerrors]} return
953
954	upvar 1 t t
955	$t destroy
956
957	doctools::msgcat::init idx
958	set info [SortMessages $ourerrors]
959	set msg  [Formatted $info {}]
960
961	return -code error -errorcode $info $msg
962    }
963
964    proc Formatted {errors prefix} {
965	set lines {}
966	foreach err $errors {
967	    struct::list assign $err file range line col msg details
968	    #8.5: set text [msgcat::mc $msg {*}$details]
969	    set text [eval [linsert $details 0 msgcat::mc $msg]]
970	    if {![string length $prefix] && [string length $file]} {
971		set prefix "\"$file\" "
972	    }
973
974	    lappend lines "${prefix}error on line $line.$col: $text"
975
976	    if {$msg eq "docidx/include/syntax"} {
977		struct::list assign $details path moreerrors
978		lappend lines [Formatted [SortMessages $moreerrors] "\"$path\": "]
979	    }
980	}
981	return [join $lines \n]
982    }
983
984    proc SortMessages {messages} {
985	return [lsort -dict -index 0 \
986		    [lsort -dict -index 2 \
987			 [lsort -dict -index 3 \
988			      [lsort -unique $messages]]]]
989    }
990
991    # # ## ### ##### ######## #############
992    ## Parser state
993
994    # Path to the file currently processed, if known. Empty if not known
995    typevariable ourfile {}
996
997    # Array of variables for use by vset. During parsing a local copy
998    # is used so that variables set by the document cannot spill back
999    # to the parser state.
1000    typevariable ourvars -array {}
1001
1002    # List of paths to use when searching for an include file.
1003    typevariable ourincpaths {}
1004
1005    # Record of errors found so far. List of 5-tuples containing token
1006    # range, line, column of firt character after the token, error
1007    # code, and error arguments, in this order.
1008    typevariable ourerrors {}
1009
1010    # List of nodes marked for removal.
1011    typevariable ourtokill {}
1012
1013    # Map of legal commands to their min/max number of arguments.
1014    typevariable ourcmds -array {
1015	comment     {1 1}
1016	include     {1 1}
1017	lb          {0 0}
1018	rb          {0 0}
1019	vset        {1 2}
1020
1021	index_begin {2 2}
1022	index_end   {0 0}
1023	key         {1 1}
1024	manpage     {2 2}
1025	url         {2 2}
1026    }
1027
1028    # # ## ### ##### ######## #############
1029    ## Configuration
1030
1031    pragma -hasinstances   no ; # singleton
1032    pragma -hastypeinfo    no ; # no introspection
1033    pragma -hastypedestroy no ; # immortal
1034
1035    ##
1036    # # ## ### ##### ######## #############
1037}
1038
1039# # ## ### ##### ######## ############# #####################
1040## Ready
1041
1042package provide doctools::idx::parse 0.1
1043return
1044