1# $Id: check,v 10.159 2008/02/19 15:41:28 bostic Exp $
2#
3# Script to check HTML output for links that reference nonexistent
4# pages, and for pages that aren't referenced by anything.
5
6###################################################
7# Tests
8###################################################
# t1: report unresolved CVS conflict markers in the .so sources.
# Reads the file list in ${SO_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t1()
{
	echo "$of: CVS conflicts (<<<<<<< and >>>>>>>)..."
	# The extra /dev/null operand makes grep prefix every match with
	# its file name even when xargs hands it a single file.
	xargs grep -E '<<<<<<<|>>>>>>>' /dev/null < "${SO_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
16
# t2: report literal &gt; / &lt; entities in the .so sources.
# Reads the file list in ${SO_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t2()
{
	echo "$of: Inline HTML (&gt; and &lt;)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E '&[lg]t;' /dev/null < "${SO_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
24
# t3: report the literal words "Berkeley DB" in the .so sources (the
# message says the m4_db macro should be used instead).
# Reads the file list in ${SO_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t3()
{
	echo "$of: Berkeley DB (should be m4_db)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -w 'Berkeley DB' /dev/null < "${SO_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
32
# t4: report "dialogue" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t4()
{
	echo "$of: dialogue (should be dialog)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'dialogue' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
40
# t5: report the word "dataset" (any case) in the generated HTML;
# lines reported for embedded.html are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t5()
{
	echo "$of: dataset (should be data set)..."
	# The sed exemption keys on the "file:" prefix, so /dev/null is
	# passed to make grep print that prefix unconditionally.
	xargs grep -E -i -w 'dataset' /dev/null < "${HTML_FILES}" |
	    sed -e '/embedded.html:/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
49
# t6: report "co-existing" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t6()
{
	echo "$of: co-existing (should be coexisting)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'co-existing' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
57
# t7: report "writeable" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t7()
{
	echo "$of: writeable (should be writable)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'writeable' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
65
# t8: report "indexes" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t8()
{
	echo "$of: indexes (should be indices)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'indexes' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
73
# t9: report "useable" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t9()
{
	echo "$of: useable (should be usable)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'useable' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
81
# t10: report "B-tree" / "B+tree" (any case) in the .so sources, except
# for the two titles in refs.so that the sed script drops.
# Reads the file list in ${SO_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t10()
{
	echo "$of: B-tree, B+tree (should be Btree)..."
	# The refs.so exemptions key on the "file:" prefix, so /dev/null
	# is passed to make grep print that prefix unconditionally.
	xargs grep -E -i 'B[+-]tree' /dev/null < "${SO_FILES}" |
	    sed -e '/refs.so:.*The Ubiquitous B-tree/d' \
		-e '/refs.so:.*Prefix B-trees/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
91
# t11: report "two phase" (any case) in the .so sources.
# Reads the file list in ${SO_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t11()
{
	echo "$of: two phase (should be two-phase)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'two phase' /dev/null < "${SO_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
99
# t12: report a space before ',', ';' or ')' and a space after '(' in
# the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t12()
{
	echo "$of: Punctuation/macro typos"
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E ' [,;)]|\( ' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
107
# t13: report "([" and "])" sequences in the generated HTML after the
# known-legitimate bracket uses listed in the sed script are stripped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t13()
{
	echo "$of: Too many closing/opening m4 quotes..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E '\(\[|\]\)' /dev/null < "${HTML_FILES}" |
	    sed -e '/(\*fsp)\[\]/d' \
		-e 's/Object\[\]//g' \
		-e 's/\[DB_XIDDATASIZE\]//g' \
		-e 's/byte\[\]\[\]//g' \
		-e 's/byte\[\]//g' \
		-e 's/char\[\]//g' \
		-e 's/\[\]\&nbsp;//g' \
		-e 's/raceElement\[\]//g' \
		-e 's/onflicts(\[\])//g' |
	    grep -E '\(\[|\]\)' > "${T}/check_$of"
	# byte[][] must be stripped before byte[]: in the other order the
	# shorter rule leaves a bare "[]" behind and "(byte[][])" is then
	# reported as "([".
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
125
# t14: report "i.e." and "e.g." in the generated HTML; lines reported
# for embedded.html are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t14()
{
	echo "$of: i.e. or e.g. (should be that is, for example)..."
	# The sed exemption keys on the "file:" prefix, so /dev/null is
	# passed to make grep print that prefix unconditionally.
	xargs grep -E 'i\.e\.|e\.g\.' /dev/null < "${HTML_FILES}" |
	    sed -e '/embedded.html:/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
134
# t15: report "multi-thread" / "multi-process" (any case) in the
# generated HTML; lines reported for bdb_usenix.html and embedded.html
# are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t15()
{
	echo "$of: multi-thread, multi-process (should be multiXXX)..."
	# The sed exemptions key on the "file:" prefix, so /dev/null is
	# passed to make grep print that prefix unconditionally.
	xargs grep -E -i 'multi-thread|multi-process' /dev/null \
	    < "${HTML_FILES}" |
	    sed -e '/bdb_usenix.html:/d' \
		-e '/embedded.html:/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
144
# t16: report the word "readonly" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t16()
{
	echo "$of: readonly (should be read-only)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -w -i 'readonly' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
152
# t17: report "off-site" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t17()
{
	echo "$of: off-site (should be offsite)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'off-site' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
160
# t18: report "user defined" / "user specified" (any case) in the
# generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t18()
{
	echo "$of: user defined/specified (should be user-XXX)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'user defined|user specified' /dev/null \
	    < "${HTML_FILES}" > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
168
# t19: report "on-going" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t19()
{
	echo "$of: on-going (should be ongoing)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'on-going' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
176
# t20: report "vice-versa" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t20()
{
	echo "$of: vice-versa (should be vice versa)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'vice-versa' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
184
# t21: report "data dependent" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t21()
{
	echo "$of: data dependent (should be data-dependent)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'data dependent' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
192
# t22: report "non-exist..." (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t22()
{
	echo "$of: non-existXXX (should be nonexistXXX)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'non-exist' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
200
# t23: report "pre-built" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t23()
{
	echo "$of: pre-built (should be prebuilt)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'pre-built' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
208
# t24: report "file name" / "path name" (any case) in the generated
# HTML; lines containing "file named" are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t24()
{
	echo "$of: file/path name (should be filename/pathname)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'file name|path name' /dev/null \
	    < "${HTML_FILES}" |
	    sed '/file named/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
217
# t25: report the words "wish", "wishes" and "wishing" (any case) in
# the generated HTML; lines containing "wish session" are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t25()
{
	echo "$of: wish/wishes/wishing (should be want/wants/wanting)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i -w 'wish|wishes|wishing' /dev/null \
	    < "${HTML_FILES}" |
	    sed '/wish session/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
226
# t26: report "file systems" (any case) in the generated HTML; lines
# reported for embedded.html are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t26()
{
	echo "$of: file systems (should be filesystems)..."
	# The sed exemption keys on the "file:" prefix, so /dev/null is
	# passed to make grep print that prefix unconditionally.
	xargs grep -E -i 'file systems' /dev/null < "${HTML_FILES}" |
	    sed '/embedded.html:/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
235
# t27: report hyphenated "re-" words (any case) from a fixed list in
# the generated HTML; lines containing "pre-" are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t27()
{
	# The rule is that reXXX is re-XXX if there are two ways to read it
	# (re-creation vs. recreation) or if the first X is 'e' (re-entrant
	# vs. reentrant).
	echo "$of: re-XXX (that should be reXXX)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i \
're-acquired|re-alloc|re-compile|re-configure|re-direct|re-instantiat|re-link|re-load|re-number|re-open|re-start' \
	    /dev/null < "${HTML_FILES}" |
	    sed '/pre-/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
248
# t28: report "reent..." / "recreat..." (any case) in the generated
# HTML; lines containing _REENTRANT and lines reported for
# embedded.html are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t28()
{
	echo "$of: reXXX (that should be re-XXX)..."
	# The embedded.html exemption keys on the "file:" prefix, so
	# /dev/null is passed to make grep print it unconditionally.
	xargs grep -E -i 'reent|recreat' /dev/null < "${HTML_FILES}" |
	    sed -e '/_REENTRANT/d' -e '/embedded.html:/d' \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
257
# t29: report "stand-alone" (any case) in the generated HTML; lines
# reported for bdb_usenix.html are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t29()
{
	echo "$of: stand-alone (should be standalone)..."
	# The sed exemption keys on the "file:" prefix, so /dev/null is
	# passed to make grep print that prefix unconditionally.
	xargs grep -E -i 'stand-alone' /dev/null < "${HTML_FILES}" |
	    sed '/bdb_usenix.html:/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
266
# t30: report "meta-data" (any case) in the generated HTML; lines
# reported for bdb_usenix.html are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t30()
{
	echo "$of: meta-data (should be metadata)..."
	# The sed exemption keys on the "file:" prefix, so /dev/null is
	# passed to make grep print that prefix unconditionally.
	xargs grep -E -i 'meta-data' /dev/null < "${HTML_FILES}" |
	    sed '/bdb_usenix.html:/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
275
# t31: report the doubled words "the the", "and and", "or or" and
# "that that" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t31()
{
	echo "$of: the the, and and, or or, that that..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i -w 'the the|and and|or or|that that' /dev/null \
	    < "${HTML_FILES}" > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
283
# t32: report ".." in the generated HTML, ignoring lines that contain
# a quoted "..", a "../" path component or an ellipsis "...".
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t32()
{
	echo "$of: doubled punctuation..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i '\.\.' /dev/null < "${HTML_FILES}" |
	    sed -e '/"\.\."/d' \
		-e '/\.\.\//d' \
		-e '/\.\.\./d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
294
# t33: report the words "transaction protect" (any case) in the
# generated HTML; lines reported for embedded.html are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t33()
{
	echo "$of: transaction protect (should be transaction-protect)..."
	# The sed exemption keys on the "file:" prefix, so /dev/null is
	# passed to make grep print that prefix unconditionally.
	xargs grep -E -i -w 'transaction protect' /dev/null \
	    < "${HTML_FILES}" |
	    sed '/embedded.html:/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
303
# t34: report "backward compatible" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t34()
{
	echo "$of: backward compatible (should be backward-compatible)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i 'backward compatible' /dev/null \
	    < "${HTML_FILES}" > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
311
# t35: report the word "shutdown" (any case) in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t35()
{
	echo "$of: shutdown (should be shut down)..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -i -w 'shutdown' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
319
# t36: report "described above" / "described below" (any case) in the
# generated HTML; lines reported for embedded.html are dropped.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing is left.
t36()
{
	echo "$of: described above/below (should be previously/as follows)..."
	# The sed exemption keys on the "file:" prefix, so /dev/null is
	# passed to make grep print that prefix unconditionally.
	xargs grep -E -i 'described above|described below' /dev/null \
	    < "${HTML_FILES}" |
	    sed '/embedded.html:/d' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
328
# t37: report the word "dnl" in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t37()
{
	echo "$of: dnl..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -w dnl /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
336
# t38: report the word "NOAPI" in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t38()
{
	echo "$of: NOAPI..."
	# /dev/null forces a file-name prefix on every match.
	xargs grep -E -w NOAPI /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
344
# t39: list the .so sources that have a line ending in a tab or space.
# Reads the file list in ${SO_FILES}; the names of offending files are
# written to ${T}/check_$of, which is removed again when there are none.
t39()
{
	echo "$of: Trailing whitespace..."
	# The bracket expression holds a literal tab and a space.
	xargs grep -E -l '[	 ]$' < "${SO_FILES}" > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
352
# t40: list the .so sources that contain bytes in the octal ranges
# 0-10, 13-37 or 177-377.
# Reads the file list in ${SO_FILES}; the names of offending files are
# written to ${T}/check_$of, which is removed again when there are none.
# Uses ${T}/1 as scratch space.
t40()
{
	echo "$of: Unprintable characters..."
	# Strip the unwanted bytes and compare with the original: any
	# difference means the file contained at least one of them.
	# The list is read line by line so that names are not word-split;
	# the subshell keeps the loop variable out of the caller's scope.
	(while IFS= read -r i; do
		tr -d '\0-\10\13-\37\177-\377' < "$i" > "${T}/1"
		cmp "$i" "${T}/1" > /dev/null || echo "$i"
	done < "${SO_FILES}") > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
362
# t41: report "[]" in the .so and m4 sources after the bracket uses
# and files listed in the sed script have been stripped or dropped.
# Reads the file lists in ${SO_FILES} and ${M4_FILES}; matches are
# written to ${T}/check_$of, which is removed again when nothing is left.
t41()
{
	echo "$of: Empty brackets..."
	# Several sed rules key on the "file:" prefix, so /dev/null is
	# passed to make grep print that prefix unconditionally.
	cat "${SO_FILES}" "${M4_FILES}" |
	    xargs grep -E '\[\]' /dev/null |
	    sed -e 's/M4XMLMAJOR\[\]//g' \
		-e 's/M4XMLMINOR\[\]//g' \
		-e 's/__LB__\[\]//g' \
		-e 's/libdbxml\[\]//g' \
		-e 's/libdbxml_tcl\[\]//g' \
		-e '/txn_recover.so:.*DB_XIDDATASIZE/d' \
		-e '/\/m4\/m4.links:/d' \
		-e '/\/ref\/build_win\/faq.so:/d' \
		-e '/\/ref\/build_win\/intro.so:/d' \
		-e '/\/ref\/dumpload\/format.so:/d' \
		-e '/\/ref\/dumpload\/text.so:/d' \
		-e '/\/ref\/env\/naming.so:/d' \
		-e '/\/ref\/xa\/xa_config.so:/d' \
		-e '/\/ref_xml\/xml_win\/intro.so:/d' |
	    grep -E '\[\]' > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
385
# t42: check that parentheses balance in every generated HTML file.
# For each file named in ${HTML_FILES} one line is reported if a ')'
# appears before its matching '(' ("too many closing parenthesis") or
# if '(' outnumber ')' at end of file ("too many open parenthesis").
# Reports are written to ${T}/check_$of, which is removed again when
# every file balanced.
#
# The count is done with tr and awk, so no C compiler is needed and no
# fixed-name files are created in /tmp.
t42()
{
	echo "$of: parenthesis count ..."

	# The subshell keeps the loop variables out of the caller's scope.
	(while IFS= read -r i; do
		# Reduce the file to its parentheses (newlines are kept so
		# awk never sees one enormous record), then scan them in
		# order.
		verdict=$(tr -cd '()\n' < "$i" | awk '
		    BEGIN { o = c = 0 }
		    {
			n = length($0)
			for (k = 1; k <= n; k++) {
				if (substr($0, k, 1) == "(")
					++o
				else if (++c > o) {
					print "too many closing parenthesis"
					bad = 1
					exit
				}
			}
		    }
		    END {
			if (!bad && c != o)
				print "too many open parenthesis"
		    }')
		[ -z "$verdict" ] || echo "$i: $verdict"
	done < "${HTML_FILES}") > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
421
# t43: compare the log record names derived from
# $BDB_SRC/test/logtrack.list with the m4_table_element entries between
# the START/STOP LOG RECORD TYPES markers of ref/debug/printlog.so
# (relative to the current directory).
# Skipped when $PRODUCT is "XML" or when logtrack.list is missing.
# On a mismatch a diff is written to ${T}/check_$of; ${T}/1 and ${T}/2
# are used as scratch space.
t43()
{
	if [ "$PRODUCT" = "XML" ]; then
		echo "$of: Missing log records... (skipping test for XML)"
		return
	fi
	echo "$of: Missing log records..."
	f="$BDB_SRC/test/logtrack.list"
	if [ ! -f "$f" ]; then
		echo "$of: $f: file not found (skipping test)"
		return
	fi
	# Source side: PREFIX_BEGIN names with the leading "__" removed.
	grep -E 'BEGIN|PREFIX' "$f" |
	    awk '/PREFIX/{p = $2} /BEGIN/{printf "%s_%s\n", p, $2}' |
	    sed 's/^__//' |
	    sort > "${T}/1"
	# Documentation side: first argument of each m4_table_element.
	sed -e '/START LOG RECORD TYPES/,/STOP LOG RECORD TYPES/p' \
	    -e d ref/debug/printlog.so |
	    grep -E 'm4_table_element' |
	    sed -e 's/[^(]*(//' -e 's/,.*//' | sort > "${T}/2"
	cmp "${T}/1" "${T}/2" > /dev/null ||
	(echo "<<< Source code >>> Ref Guide" &&
	    diff "${T}/1" "${T}/2") > "${T}/check_$of"
}
446
447###################################################
448# Chapter headings.
449###################################################
# t44: check that each Reference Guide page carries the chapter heading
# expected for its directory.  Every "<td><h3><dl><dt>" line is reduced
# to "path@heading"; the pairs listed in the second sed script are the
# accepted ones and are dropped, the rest are written (as columns) to
# ${T}/check_$of, which is removed again when nothing is left.
# Reads the file list in ${HTML_FILES}.
t44()
{
	echo "$of: Chapter headings ..."
	# The "s/:.*<dd>/@/" rule relies on grep's "file:" prefix, so
	# /dev/null is passed to make grep print it unconditionally.
	xargs grep -E '<td><h3><dl><dt>' /dev/null < "${HTML_FILES}" |
	sed -e 's/docs\/ref\///' \
	    -e 's/:.*<dd>/@/' -e 's/<\/dl>.*//' |
	sed -e '/\/am.*@Access Methods$/d' \
	    -e '/\/apprec.*@Application Specific Logging and Recovery$/d' \
	    -e '/\/arch\/.*@Architecture$/d' \
	    -e '/\/build_brew\/.*@Building Berkeley DB for BREW$/d' \
	    -e '/\/build_s60\/.*@Building Berkeley DB for S60$/d' \
	    -e '/\/build_unix\/.*@Building Berkeley DB for UNIX\/POSIX systems$/d' \
	    -e '/\/build_vxworks\/.*@Building Berkeley DB for VxWorks systems$/d' \
	    -e '/\/build_win\/.*@Building Berkeley DB for Windows systems$/d' \
	    -e '/\/build_wince\/.*@Building Berkeley DB for Windows CE systems$/d' \
	    -e '/\/cam\/.*@Berkeley DB Concurrent Data Store Applications$/d' \
	    -e '/\/changelog\/.*@Upgrading Berkeley DB Applications$/d' \
	    -e '/\/debug\/.*@Debugging Applications$/d' \
	    -e '/\/distrib\/.*@Distribution$/d' \
	    -e '/\/dumpload\/.*@Dumping and Reloading$/d' \
	    -e '/\/env\/.*@Environment$/d' \
	    -e '/\/ext\/.*@Berkeley DB Extensions$/d' \
	    -e '/\/install\/.*@System Installation Notes$/d' \
	    -e '/\/intro\/.*@Introduction$/d' \
	    -e '/\/java\/.*@Java API$/d' \
	    -e '/\/lock\/.*@Locking Subsystem$/d' \
	    -e '/\/log\/.*@Logging Subsystem$/d' \
	    -e '/\/mp\/.*@Memory Pool Subsystem$/d' \
	    -e '/\/program\/.*@Programmer Notes$/d' \
	    -e '/\/refs\/.*@Additional References$/d' \
	    -e '/\/rep\/.*@Berkeley DB Replication$/d' \
	    -e '/\/rpc\/.*@RPC Client\/Server$/d' \
	    -e '/\/sendmail\/.*@Sendmail$/d' \
	    -e '/\/sequence\/.*@Sequences$/d' \
	    -e '/\/tcl\/.*@Tcl API$/d' \
	    -e '/\/test\/.*@Test Suite$/d' \
	    -e '/\/transapp\/.*@Berkeley DB Transactional Data Store Applications$/d' \
	    -e '/\/txn\/.*@Transaction Subsystem$/d' \
	    -e '/\/upgrade[\.0-9\]*\/.*@Upgrading Berkeley DB.*Applications$/d' \
	    -e '/\/xa\/.*@Distributed Transactions$/d' \
	    -e '/\/xml\/.*@Berkeley DB XML$/d' \
	    -e '/\/xml_unix\/.*@Building Berkeley DB XML for UNIX\/POSIX systems$/d' \
	    -e '/\/xml_win\/.*@Building Berkeley DB XML for Windows systems$/d' \
	    -e '/\/xquery\/.*@Berkeley DB XML XQuery Extensions$/d' |
	    column -t -s@ > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
497
498###################################################
499# Layout document.
500###################################################
# t45: compare the directories documented in ref/distrib/layout.so
# (first argument of each m4_table_element, relative to the current
# directory) with the entries of $BDB_SRC, ignoring the names filtered
# out by the sed script.
# Skipped when $PRODUCT is "XML".  On a mismatch a diff is written to
# ${T}/check_$of; ${T}/1 and ${T}/2 are used as scratch space.
t45()
{
	if [ "$PRODUCT" = "XML" ]; then
		echo "$of: Layout document... (skipping test for XML)"
		return
	fi
	echo "$of: Layout document..."
	grep -E m4_table_element ref/distrib/layout.so |
	    sed -e 's/,.*//' \
		-e 's/.*(//' |
	    sort > "${T}/1"
	# The sed scripts are quoted so that the shell can never treat
	# "/build[0-9]/d" as a file name pattern.
	ls "$BDB_SRC" |
	    sed -e '/CVS/d' \
		-e '/README/d' \
		-e '/build[0-9]/d' \
		-e '/test_erlang/d' \
		-e '/test_perf/d' \
		-e '/test_purify/d' \
		-e '/test_server/d' \
		-e '/test_vxworks/d' > "${T}/2"

	cmp "${T}/1" "${T}/2" > /dev/null ||
	(echo "<<< Ref Guide >>> Directory ls" &&
	    diff "${T}/1" "${T}/2") > "${T}/check_$of"
}
525
526###################################################
527# Missing flags.
528###################################################
# t46: compare the DB_ names used in m4_idef()/m4_idefz() calls in the
# .so and m4 sources with the DB_ entries of
# $BDB_SRC/docs_src/m4/m4.links.
# Reads the file lists in ${SO_FILES} and ${M4_FILES}.  On a mismatch
# the differing names are appended to ${T}/check_$of; ${T}/1 and
# ${T}/2 are used as scratch space.
t46()
{
	echo "$of: Missing flags..."

	# Compare all the items in the source files against the table of
	# C flags.  The files are concatenated before matching so that
	# grep prints no file names: otherwise a path containing "DB_"
	# would be tokenized below and show up as a flag.
	cat "${SO_FILES}" "${M4_FILES}" |
	    xargs cat |
	    grep -E 'm4_idef[(]DB_|m4_idefz*[(]DB_' |
	    tr -c 'A-Z0-9_' '\012' |
	    grep -E DB_ |
	    sort -u > "${T}/1"

	sed -e 's/^\$1, \(DB_[^,]*\),.*/\1/p' \
	    -e d "$BDB_SRC/docs_src/m4/m4.links" | sort > "${T}/2"
	cmp "${T}/1" "${T}/2" > /dev/null ||
	(echo "<<< source files >>> m4/m4.links file" &&
	    diff "${T}/1" "${T}/2" | grep -E '^[<>]') >> "${T}/check_$of"
}
547
548###################################################
549# Makefile install
550###################################################
# t47: compare the entries of the $TARGET directory (plus "java",
# minus CVS) with the DOCLIST value in $DOCS_SRC/../dist/Makefile.in.
# Exits the script with status 1 when Makefile.in is missing.
# On a mismatch a diff is written to ${T}/check_$of; ${T}/1 and ${T}/2
# are used as scratch space.
t47()
{
	echo "$of: Makefile.in document install list..."

	f="$DOCS_SRC/../dist/Makefile.in"
	if [ ! -f "$f" ]; then
		echo "$of: $f: file not found"
		exit 1
	fi
	(echo java && ls "$TARGET") | sed '/CVS/d' | sort -u > "${T}/1"
	# Print the lines from DOCLIST up to (not including) the
	# install_docs line, with continuation backslashes, the
	# "DOCLIST=" prefix and surrounding blanks removed; then put one
	# name per line.  The bracket expressions hold a tab and a space.
	sed -n \
	    -e '/DOCLIST/,/install_docs/{' \
	    -e '/install_docs/q' \
	    -e 's/\\//' \
	    -e 's/DOCLIST=[	 ]*//' \
	    -e 's/^[	 ]*//' \
	    -e 's/[	 ]*$//' \
	    -e '/^$/d' \
	    -e 'p' \
	    -e '}' < "$f" | tr -s ' ' '\012' | sort > "${T}/2"
	cmp "${T}/1" "${T}/2" > /dev/null ||
	(echo "<<< docs directory >>> Makefile.in document install list" &&
	    diff "${T}/1" "${T}/2") > "${T}/check_$of"
}
575
576###################################################
577# Spelling checks.
578###################################################
# t48: spell-check the m4.* and *.so files of every directory below the
# current one (directories named *CVS or support are not visited
# themselves).  A directory is checked only if it, or its parent, has a
# spell.ok word list; the directory's own list wins.  Words reported by
# spell(1) that are not in the list are appended to ${T}/check_$of
# under an "==== directory" heading.  ${T}/1 and ${T}/2 are used as
# scratch space.
t48()
{
	echo "$of: Spelling checks..."
	# Directory names are read line by line so they are not word-split.
	find . -type d ! -name '*CVS' ! -name 'support' |
	while IFS= read -r i; do
		# Work inside a subshell so the cd does not leak; give up on
		# this directory if it cannot be entered.
		(cd "$i" || exit
		s="NO"
		rm -f "${T}/1" "${T}/2"
		test -f ../spell.ok && s="../spell.ok"
		test -f spell.ok && s="spell.ok"
		if test "$s" != "NO"; then
			sort -u "$s" > "${T}/1"
			# An unexpanded pattern means no such files exist.
			[ "`echo m4.*`" != "m4.*" ] && \
			    spell m4.* | \
			    sort -u | comm -23 - "${T}/1" >> "${T}/2"
			[ "`echo *.so`" != "*.so" ] && \
			    spell *.so | \
			    sort -u | comm -23 - "${T}/1" >> "${T}/2"
		fi
		[ -s "${T}/2" ] &&
		    (echo "==== $i" && cat "${T}/2") >> "${T}/check_$of")
	done
}
601
602###################################################
603# Check for unexpanded m4 macros.
604###################################################
# t49: report "m4_" and "UNREF" in the generated HTML.
# Reads the file list in ${HTML_FILES}; matches are written to
# ${T}/check_$of, which is removed again when nothing matched.
t49()
{
	echo "$of: Unexpanded M4 macros..."

	# /dev/null forces a file-name prefix on every match.
	xargs grep -E 'm4_|UNREF' /dev/null < "${HTML_FILES}" \
	    > "${T}/check_$of"
	[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
}
612
613###################################################
614# weblint
615###################################################
# t50: run weblint over the "docs" directory found in the parent of
# $TARGET and write its complaints, minus the ones filtered out by the
# sed script, to ${T}/check_$of (removed again when nothing is left).
# The weblint program defaults to /a/db/bin/weblint and can be
# overridden with the WEBLINT environment variable.  It is first tried
# on a minimal page (${T}/1); if that fails the test is skipped.
t50()
{
	echo "$of: Weblint..."

cat << END-OF-WEBPAGE > "${T}/1"
<html>
<head>
<title>
title
</title>
</head>
<body>
body
</body>
</html>
END-OF-WEBPAGE
	wb="${WEBLINT:-/a/db/bin/weblint}"
	if "$wb" -f "${T}/1" > /dev/null; then
		(cd "$TARGET/.." && "$wb" -d heading-order -i -l docs | \
		    sed -e '/\/frame.html/d' \
			-e '/\/object.html/d' \
			-e '/recommends the TITLE be no longer than 64 characters/d' \
			-e '/ref.html.*expected tag(s) not seen: BODY/d' \
			-e '/directory does not have an index file/d' \
			-e '/docs\/articles\//d' \
			-e '/docs\/collections\//d' \
			-e '/docs\/gsg\//d' \
			-e '/docs\/gsg_db_rep\//d' \
			-e '/docs\/gsg_txn\//d' \
			-e '/docs\/gsg_xml\//d' \
			-e '/docs\/gsg_xml_txn\//d' \
			-e '/docs\/intro_xml\//d' \
			-e '/docs\/java\//d' \
			-e '/docs\/javaxml\//d' \
			> "${T}/check_$of")
		[ -s "${T}/check_$of" ] || rm -f "${T}/check_$of"
	else
		echo "$of: weblint not found (skipping test)"
	fi
}
657
658###################################################
659# Non-existent page links, unreferenced pages.
660###################################################
# missing_page: check the local links of one HTML page.
# $1 is the path of the page.  Every href="..." target is taken
# relative to the page's directory, with any "#fragment" removed.
# Links to .ps files, http://, https://, ftp:// and mailto: URLs, the
# Java example sources and java/index.html are not checked.
# For each remaining link:
#   - if the target is not a regular file, "$1: target missing" is
#     appended to ${BAD_LINKS};
#   - otherwise the target's absolute path is written to stdout.
missing_page()
{
	# $1 == path
	d=`dirname "$1"`
	f=`basename "$1"`

	(cd "$d" &&
	# One whitespace-separated token per line, so that each href
	# attribute ends up on a line of its own.
	tr '[:space:]' '\n' < "$f" | grep -E -i 'href="' | \
	    sed -e 's/[Hh][Rr][Ee][Ff]="//' \
		-e 's/".*//' \
		-e 's/#.*//' \
		-e '/\.ps$/d' \
		-e '/[Hh][Tt][Tt][Pp][Ss]*:\/\//d' \
		-e '/examples_java\/src\/com\/sleepycat\/examples\//d' \
		-e '/ftp:\/\//d' \
		-e '/java\/index.html/d' \
		-e '/mailto:/d' | \
	    while read -r i; do
		# A link that was only a fragment is empty by now.
		if [ -z "$i" ]; then
			continue;
		fi
		if [ ! -f "$i" ]; then
			echo "$1: $i missing" >> "${BAD_LINKS}"
		else
			(cd "`dirname "$i"`" && echo "$PWD/`basename "$i"`")
		fi
	    done)
}
689
# t51: find links to nonexistent pages and pages nothing links to.
# missing_page is run on every file named in ${HTML_FILES}; it records
# broken links in ${BAD_LINKS} (${T}/bad_links) and prints the pages
# that are linked to, which are collected in ${T}/1.  Pages that appear
# in ${HTML_FILES} but not among the linked-to pages, minus the known
# exceptions in the sed script, are written to ${UNREF_FILES}
# (${T}/unref_files).  Either output file is removed when empty.
t51()
{
	echo "$of: Non-existent page links, unreferenced pages..."
	BAD_LINKS="${T}/bad_links"
	UNREF_FILES="${T}/unref_files"

	# Check for links referencing nonexistent pages.  The list is read
	# line by line so names are not word-split; missing_page gets
	# /dev/null as stdin so it cannot consume the list.
	while IFS= read -r i; do
		missing_page "$i" < /dev/null
	done < "${HTML_FILES}" > "${T}/1" 2>&1
	[ -s "${BAD_LINKS}" ] || rm -f "${BAD_LINKS}"

	# Check for pages that aren't referenced by any link: a page that
	# is both linked to and listed appears twice and is dropped by
	# "uniq -u".
	(sed -e 's;.*/docs/;docs/;' "${T}/1" |
	    sort -u;
	sed -e 's;.*/docs/;docs/;' "${HTML_FILES}") |
	sort |
	uniq -u |
	sed -e '/docs\/api_c\/c_pindex.html/d' \
	    -e '/docs\/api_c\/frame.html/d' \
	    -e '/docs\/api_c\/object.html/d' \
	    -e '/docs\/api_cxx\/cxx_pindex.html/d' \
	    -e '/docs\/api_cxx\/object.html/d' \
	    -e '/docs\/api_tcl\/tcl_pindex.html/d' \
	    -e '/docs\/collections\//d' \
	    -e '/docs\/gsg.*pdf$/d' \
	    -e '/docs\/gsg\/CXX\/index.html/d' \
	    -e '/docs\/gsg\/C\/index.html/d' \
	    -e '/docs\/gsg\/JAVA\/index.html/d' \
	    -e '/docs\/gsg_txn\/CXX\/index.html/d' \
	    -e '/docs\/gsg_txn\/C\/index.html/d' \
	    -e '/docs\/gsg_txn\/JAVA\/index.html/d' \
	    -e '/docs\/gsg_xml\/cxx\/index.html/d' \
	    -e '/docs\/gsg_xml\/cxx\/introduction.html/d' \
	    -e '/docs\/gsg_xml\/java\/introduction.html/d' \
	    -e '/docs\/gsg_xml_txn\/cxx\/index.html/d' \
	    -e '/docs\/intro_xml\/BerkeleyDBXML-Intro.pdf/d' \
	    -e '/docs\/intro_xml\/index.html/d' \
	    -e '/docs\/java\//d' \
	    -e '/docs\/ref\/am\/second.javas/d' \
	    -e '/docs\/ref\/install\/magic.s5.be.txt/d' \
	    -e '/docs\/ref\/install\/magic.s5.le.txt/d' \
	    -e '/docs\/ref\/install\/magic.txt/d' \
	    -e '/docs\/ref\/program\/solaris.txt/d' \
	    -e '/docs\/ref\/transapp\/hotbackup.txt/d' \
	    -e '/docs\/ref\/transapp\/transapp.cs/d' \
	    -e '/docs\/ref\/transapp\/writetest.cs/d' \
	    > "${UNREF_FILES}"
	[ -s "${UNREF_FILES}" ] || rm -f "${UNREF_FILES}"
}
742
743###################################################
744# Initialization
745###################################################
# All results go to a fresh DOC.errors directory below the current
# directory.  Stop right away if it cannot be created: every test
# writes into it.
T="$PWD/DOC.errors"
rm -rf "${T}"
mkdir "${T}" || exit 1

# Lists of the files to check, one path per line.
HTML_FILES="${T}/html_files"
M4_FILES="${T}/m4_files"
SO_FILES="${T}/so_files"

# On exit or on HUP/INT/QUIT/PIPE/TERM remove the single-digit scratch
# files and the file lists, leaving only the check results behind.
trap 'rm -f "${T}"/[0-9] \
    "${HTML_FILES}" "${M4_FILES}" "${SO_FILES}"; exit 0' 0 1 2 3 13 15
756
757###################################################
758# Build list of output files.
759###################################################
# Collect every *.html file below $TARGET, leaving out the document
# trees listed in the sed script, and store the sorted list in
# ${HTML_FILES}.
echo "Build local list of all HTML files..."
find "$TARGET" -name '*.html' |
    sed -e '/\/docs\/articles\//d' \
        -e '/\/docs\/collections\//d' \
        -e '/\/docs\/gsg\//d' \
        -e '/\/docs\/gsg_db_rep\//d' \
        -e '/\/docs\/gsg_txn\//d' \
        -e '/\/docs\/gsg_xml\//d' \
        -e '/\/docs\/gsg_xml_txn\//d' \
        -e '/\/docs\/intro_xml\//d' \
        -e '/\/docs\/java\//d' \
        -e '/\/docs\/porting\//d' \
        | sort > "${HTML_FILES}"
773
# Directories to search for documentation sources: the docs_src tree
# of each product whose source directory variable is set.  f starts
# out empty so that a value inherited from the environment cannot end
# up among the search roots.
f=
[ -z "$BDB_SRC" ] || f="$f $BDB_SRC/docs_src"
[ -z "$XML_SRC" ] || f="$f $XML_SRC/docs_src"

# $f is left unquoted on purpose: it holds a blank-separated list of
# directories that find must receive as separate arguments.
echo "Build list of all M4 source files..."
find $f -name 'm4.*' | sort -u > "${M4_FILES}"

echo "Build list of all .so source files..."
find $f -name '*.so' -print | sort -u > "${SO_FILES}"
782
# With exactly one argument run every test, t1 through t$maxtest, in
# order.  Otherwise run the single test selected by the second
# argument: either a test number or the alias "spell" for the spelling
# checks (t48).  $of holds the number of the running test; the tests
# use it in their messages and output file names.
if [ $# -eq 1 ]; then
	of=1
	maxtest=51
	while [ "$of" -le "$maxtest" ] ; do
		"t$of"
		of=`expr "$of" + 1`
	done
else
	case "$2" in
	spell)	of=48;;
	*)	of="$2";;
	esac
	"t$of"
fi

exit 0
799