# $NetBSD: nanpa.sed,v 1.3 2023/01/28 13:12:16 jmcneill Exp $
#
# Parse HTML tables output by
#   http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
# Specifically, for each HTML table row (TR),
# print the <TD> elements separated by colons.
#
# How it works: input lines are accumulated in the pattern space until a
# complete row (<TR> ... </TR>) has been collected; the row is then
# reduced to its cell text.  Input that is not part of a row is deleted.
#
# Known limitations:
#  - This could break on HTML comments.
#  - '$' is used internally as the end-of-row marker, so a literal '$'
#    in the cell data can confuse the row joining.
#  - A row that starts on the same line where the previous row ends is
#    not split off into its own output line.
#
:top
#				Turn tabs into spaces, then strip ^Ms.
#				The class also drops any other control
#				character, which is why tabs must be
#				converted first.  A class is used because a
#				raw CR byte in this file is easily mangled
#				into a line break, and \r is not an escape
#				in every sed.
s/[[:blank:]]/ /g
s/[[:cntrl:]]//g
#				Join all lines with unterminated HTML tags.
#				The line break inside the tag becomes a space
#				so the tag name and its attributes stay apart.
/<[^>]*$/{
	N
	s/\n/ /
	b top
}
#				Replace all </TR> with EOL tag
s;</[Tt][Rr]>;$;g
#				Keep joining while a <TR> has been seen but
#				its EOL tag has not: pull in the next line and
#				start over.  This also covers lines that
#				consist of nothing but <TR>.
/<[Tt][Rr][^>]*>[^$]*$/{
	N
	s/\n//g
	b top
}
#				Remove EOL marker at the end of the row
s/\$$//
#				Remove lines that do not contain a <TR>
/<[Tt][Rr][^>]*>/!d
#				Replace all <TD> (and the blanks around it)
#				with colon
s/ *<[Tt][Dd][^>]*> */:/g
#				Strip all HTML tags
s/<[^>]*>//g
#				Handle HTML characters
s/&nbsp;/ /g
#				Compress runs of spaces
s/[ ][ ]*/ /g
#				Strip the leading colon left by the first <TD>.
#				Anchored, so that a colon inside the data is
#				never touched.
s/^ *://
#				Strip leading whitespace
s/^ *//
#				Strip trailing whitespace (at most one space
#				is left after the compression above)
s/ $//
#				Blank out what is left of a line starting
#				with "--" (remains of an HTML comment)
s/^--.*$//