html.c revision 114472
1/* html.c -- html-related utilities.
2   $Id: html.c,v 1.8 2002/11/04 22:14:40 karl Exp $
3
4   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software Foundation,
18   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
19
20#include "system.h"
21#include "cmds.h"
22#include "html.h"
23#include "lang.h"
24#include "makeinfo.h"
25#include "sectioning.h"
26
27HSTACK *htmlstack = NULL;
28
29/* See html.h.  */
30int html_output_head_p = 0;
31int html_title_written = 0;
32
33void
34html_output_head ()
35{
36  static char *html_title = NULL;
37
38  if (html_output_head_p)
39    return;
40  html_output_head_p = 1;
41
42  /* The <title> should not have markup, so use text_expansion.  */
43  if (!html_title)
44    html_title = title ? text_expansion (title) : _("Untitled");
45
46  add_word_args ("<html lang=\"%s\">\n<head>\n<title>%s</title>\n",
47                 language_table[language_code].abbrev, html_title);
48
49  add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html");
50  if (document_encoding_code != no_encoding)
51    add_word_args ("; charset=%s",
52                   encoding_table[document_encoding_code].ecname);
53  add_word ("\">\n");
54
55  if (!document_description)
56    document_description = html_title;
57
58  add_word_args ("<meta name=\"description\" content=\"%s\">\n",
59                 document_description);
60  add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n",
61                 VERSION);
62  add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \
63rel=\"generator-home\">\n");
64
65  if (copying_text)
66    { /* copying_text has already been fully expanded in
67         begin_insertion (by full_expansion), so use insert_ rather than
68         add_.  It is not ideal that we include the html markup here within
69         <head>, but the alternative is to have yet more and different
70         expansions of the copying text.  Yuck.  */
71      insert_string ("<!--\n");
72      insert_string (copying_text);
73      insert_string ("-->\n");
74    }
75
76  add_word ("</head>\n<body>\n");
77
78  if (title && !html_title_written && titlepage_cmd_present)
79    {
80      add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title);
81      html_title_written = 1;
82    }
83}
84
85
/* Replace the HTML special characters `&', `<' and `>' in STRING with
   `&amp;', `&lt;' and `&gt;'.  If STRING contains none of them it is
   returned unchanged.  Otherwise a newly allocated string is returned
   and STRING itself is freed, so STRING must have been obtained from
   malloc.  */
char *
escape_string (string)
     char * string;
{
  const char *src;
  char *escaped, *dst;
  /* Both sizes include the terminating null.  */
  int plain_size = 1, escaped_size = 1;

  /* First pass: work out how much room the escaped text needs.  */
  for (src = string; *src != '\0'; src++)
    {
      plain_size++;
      if (*src == '&')
        escaped_size += 5;      /* `&amp;' */
      else if (*src == '<' || *src == '>')
        escaped_size += 4;      /* `&lt;', `&gt;' */
      else
        escaped_size++;
    }

  /* Nothing grew, so nothing needs escaping.  */
  if (escaped_size == plain_size)
    return string;

  /* Second pass: copy, substituting entities as we go.  */
  escaped = xmalloc (escaped_size);
  dst = escaped;
  for (src = string; *src != '\0'; src++)
    {
      const char *entity = NULL;

      switch (*src)
        {
        case '&':
          entity = "&amp;";
          break;
        case '<':
          entity = "&lt;";
          break;
        case '>':
          entity = "&gt;";
          break;
        default:
          break;
        }

      if (entity)
        {
          strcpy (dst, entity);
          dst += strlen (entity);
        }
      else
        *dst++ = *src;
    }
  *dst = '\0';

  free (string);
  return escaped;
}
142
143/* Save current tag.  */
144void
145push_tag (tag)
146     char *tag;
147{
148  HSTACK *newstack = xmalloc (sizeof (HSTACK));
149
150  newstack->tag = tag;
151  newstack->next = htmlstack;
152  htmlstack = newstack;
153}
154
155/* Get last tag.  */
156void
157pop_tag ()
158{
159  HSTACK *tos = htmlstack;
160
161  if (!tos)
162    {
163      line_error (_("[unexpected] no html tag to pop"));
164      return;
165    }
166
167  htmlstack = htmlstack->next;
168  free (tos);
169}
170
171/* Open or close TAG according to START_OR_END. */
172void
173insert_html_tag (start_or_end, tag)
174     int start_or_end;
175     char *tag;
176{
177  char *old_tag = NULL;
178  int do_return = 0;
179
180  if (!paragraph_is_open && (start_or_end == START))
181    {
182      /* Need to compensate for the <p> we are about to insert, or
183	 else cm_xxx functions that call us will get wrong text
184	 between START and END.  */
185      adjust_braces_following (output_paragraph_offset, 3);
186      add_word ("<p>");
187    }
188
189  if (start_or_end != START)
190    pop_tag (tag);
191
192  if (htmlstack)
193    old_tag = htmlstack->tag;
194
195  if (htmlstack
196      && (strcmp (htmlstack->tag, tag) == 0))
197    do_return = 1;
198
199  if (start_or_end == START)
200    push_tag (tag);
201
202  if (do_return)
203    return;
204
205  /* texinfo.tex doesn't support more than one font attribute
206     at the same time.  */
207  if ((start_or_end == START) && old_tag && *old_tag)
208    {
209      add_word ("</");
210      add_word (old_tag);
211      add_char ('>');
212    }
213
214  if (*tag)
215    {
216      add_char ('<');
217      if (start_or_end != START)
218        add_char ('/');
219      add_word (tag);
220      add_char ('>');
221    }
222
223  if ((start_or_end != START) && old_tag && *old_tag)
224    {
225      add_char ('<');
226      add_word (old_tag);
227      add_char ('>');
228    }
229}
230
/* Output an HTML <link> element whose href refers to NODENAME,
   with ATTRIBUTES written verbatim as its other attributes.
   Does nothing when NODENAME is null.  */
void
add_link (nodename, attributes)
     char *nodename, *attributes;
{
  if (nodename)
    {
      add_html_elt ("<link ");
      add_word_args ("%s", attributes);
      add_word (" href=\"");
      add_anchor_name (nodename, 1);
      /* <link> is an empty element: just close the start tag.  A
         trailing </a> here would be a stray end tag.  */
      add_word ("\">\n");
    }
}
246
/* Output NAME with characters escaped as appropriate for an anchor
   name: `&' is written as `&amp;', and any character for which
   URL_SAFE_CHAR is false is written as `%' followed by its two-digit
   hexadecimal code.  */
void
add_escaped_anchor_name (name)
     char *name;
{
  for (; *name; name++)
    {
      if (*name == '&')
        add_word ("&amp;");
      else if (! URL_SAFE_CHAR (*name))
        /* Cast so characters with the high bit set are treated as >128,
           for example o-umlaut should be 246, not -10.  Always emit two
           hex digits: a percent-escape with a single digit (as plain %x
           gives for codes below 16) is not a valid escape.  */
        add_word_args ("%%%02x", (unsigned char) *name);
      else
        add_char (*name);
    }
}
265
266/* Insert the text for the name of a reference in an HTML anchor
267   appropriate for NODENAME.  If HREF is nonzero, it will be
268   appropriate for a href= attribute, rather than name= i.e., including
269   the `#' if it's an internal reference. */
270void
271add_anchor_name (nodename, href)
272     char *nodename;
273     int href;
274{
275  if (href)
276    {
277      if (splitting)
278	add_url_name (nodename, href);
279      add_char ('#');
280    }
281  /* Always add NODENAME, so that the reference would pinpoint the
282     exact node on its file.  This is so several nodes could share the
283     same file, in case of file-name clashes, but also for more
284     accurate browser positioning.  */
285  if (strcasecmp (nodename, "(dir)") == 0)
286    /* Strip the parens, but keep the original letter-case.  */
287    add_word_args ("%.3s", nodename + 1);
288  else
289    add_escaped_anchor_name (nodename);
290}
291
/* Write the file-name part of a URL referring to NODENAME.  This
   simply hands NODENAME and HREF to add_nodename_to_filename.  */
void
add_url_name (nodename, href)
     char *nodename;
     int href;
{
  add_nodename_to_filename (nodename, href);
}
301
/* Only allow alphanumerics and [-_.] when nodifying filenames: every
   other character of FILENAME is overwritten in place with `-'.  This
   may result in filename clashes; e.g.,

   @node Foo ],,,
   @node Foo [,,,

   both map to Foo--.html.  If that happens, cm_node will put all
   the nodes whose file names clash on the same file.  */
void
fix_filename (filename)
     char *filename;
{
  char *p;

  for (p = filename; *p; p++)
    {
      /* The <ctype.h> functions are only defined for values that fit
         in an unsigned char (or EOF), so a plain char with the high
         bit set must not be passed to isalnum directly.  */
      unsigned char c = (unsigned char) *p;

      if (!(isalnum (c) || strchr ("-._", c)))
        *p = '-';
    }
}
321
322/* As we can't look-up a (forward-referenced) nodes' html filename
323   from the tentry, we take the easy way out.  We assume that
324   nodenames are unique, and generate the html filename from the
325   nodename, that's always known.  */
326static char *
327nodename_to_filename_1 (nodename, href)
328     char *nodename;
329     int href;
330{
331  char *p;
332  char *filename;
333  char dirname[PATH_MAX];
334
335  if (strcasecmp (nodename, "Top") == 0)
336    {
337      /* We want to convert references to the Top node into
338	 "index.html#Top".  */
339      if (href)
340	filename = xstrdup ("index.html"); /* "#Top" is added by our callers */
341      else
342	filename = xstrdup ("Top");
343    }
344  else if (strcasecmp (nodename, "(dir)") == 0)
345    /* We want to convert references to the (dir) node into
346       "../index.html".  */
347    filename = xstrdup ("../index.html");
348  else
349    {
350      filename = xmalloc (PATH_MAX);
351      dirname[0] = '\0';
352      *filename = '\0';
353
354      /* Check for external reference: ``(info-document)node-name''
355	 Assume this node lives at: ``../info-document/node-name.html''
356
357	 We need to handle the special case (sigh): ``(info-document)'',
358	 ie, an external top-node, which should translate to:
359	 ``../info-document/info-document.html'' */
360
361      p = nodename;
362      if (*nodename == '(')
363	{
364	  int length;
365
366	  p = strchr (nodename, ')');
367	  if (p == NULL)
368	    {
369	      line_error (_("[unexpected] invalid node name: `%s'"), nodename);
370	      xexit (1);
371	    }
372
373	  length = p - nodename - 1;
374	  if (length > 5 &&
375	      FILENAME_CMPN (p - 5, ".info", 5) == 0)
376	    length -= 5;
377	  /* This is for DOS, and also for Windows and GNU/Linux
378	     systems that might have Info files copied from a DOS 8+3
379	     filesystem.  */
380	  if (length > 4 &&
381	      FILENAME_CMPN (p - 4, ".inf", 4) == 0)
382	    length -= 4;
383	  strcpy (filename, "../");
384	  strncpy (dirname, nodename + 1, length);
385	  *(dirname + length) = '\0';
386	  fix_filename (dirname);
387	  strcat (filename, dirname);
388	  strcat (filename, "/");
389	  p++;
390	}
391
392      /* In the case of just (info-document), there will be nothing
393	 remaining, and we will refer to ../info-document/, which will
394	 work fine.  */
395      strcat (filename, p);
396      if (*p)
397	{
398	  /* Hmm */
399	  fix_filename (filename + strlen (filename) - strlen (p));
400	  strcat (filename, ".html");
401	}
402    }
403
404  /* Produce a file name suitable for the underlying filesystem.  */
405  normalize_filename (filename);
406
407#if 0
408  /* We add ``#Nodified-filename'' anchor to external references to be
409     prepared for non-split HTML support.  Maybe drop this. */
410  if (href && *dirname)
411    {
412      strcat (filename, "#");
413      strcat (filename, p);
414      /* Hmm, again */
415      fix_filename (filename + strlen (filename) - strlen (p));
416    }
417#endif
418
419  return filename;
420}
421
/* Write the html file name derived from NODENAME to the output.  The
   intent is to do so only when it differs from the current file name,
   but no such check is made yet: the name is always written.  */
void
add_nodename_to_filename (nodename, href)
     char *nodename;
     int href;
{
  char *node_file;

  node_file = nodename_to_filename_1 (nodename, href);
  add_word (node_file);

  /* nodename_to_filename_1 hands back allocated storage.  */
  free (node_file);
}
434
/* Return the newly allocated html file name for NODENAME, in the form
   meant for href= attributes.  */
char *
nodename_to_filename (nodename)
     char *nodename;
{
  /* Callers build <a href= from the result, so ask
     nodename_to_filename_1 for the href flavor (nonzero last arg).  */
  char *href_file = nodename_to_filename_1 (nodename, 1);

  return href_file;
}
443