html.c revision 116525
1/* html.c -- html-related utilities.
2   $Id: html.c,v 1.18 2003/06/02 12:32:29 karl Exp $
3
4   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software Foundation,
18   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
19
20#include "system.h"
21#include "cmds.h"
22#include "html.h"
23#include "lang.h"
24#include "makeinfo.h"
25#include "sectioning.h"
26
/* Stack of HTML tags: push_tag and pop_tag below add and remove
   entries, and insert_html_tag consults the top entry.  */
HSTACK *htmlstack = NULL;

/* Forward declaration; the definition follows html_output_head.  */
static char *process_css_file (/* char * */);

/* See html.h.  */
/* Nonzero once html_output_head has started writing the document head;
   html_output_head returns immediately while this is set.  */
int html_output_head_p = 0;
/* html_output_head sets this to 1 when it writes the
   <h1 class="settitle"> heading, and skips the heading if it is
   already nonzero.  */
int html_title_written = 0;
34
35
36void
37html_output_head ()
38{
39  static const char *html_title = NULL;
40
41  if (html_output_head_p)
42    return;
43  html_output_head_p = 1;
44
45  /* The <title> should not have markup, so use text_expansion.  */
46  if (!html_title)
47    html_title = title ? text_expansion (title) : _("Untitled");
48
49  add_word_args ("<html lang=\"%s\">\n<head>\n<title>%s</title>\n",
50                 language_table[language_code].abbrev, html_title);
51
52  add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html");
53  if (document_encoding_code != no_encoding)
54    add_word_args ("; charset=%s",
55                   encoding_table[document_encoding_code].encname);
56  add_word ("\">\n");
57
58  if (!document_description)
59    document_description = html_title;
60
61  add_word_args ("<meta name=\"description\" content=\"%s\">\n",
62                 document_description);
63  add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n",
64                 VERSION);
65#if 0
66  /* let's not do this now, since it causes mozilla to put up a
67     navigation bar.  */
68  add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \
69rel=\"generator-home\">\n");
70#endif
71
72  if (copying_text)
73    { /* copying_text has already been fully expanded in
74         begin_insertion (by full_expansion), so use insert_ rather than
75         add_.  It is not ideal that we include the html markup here within
76         <head>, but the alternative is to have yet more and different
77         expansions of the copying text.  Yuck.  */
78      insert_string ("<!--\n");
79      insert_string (copying_text);
80      insert_string ("-->\n");
81    }
82
83  /* Put the style definitions in a comment for the sake of browsers
84     that don't support <style>.  */
85  add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n");
86  add_word ("<style type=\"text/css\"><!--\n");
87
88  {
89    char *css_inline = NULL;
90
91    if (css_include)
92      /* This writes out any @import commands from the --css-file,
93         and returns any actual css code following the imports.  */
94      css_inline = process_css_file (css_include);
95
96    /* This seems cleaner than adding <br>'s at the end of each line for
97       these "roman" displays.  It's hardly the end of the world if the
98       browser doesn't do <style>s, in any case; they'll just come out in
99       typewriter.  */
100#define CSS_FONT_INHERIT "font-family:inherit"
101    add_word_args ("  pre.display { %s }\n", CSS_FONT_INHERIT);
102    add_word_args ("  pre.format  { %s }\n", CSS_FONT_INHERIT);
103
104    /* Alternatively, we could do <font size=-1> in insertion.c, but this
105       way makes it easier to override.  */
106#define CSS_FONT_SMALLER "font-size:smaller"
107    add_word_args ("  pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT,
108                   CSS_FONT_SMALLER);
109    add_word_args ("  pre.smallformat  { %s; %s }\n", CSS_FONT_INHERIT,
110                   CSS_FONT_SMALLER);
111    add_word_args ("  pre.smallexample { %s }\n", CSS_FONT_SMALLER);
112    add_word_args ("  pre.smalllisp    { %s }\n", CSS_FONT_SMALLER);
113
114    /* Write out any css code from the user's --css-file.  */
115    if (css_inline)
116      add_word (css_inline);
117
118    add_word ("--></style>\n");
119  }
120
121  add_word ("</head>\n<body>\n");
122
123  if (title && !html_title_written && titlepage_cmd_present)
124    {
125      add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title);
126      html_title_written = 1;
127    }
128}
129
130
131
/* A growable character buffer, used while reading the --css-file.
   init_buffer creates an empty one; append_char adds one character at
   a time, (re)allocating as necessary and keeping the contents
   null-terminated.  We don't handle null characters.  */

typedef struct
{
  unsigned size;    /* allocated: bytes currently reserved for BUFFER */
  unsigned length;  /* used: characters stored, not counting the final 0 */
  char *buffer;     /* the text; NULL until the first append_char */
} buffer_type;
141
142
143static buffer_type *
144init_buffer ()
145{
146  buffer_type *buf = xmalloc (sizeof (buffer_type));
147  buf->length = 0;
148  buf->size = 0;
149  buf->buffer = NULL;
150
151  return buf;
152}
153
154
155static void
156append_char (buf, c)
157    buffer_type *buf;
158    int c;
159{
160  buf->length++;
161  if (buf->length >= buf->size)
162    {
163      buf->size += 100;
164      buf->buffer = xrealloc (buf->buffer, buf->size);
165    }
166  buf->buffer[buf->length - 1] = c;
167  buf->buffer[buf->length] = 0;
168}
169
170
171/* Read the cascading style-sheet file FILENAME.  Write out any @import
172   commands, which must come first, by the definition of css.  If the
173   file contains any actual css code following the @imports, return it;
174   else return NULL.  */
175
176static char *
177process_css_file (filename)
178    char *filename;
179{
180  int c, lastchar;
181  FILE *f;
182  buffer_type *import_text = init_buffer ();
183  buffer_type *inline_text = init_buffer ();
184  unsigned lineno = 1;
185  enum { null_state, comment_state, import_state, inline_state } state
186    = null_state, prev_state;
187
188  /* read from stdin if `-' is the filename.  */
189  f = STREQ (filename, "-") ? stdin : fopen (filename, "r");
190  if (!f)
191    {
192      error (_("%s: could not open --css-file: %s"), progname, filename);
193      return NULL;
194    }
195
196  /* Read the file.  The @import statements must come at the beginning,
197     with only whitespace and comments allowed before any inline css code.  */
198  while ((c = getc (f)) >= 0)
199    {
200      if (c == '\n')
201        lineno++;
202
203      switch (state)
204        {
205        case null_state: /* between things */
206          if (c == '@')
207            {
208              /* If there's some other @command, just call it an
209                 import, it's all the same to us.  So don't bother
210                 looking for the `import'.  */
211              append_char (import_text, c);
212              state = import_state;
213            }
214          else if (c == '/')
215            { /* possible start of a comment */
216              int nextchar = getc (f);
217              if (nextchar == '*')
218                state = comment_state;
219              else
220                {
221                  ungetc (nextchar, f); /* wasn't a comment */
222                  state = inline_state;
223                }
224            }
225          else if (isspace (c))
226            ; /* skip whitespace; maybe should use c_isspace?  */
227
228          else
229            /* not an @import, not a comment, not whitespace: we must
230               have started the inline text.  */
231            state = inline_state;
232
233          if (state == inline_state)
234            append_char (inline_text, c);
235
236          if (state != null_state)
237            prev_state = null_state;
238          break;
239
240        case comment_state:
241          if (c == '/' && lastchar == '*')
242            state = prev_state;  /* end of comment */
243          break;  /* else ignore this comment char */
244
245        case import_state:
246          append_char (import_text, c);  /* include this import char */
247          if (c == ';')
248            { /* done with @import */
249              append_char (import_text, '\n');  /* make the output nice */
250              state = null_state;
251              prev_state = import_state;
252            }
253          break;
254
255        case inline_state:
256          /* No harm in writing out comments, so don't bother parsing
257             them out, just append everything.  */
258          append_char (inline_text, c);
259          break;
260        }
261
262      lastchar = c;
263    }
264
265  /* Reached the end of the file.  We should not be still in a comment.  */
266  if (state == comment_state)
267    warning (_("%s:%d: --css-file ended in comment"), filename, lineno);
268
269  /* Write the @import text, if any.  */
270  if (import_text->buffer)
271    {
272      add_word (import_text->buffer);
273      free (import_text->buffer);
274      free (import_text);
275    }
276
277  /* We're wasting the buffer struct memory, but so what.  */
278  return inline_text->buffer;
279}
280
281
282
/* Replace the HTML special characters `&', `<' and `>' in STRING by
   `&amp;', `&lt;' and `&gt;'.  If STRING contains none of them it is
   returned unchanged.  Otherwise a newly allocated copy is returned and
   STRING itself is freed, so STRING must have come from malloc and must
   not be used afterwards. */
char *
escape_string (string)
     char * string;
{
  const char *src;
  char *escaped, *dst;
  int plain_size = 0;    /* bytes in STRING, terminator included */
  int escaped_size = 0;  /* bytes needed after escaping, terminator included */

  /* First pass: measure.  The terminating null is counted too.  */
  for (src = string; ; src++)
    {
      plain_size++;
      if (*src == '&')
        escaped_size += 5;      /* `&amp;' */
      else if (*src == '<' || *src == '>')
        escaped_size += 4;      /* `&lt;', `&gt;' */
      else
        escaped_size++;

      if (*src == '\0')
        break;
    }

  /* Same size means nothing needs escaping.  */
  if (escaped_size == plain_size)
    return string;

  /* Second pass: copy, expanding the special characters.  The
     terminating null is copied by the final branch.  */
  escaped = xmalloc (escaped_size);
  dst = escaped;
  for (src = string; ; src++)
    {
      if (*src == '&')
        {
          strcpy (dst, "&amp;");
          dst += 5;
        }
      else if (*src == '<')
        {
          strcpy (dst, "&lt;");
          dst += 4;
        }
      else if (*src == '>')
        {
          strcpy (dst, "&gt;");
          dst += 4;
        }
      else
        *dst++ = *src;

      if (*src == '\0')
        break;
    }

  free (string);
  return escaped;
}
339
340
341
342/* Save current tag.  */
343void
344push_tag (tag)
345     char *tag;
346{
347  HSTACK *newstack = xmalloc (sizeof (HSTACK));
348
349  newstack->tag = tag;
350  newstack->next = htmlstack;
351  htmlstack = newstack;
352}
353
354/* Get last tag.  */
355void
356pop_tag ()
357{
358  HSTACK *tos = htmlstack;
359
360  if (!tos)
361    {
362      line_error (_("[unexpected] no html tag to pop"));
363      return;
364    }
365
366  htmlstack = htmlstack->next;
367  free (tos);
368}
369
370/* Open or close TAG according to START_OR_END. */
371void
372insert_html_tag (start_or_end, tag)
373     int start_or_end;
374     char *tag;
375{
376  char *old_tag = NULL;
377  int do_return = 0;
378
379  if (!paragraph_is_open && (start_or_end == START))
380    {
381      /* Need to compensate for the <p> we are about to insert, or
382	 else cm_xxx functions that call us will get wrong text
383	 between START and END.  */
384      adjust_braces_following (output_paragraph_offset, 3);
385      add_word ("<p>");
386    }
387
388  if (start_or_end != START)
389    pop_tag ();
390
391  if (htmlstack)
392    old_tag = htmlstack->tag;
393
394  if (htmlstack
395      && (strcmp (htmlstack->tag, tag) == 0))
396    do_return = 1;
397
398  if (start_or_end == START)
399    push_tag (tag);
400
401  if (do_return)
402    return;
403
404  /* texinfo.tex doesn't support more than one font attribute
405     at the same time.  */
406  if ((start_or_end == START) && old_tag && *old_tag)
407    {
408      add_word ("</");
409      add_word (old_tag);
410      add_char ('>');
411    }
412
413  if (*tag)
414    {
415      add_char ('<');
416      if (start_or_end != START)
417        add_char ('/');
418      add_word (tag);
419      add_char ('>');
420    }
421
422  if ((start_or_end != START) && old_tag && *old_tag)
423    {
424      add_char ('<');
425      add_word (old_tag);
426      add_char ('>');
427    }
428}
429
430
431
/* Output an HTML <link> element whose href is the anchor name for
   NODENAME, with the string ATTRIBUTES written verbatim as extra
   attributes before the href.  Nothing is output when NODENAME is
   null. */
void
add_link (nodename, attributes)
     char *nodename, *attributes;
{
  if (!nodename)
    return;

  add_html_elt ("<link ");
  add_word_args ("%s", attributes);
  add_word_args (" href=\"");
  add_anchor_name (nodename, 1);
  add_word ("\">\n");
}
447
/* Output NAME with characters escaped as appropriate for an anchor
   name: `&' is written as `&amp;', every other character for which
   URL_SAFE_CHAR is false is written as `%' followed by its two-digit
   hexadecimal code, and everything else is copied unchanged.  */
void
add_escaped_anchor_name (name)
     char *name;
{
  for (; *name; name++)
    {
      if (*name == '&')
        add_word ("&amp;");
      else if (! URL_SAFE_CHAR (*name))
        /* Cast so characters with the high bit set are treated as >128,
           for example o-umlaut should be 246, not -10.  A percent
           escape is always exactly two hex digits, so pad codes below
           0x10 with a leading zero.  */
        add_word_args ("%%%02x", (unsigned char) *name);
      else
        add_char (*name);
    }
}
466
467/* Insert the text for the name of a reference in an HTML anchor
468   appropriate for NODENAME.  If HREF is nonzero, it will be
469   appropriate for a href= attribute, rather than name= i.e., including
470   the `#' if it's an internal reference. */
471void
472add_anchor_name (nodename, href)
473     char *nodename;
474     int href;
475{
476  if (href)
477    {
478      if (splitting)
479	add_url_name (nodename, href);
480      add_char ('#');
481    }
482  /* Always add NODENAME, so that the reference would pinpoint the
483     exact node on its file.  This is so several nodes could share the
484     same file, in case of file-name clashes, but also for more
485     accurate browser positioning.  */
486  if (strcasecmp (nodename, "(dir)") == 0)
487    /* Strip the parens, but keep the original letter-case.  */
488    add_word_args ("%.3s", nodename + 1);
489  else
490    add_escaped_anchor_name (nodename);
491}
492
/* Insert the text for the name of a reference in an HTML url, appropriate
   for NODENAME.  This just passes NODENAME and HREF on to
   add_nodename_to_filename, which outputs the file name derived from
   the node name.  */
void
add_url_name (nodename, href)
     char *nodename;
     int href;
{
    add_nodename_to_filename (nodename, href);
}
502
/* Only allow [-0-9a-zA-Z_.] when nodifying filenames: every other
   character of FILENAME is overwritten, in place, with `-'.  (The
   alphanumeric test is isalnum, so what counts as a letter or digit
   follows the current locale.)  This may result in filename clashes;
   e.g.,

   @node Foo ],,,
   @node Foo [,,,

   both map to Foo--.html.  If that happens, cm_node will put all
   the nodes whose file names clash on the same file.  */
void
fix_filename (filename)
     char *filename;
{
  char *p;

  for (p = filename; *p; p++)
    {
      /* The <ctype.h> functions are only defined for EOF and values
         representable as unsigned char; a plain char with the high
         bit set may be negative, so convert before testing.  */
      unsigned char ch = (unsigned char) *p;

      if (!(isalnum (ch) || strchr ("-._", ch)))
        *p = '-';
    }
}
522
523/* As we can't look-up a (forward-referenced) nodes' html filename
524   from the tentry, we take the easy way out.  We assume that
525   nodenames are unique, and generate the html filename from the
526   nodename, that's always known.  */
527static char *
528nodename_to_filename_1 (nodename, href)
529     char *nodename;
530     int href;
531{
532  char *p;
533  char *filename;
534  char dirname[PATH_MAX];
535
536  if (strcasecmp (nodename, "Top") == 0)
537    {
538      /* We want to convert references to the Top node into
539	 "index.html#Top".  */
540      if (href)
541	filename = xstrdup ("index.html"); /* "#Top" is added by our callers */
542      else
543	filename = xstrdup ("Top");
544    }
545  else if (strcasecmp (nodename, "(dir)") == 0)
546    /* We want to convert references to the (dir) node into
547       "../index.html".  */
548    filename = xstrdup ("../index.html");
549  else
550    {
551      filename = xmalloc (PATH_MAX);
552      dirname[0] = '\0';
553      *filename = '\0';
554
555      /* Check for external reference: ``(info-document)node-name''
556	 Assume this node lives at: ``../info-document/node-name.html''
557
558	 We need to handle the special case (sigh): ``(info-document)'',
559	 ie, an external top-node, which should translate to:
560	 ``../info-document/info-document.html'' */
561
562      p = nodename;
563      if (*nodename == '(')
564	{
565	  int length;
566
567	  p = strchr (nodename, ')');
568	  if (p == NULL)
569	    {
570	      line_error (_("[unexpected] invalid node name: `%s'"), nodename);
571	      xexit (1);
572	    }
573
574	  length = p - nodename - 1;
575	  if (length > 5 &&
576	      FILENAME_CMPN (p - 5, ".info", 5) == 0)
577	    length -= 5;
578	  /* This is for DOS, and also for Windows and GNU/Linux
579	     systems that might have Info files copied from a DOS 8+3
580	     filesystem.  */
581	  if (length > 4 &&
582	      FILENAME_CMPN (p - 4, ".inf", 4) == 0)
583	    length -= 4;
584	  strcpy (filename, "../");
585	  strncpy (dirname, nodename + 1, length);
586	  *(dirname + length) = '\0';
587	  fix_filename (dirname);
588	  strcat (filename, dirname);
589	  strcat (filename, "/");
590	  p++;
591	}
592
593      /* In the case of just (info-document), there will be nothing
594	 remaining, and we will refer to ../info-document/, which will
595	 work fine.  */
596      strcat (filename, p);
597      if (*p)
598	{
599	  /* Hmm */
600	  fix_filename (filename + strlen (filename) - strlen (p));
601	  strcat (filename, ".html");
602	}
603    }
604
605  /* Produce a file name suitable for the underlying filesystem.  */
606  normalize_filename (filename);
607
608#if 0
609  /* We add ``#Nodified-filename'' anchor to external references to be
610     prepared for non-split HTML support.  Maybe drop this. */
611  if (href && *dirname)
612    {
613      strcat (filename, "#");
614      strcat (filename, p);
615      /* Hmm, again */
616      fix_filename (filename + strlen (filename) - strlen (p));
617    }
618#endif
619
620  return filename;
621}
622
/* Output the file name that corresponds to NODENAME.  The intent is to
   do so only if necessary, ie, if current filename != filename of
   node; that comparison is not made yet, so the name is always
   output.  HREF is handed on to nodename_to_filename_1.  */
void
add_nodename_to_filename (nodename, href)
     char *nodename;
     int href;
{
  char *fname;

  /* for now, don't check: always output filename */
  fname = nodename_to_filename_1 (nodename, href);
  add_word (fname);

  /* nodename_to_filename_1 allocated the name for us.  */
  free (fname);
}
635
/* Return the html file name for NODENAME, as computed by
   nodename_to_filename_1 with a nonzero HREF argument.  The string is
   allocated by nodename_to_filename_1; add_nodename_to_filename frees
   the one it gets, so callers here presumably should too.  */
char *
nodename_to_filename (nodename)
     char *nodename;
{
  /* The callers of nodename_to_filename use the result to produce
     <a href=, so call nodename_to_filename_1 with last arg non-zero.  */
  return nodename_to_filename_1 (nodename, 1);
}
644