156160Sru/* html.c -- html-related utilities.
2146515Sru   $Id: html.c,v 1.28 2004/12/06 01:13:06 karl Exp $
356160Sru
4146515Sru   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 Free Software
5146515Sru   Foundation, Inc.
656160Sru
756160Sru   This program is free software; you can redistribute it and/or modify
856160Sru   it under the terms of the GNU General Public License as published by
956160Sru   the Free Software Foundation; either version 2, or (at your option)
1056160Sru   any later version.
1156160Sru
1256160Sru   This program is distributed in the hope that it will be useful,
1356160Sru   but WITHOUT ANY WARRANTY; without even the implied warranty of
1456160Sru   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1556160Sru   GNU General Public License for more details.
1656160Sru
1756160Sru   You should have received a copy of the GNU General Public License
1856160Sru   along with this program; if not, write to the Free Software Foundation,
1956160Sru   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
2056160Sru
2156160Sru#include "system.h"
2256160Sru#include "cmds.h"
23146515Sru#include "files.h"
2456160Sru#include "html.h"
2556160Sru#include "lang.h"
2656160Sru#include "makeinfo.h"
27146515Sru#include "node.h"
2856160Sru#include "sectioning.h"
2956160Sru
3056160Sru
31116525Sru/* A growable character buffer.  It is filled one character at a time
   by append_char, which (re)allocates as necessary and keeps BUFFER
   null-terminated.  We don't handle
32116525Sru   null characters.  */
33116525Sru
34116525Srutypedef struct
35116525Sru{
36116525Sru  unsigned size;    /* bytes allocated for BUFFER */
37116525Sru  unsigned length;  /* characters used, not counting the terminator */
38116525Sru  char *buffer;     /* the text itself; NULL until the first append */
39116525Sru} buffer_type;
40116525Sru
41116525Srustatic buffer_type *
42146515Sruinit_buffer (void)
43116525Sru{
44116525Sru  buffer_type *buf = xmalloc (sizeof (buffer_type));
45116525Sru  buf->length = 0;
46116525Sru  buf->size = 0;
47116525Sru  buf->buffer = NULL;
48116525Sru
49116525Sru  return buf;
50116525Sru}
51116525Sru
52116525Srustatic void
53146515Sruappend_char (buffer_type *buf, int c)
54116525Sru{
55116525Sru  buf->length++;
56116525Sru  if (buf->length >= buf->size)
57116525Sru    {
58116525Sru      buf->size += 100;
59116525Sru      buf->buffer = xrealloc (buf->buffer, buf->size);
60116525Sru    }
61116525Sru  buf->buffer[buf->length - 1] = c;
62116525Sru  buf->buffer[buf->length] = 0;
63116525Sru}
64116525Sru
65116525Sru/* Read the cascading style-sheet file FILENAME.  Write out any @import
66116525Sru   commands, which must come first, by the definition of css.  If the
67116525Sru   file contains any actual css code following the @imports, return it;
68116525Sru   else return NULL.  */
69116525Srustatic char *
70146515Sruprocess_css_file (char *filename)
71116525Sru{
72146515Sru  int c;
73146515Sru  int lastchar = 0;
74116525Sru  FILE *f;
75116525Sru  buffer_type *import_text = init_buffer ();
76116525Sru  buffer_type *inline_text = init_buffer ();
77116525Sru  unsigned lineno = 1;
78116525Sru  enum { null_state, comment_state, import_state, inline_state } state
79116525Sru    = null_state, prev_state;
80116525Sru
81146515Sru  prev_state = null_state;
82146515Sru
83116525Sru  /* read from stdin if `-' is the filename.  */
84116525Sru  f = STREQ (filename, "-") ? stdin : fopen (filename, "r");
85116525Sru  if (!f)
86116525Sru    {
87116525Sru      error (_("%s: could not open --css-file: %s"), progname, filename);
88116525Sru      return NULL;
89116525Sru    }
90116525Sru
91116525Sru  /* Read the file.  The @import statements must come at the beginning,
92116525Sru     with only whitespace and comments allowed before any inline css code.  */
93116525Sru  while ((c = getc (f)) >= 0)
94116525Sru    {
95116525Sru      if (c == '\n')
96116525Sru        lineno++;
97116525Sru
98116525Sru      switch (state)
99116525Sru        {
100116525Sru        case null_state: /* between things */
101116525Sru          if (c == '@')
102146515Sru            { /* Only @import and @charset should switch into
103146515Sru                 import_state, other @-commands, such as @media, should
104146515Sru                 put us into inline_state.  I don't think any other css
105146515Sru                 @-commands start with `i' or `c', although of course
106146515Sru                 this will break when such a command is defined.  */
107146515Sru              int nextchar = getc (f);
108146515Sru              if (nextchar == 'i' || nextchar == 'c')
109146515Sru                {
110146515Sru                  append_char (import_text, c);
111146515Sru                  state = import_state;
112146515Sru                }
113146515Sru              else
114146515Sru                {
115146515Sru                  ungetc (nextchar, f);  /* wasn't an @import */
116146515Sru                  state = inline_state;
117146515Sru                }
118116525Sru            }
119116525Sru          else if (c == '/')
120116525Sru            { /* possible start of a comment */
121116525Sru              int nextchar = getc (f);
122116525Sru              if (nextchar == '*')
123116525Sru                state = comment_state;
124116525Sru              else
125116525Sru                {
126116525Sru                  ungetc (nextchar, f); /* wasn't a comment */
127116525Sru                  state = inline_state;
128116525Sru                }
129116525Sru            }
130116525Sru          else if (isspace (c))
131116525Sru            ; /* skip whitespace; maybe should use c_isspace?  */
132116525Sru
133116525Sru          else
134116525Sru            /* not an @import, not a comment, not whitespace: we must
135116525Sru               have started the inline text.  */
136116525Sru            state = inline_state;
137116525Sru
138116525Sru          if (state == inline_state)
139116525Sru            append_char (inline_text, c);
140116525Sru
141116525Sru          if (state != null_state)
142116525Sru            prev_state = null_state;
143116525Sru          break;
144116525Sru
145116525Sru        case comment_state:
146116525Sru          if (c == '/' && lastchar == '*')
147116525Sru            state = prev_state;  /* end of comment */
148116525Sru          break;  /* else ignore this comment char */
149116525Sru
150116525Sru        case import_state:
151116525Sru          append_char (import_text, c);  /* include this import char */
152116525Sru          if (c == ';')
153116525Sru            { /* done with @import */
154116525Sru              append_char (import_text, '\n');  /* make the output nice */
155116525Sru              state = null_state;
156116525Sru              prev_state = import_state;
157116525Sru            }
158116525Sru          break;
159116525Sru
160116525Sru        case inline_state:
161116525Sru          /* No harm in writing out comments, so don't bother parsing
162116525Sru             them out, just append everything.  */
163116525Sru          append_char (inline_text, c);
164116525Sru          break;
165116525Sru        }
166116525Sru
167116525Sru      lastchar = c;
168116525Sru    }
169116525Sru
170116525Sru  /* Reached the end of the file.  We should not be still in a comment.  */
171116525Sru  if (state == comment_state)
172116525Sru    warning (_("%s:%d: --css-file ended in comment"), filename, lineno);
173116525Sru
174116525Sru  /* Write the @import text, if any.  */
175116525Sru  if (import_text->buffer)
176116525Sru    {
177116525Sru      add_word (import_text->buffer);
178116525Sru      free (import_text->buffer);
179116525Sru      free (import_text);
180116525Sru    }
181116525Sru
182116525Sru  /* We're wasting the buffer struct memory, but so what.  */
183116525Sru  return inline_text->buffer;
184116525Sru}
185146515Sru
/* Stack of saved HTML tags, most recent first.  push_tag adds an entry
   and pop_tag removes one.  */
186146515SruHSTACK *htmlstack = NULL;
187116525Sru
188146515Sru/* See html.h.  */
/* html_output_head_p is set by html_output_head when it starts writing,
   and makes later calls return immediately while it stays nonzero.
   html_title_written is set once html_output_head has written the
   title as an <h1>.  */
189146515Sruint html_output_head_p = 0;
190146515Sruint html_title_written = 0;
191116525Sru
192146515Sruvoid
193146515Sruhtml_output_head (void)
194146515Sru{
195146515Sru  static const char *html_title = NULL;
196146515Sru  char *encoding;
197146515Sru
198146515Sru  if (html_output_head_p)
199146515Sru    return;
200146515Sru  html_output_head_p = 1;
201146515Sru
202146515Sru  encoding = current_document_encoding ();
203146515Sru
204146515Sru  /* The <title> should not have markup, so use text_expansion.  */
205146515Sru  if (!html_title)
206146515Sru    html_title = escape_string (title ?
207146515Sru        text_expansion (title) : (char *) _("Untitled"));
208146515Sru
209146515Sru  /* Make sure this is the very first string of the output document.  */
210146515Sru  output_paragraph_offset = 0;
211146515Sru
212146515Sru  add_html_block_elt_args ("<html lang=\"%s\">\n<head>\n",
213146515Sru      language_table[language_code].abbrev);
214146515Sru
215146515Sru  /* When splitting, add current node's name to title if it's available and not
216146515Sru     Top.  */
217146515Sru  if (splitting && current_node && !STREQ (current_node, "Top"))
218146515Sru    add_word_args ("<title>%s - %s</title>\n",
219146515Sru        escape_string (xstrdup (current_node)), html_title);
220146515Sru  else
221146515Sru    add_word_args ("<title>%s</title>\n",  html_title);
222146515Sru
223146515Sru  add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html");
224146515Sru  if (encoding && *encoding)
225146515Sru    add_word_args ("; charset=%s", encoding);
226146515Sru
227146515Sru  add_word ("\">\n");
228146515Sru
229146515Sru  if (!document_description)
230146515Sru    document_description = html_title;
231146515Sru
232146515Sru  add_word_args ("<meta name=\"description\" content=\"%s\">\n",
233146515Sru                 document_description);
234146515Sru  add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n",
235146515Sru                 VERSION);
236146515Sru
237146515Sru  /* Navigation bar links.  */
238146515Sru  if (!splitting)
239146515Sru    add_word ("<link title=\"Top\" rel=\"top\" href=\"#Top\">\n");
240146515Sru  else if (tag_table)
241146515Sru    {
242146515Sru      /* Always put a top link.  */
243146515Sru      add_word ("<link title=\"Top\" rel=\"start\" href=\"index.html#Top\">\n");
244146515Sru
245146515Sru      /* We already have a top link, avoid duplication.  */
246146515Sru      if (tag_table->up && !STREQ (tag_table->up, "Top"))
247146515Sru        add_link (tag_table->up, "rel=\"up\"");
248146515Sru
249146515Sru      if (tag_table->prev)
250146515Sru        add_link (tag_table->prev, "rel=\"prev\"");
251146515Sru
252146515Sru      if (tag_table->next)
253146515Sru        add_link (tag_table->next, "rel=\"next\"");
254146515Sru
255146515Sru      /* fixxme: Look for a way to put links to various indices in the
256146515Sru         document.  Also possible candidates to be added here are First and
257146515Sru         Last links.  */
258146515Sru    }
259146515Sru  else
260146515Sru    {
261146515Sru      /* We are splitting, but we neither have a tag_table.  So this must be
262146515Sru         index.html.  So put a link to Top. */
263146515Sru      add_word ("<link title=\"Top\" rel=\"start\" href=\"#Top\">\n");
264146515Sru    }
265146515Sru
266146515Sru  add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \
267146515Srurel=\"generator-home\" title=\"Texinfo Homepage\">\n");
268146515Sru
269146515Sru  if (copying_text)
270146515Sru    { /* It is not ideal that we include the html markup here within
271146515Sru         <head>, so we use text_expansion.  */
272146515Sru      insert_string ("<!--\n");
273146515Sru      insert_string (text_expansion (copying_text));
274146515Sru      insert_string ("-->\n");
275146515Sru    }
276146515Sru
277146515Sru  /* Put the style definitions in a comment for the sake of browsers
278146515Sru     that don't support <style>.  */
279146515Sru  add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n");
280146515Sru  add_word ("<style type=\"text/css\"><!--\n");
281146515Sru
282146515Sru  {
283146515Sru    char *css_inline = NULL;
284146515Sru
285146515Sru    if (css_include)
286146515Sru      /* This writes out any @import commands from the --css-file,
287146515Sru         and returns any actual css code following the imports.  */
288146515Sru      css_inline = process_css_file (css_include);
289146515Sru
290146515Sru    /* This seems cleaner than adding <br>'s at the end of each line for
291146515Sru       these "roman" displays.  It's hardly the end of the world if the
292146515Sru       browser doesn't do <style>s, in any case; they'll just come out in
293146515Sru       typewriter.  */
294146515Sru#define CSS_FONT_INHERIT "font-family:inherit"
295146515Sru    add_word_args ("  pre.display { %s }\n", CSS_FONT_INHERIT);
296146515Sru    add_word_args ("  pre.format  { %s }\n", CSS_FONT_INHERIT);
297146515Sru
298146515Sru    /* Alternatively, we could do <font size=-1> in insertion.c, but this
299146515Sru       way makes it easier to override.  */
300146515Sru#define CSS_FONT_SMALLER "font-size:smaller"
301146515Sru    add_word_args ("  pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT,
302146515Sru                   CSS_FONT_SMALLER);
303146515Sru    add_word_args ("  pre.smallformat  { %s; %s }\n", CSS_FONT_INHERIT,
304146515Sru                   CSS_FONT_SMALLER);
305146515Sru    add_word_args ("  pre.smallexample { %s }\n", CSS_FONT_SMALLER);
306146515Sru    add_word_args ("  pre.smalllisp    { %s }\n", CSS_FONT_SMALLER);
307146515Sru
308146515Sru    /* Since HTML doesn't have a sc element, we use span with a bit of
309146515Sru       CSS spice instead.  */
310146515Sru#define CSS_FONT_SMALL_CAPS "font-variant:small-caps"
311146515Sru    add_word_args ("  span.sc    { %s }\n", CSS_FONT_SMALL_CAPS);
312146515Sru
313146515Sru    /* Roman (default) font class, closest we can come.  */
314146515Sru#define CSS_FONT_ROMAN "font-family:serif; font-weight:normal;"
315146515Sru    add_word_args ("  span.roman { %s } \n", CSS_FONT_ROMAN);
316146515Sru
317146515Sru    /* Sans serif font class.  */
318146515Sru#define CSS_FONT_SANSSERIF "font-family:sans-serif; font-weight:normal;"
319146515Sru    add_word_args ("  span.sansserif { %s } \n", CSS_FONT_SANSSERIF);
320146515Sru
321146515Sru    /* Write out any css code from the user's --css-file.  */
322146515Sru    if (css_inline)
323146515Sru      insert_string (css_inline);
324146515Sru
325146515Sru    add_word ("--></style>\n");
326146515Sru  }
327146515Sru
328146515Sru  add_word ("</head>\n<body>\n");
329146515Sru
330146515Sru  if (title && !html_title_written && titlepage_cmd_present)
331146515Sru    {
332146515Sru      add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title);
333146515Sru      html_title_written = 1;
334146515Sru    }
335146515Sru
336146515Sru  free (encoding);
337146515Sru}
338116525Sru
/* Return STRING with the HTML special characters `"', `&', `<' and `>'
   replaced by the corresponding character entities.  If STRING contains
   none of them it is returned as is; otherwise STRING is freed and a
   newly allocated string is returned.  */
char *
escape_string (char *string)
{
  const char *src;
  char *escaped, *dst;
  int plain_size = 0;    /* bytes in STRING, terminator included */
  int escaped_size = 0;  /* bytes needed once entities are expanded */

  /* First pass: measure.  The terminator is counted on both sides.  */
  for (src = string; ; src++)
    {
      plain_size++;

      if (*src == '"')
        escaped_size += 6;      /* `&quot;' */
      else if (*src == '&')
        escaped_size += 5;      /* `&amp;' */
      else if (*src == '<' || *src == '>')
        escaped_size += 4;      /* `&lt;', `&gt;' */
      else
        escaped_size++;

      if (!*src)
        break;
    }

  /* Nothing grew, so there is nothing to escape.  */
  if (escaped_size == plain_size)
    return string;

  /* Second pass: copy, expanding as we go; the terminator is copied by
     the final iteration.  */
  escaped = xmalloc (escaped_size);
  dst = escaped;
  for (src = string; ; src++)
    {
      const char *entity = NULL;

      if (*src == '"')
        entity = "&quot;";
      else if (*src == '&')
        entity = "&amp;";
      else if (*src == '<')
        entity = "&lt;";
      else if (*src == '>')
        entity = "&gt;";

      if (entity)
        {
          size_t entity_len = strlen (entity);
          memcpy (dst, entity, entity_len);
          dst += entity_len;
        }
      else
        *dst++ = *src;

      if (!*src)
        break;
    }

  free (string);
  return escaped;
}
401114472Sru
402114472Sru/* Save current tag.  */
403146515Srustatic void
404146515Srupush_tag (char *tag, char *attribs)
405114472Sru{
406114472Sru  HSTACK *newstack = xmalloc (sizeof (HSTACK));
40756160Sru
408114472Sru  newstack->tag = tag;
409146515Sru  newstack->attribs = xstrdup (attribs);
410114472Sru  newstack->next = htmlstack;
411114472Sru  htmlstack = newstack;
412114472Sru}
413114472Sru
414114472Sru/* Get last tag.  */
415146515Srustatic void
416146515Srupop_tag (void)
417114472Sru{
418114472Sru  HSTACK *tos = htmlstack;
419114472Sru
420114472Sru  if (!tos)
421114472Sru    {
422114472Sru      line_error (_("[unexpected] no html tag to pop"));
423114472Sru      return;
424114472Sru    }
425114472Sru
426146515Sru  free (htmlstack->attribs);
427146515Sru
428114472Sru  htmlstack = htmlstack->next;
429114472Sru  free (tos);
430114472Sru}
431114472Sru
432146515Sru/* Check if tag is an empty or a whitespace only element.
433146515Sru   If so, remove it, keeping whitespace intact.  */
434146515Sruint
435146515Srurollback_empty_tag (char *tag)
436146515Sru{
437146515Sru  int check_position = output_paragraph_offset;
438146515Sru  int taglen = strlen (tag);
439146515Sru  int rollback_happened = 0;
440146515Sru  char *contents = "";
441146515Sru  char *contents_canon_white = "";
442146515Sru
443146515Sru  /* If output_paragraph is empty, we cannot rollback :-\  */
444146515Sru  if (output_paragraph_offset <= 0)
445146515Sru    return 0;
446146515Sru
447146515Sru  /* Find the end of the previous tag.  */
448146515Sru  while (output_paragraph[check_position-1] != '>' && check_position > 0)
449146515Sru    check_position--;
450146515Sru
451146515Sru  /* Save stuff between tag's end to output_paragraph's end.  */
452146515Sru  if (check_position != output_paragraph_offset)
453146515Sru    {
454146515Sru      contents = xmalloc (output_paragraph_offset - check_position + 1);
455146515Sru      memcpy (contents, output_paragraph + check_position,
456146515Sru          output_paragraph_offset - check_position);
457146515Sru
458146515Sru      contents[output_paragraph_offset - check_position] = '\0';
459146515Sru
460146515Sru      contents_canon_white = xstrdup (contents);
461146515Sru      canon_white (contents_canon_white);
462146515Sru    }
463146515Sru
464146515Sru  /* Find the start of the previous tag.  */
465146515Sru  while (output_paragraph[check_position-1] != '<' && check_position > 0)
466146515Sru    check_position--;
467146515Sru
468146515Sru  /* Check to see if this is the tag.  */
469146515Sru  if (strncmp ((char *) output_paragraph + check_position, tag, taglen) == 0
470146515Sru      && (whitespace (output_paragraph[check_position + taglen])
471146515Sru          || output_paragraph[check_position + taglen] == '>'))
472146515Sru    {
473146515Sru      if (!contents_canon_white || !*contents_canon_white)
474146515Sru        {
475146515Sru          /* Empty content after whitespace removal, so roll it back.  */
476146515Sru          output_paragraph_offset = check_position - 1;
477146515Sru          rollback_happened = 1;
478146515Sru
479146515Sru          /* Original contents may not be empty (whitespace.)  */
480146515Sru          if (contents && *contents)
481146515Sru            {
482146515Sru              insert_string (contents);
483146515Sru              free (contents);
484146515Sru            }
485146515Sru        }
486146515Sru    }
487146515Sru
488146515Sru  return rollback_happened;
489146515Sru}
490146515Sru
49156160Sru/* Open or close TAG according to START_OR_END.
   FORMAT, when non-null, is a printf-style template which, together
   with the remaining arguments, produces the attribute text written
   inside the opening tag.

   The element that is on top of htmlstack is closed before TAG is
   opened and opened again after TAG is closed, so only one saved
   element is open in the output at a time.  */
49256160Sruvoid
493146515Sru#if defined (VA_FPRINTF) && __STDC__
494146515Sruinsert_html_tag_with_attribute (int start_or_end, char *tag, char *format, ...)
495146515Sru#else
496146515Sruinsert_html_tag_with_attribute (start_or_end, tag, format, va_alist)
49756160Sru     int start_or_end;
49856160Sru     char *tag;
499146515Sru     char *format;
500146515Sru     va_dcl
501146515Sru#endif
50256160Sru{
503114472Sru  char *old_tag = NULL;
504146515Sru  char *old_attribs = NULL;
505146515Sru  char formatted_attribs[2000]; /* xx no fixed limits */
506114472Sru  int do_return = 0;
507146515Sru  extern int in_html_elt;
508114472Sru
  /* When closing, first drop the stack entry for the element being
     closed, so that the top of the stack is the enclosing element.  */
50956160Sru  if (start_or_end != START)
510116525Sru    pop_tag ();
511114472Sru
  /* Remember the element now on top of the stack, if any.  */
512114472Sru  if (htmlstack)
513146515Sru    {
514146515Sru      old_tag = htmlstack->tag;
515146515Sru      old_attribs = htmlstack->attribs;
516146515Sru    }
517146515Sru
  /* Build the attribute text; it is empty when FORMAT is null.  */
518146515Sru  if (format)
519146515Sru    {
520146515Sru#ifdef VA_SPRINTF
521146515Sru      va_list ap;
522146515Sru#endif
523114472Sru
524146515Sru      VA_START (ap, format);
525146515Sru#ifdef VA_SPRINTF
526146515Sru      VA_SPRINTF (formatted_attribs, format, ap);
527146515Sru#else
528146515Sru      sprintf (formatted_attribs, format, a1, a2, a3, a4, a5, a6, a7, a8);
529146515Sru#endif
530146515Sru      va_end (ap);
531146515Sru    }
532146515Sru  else
533146515Sru    formatted_attribs[0] = '\0';
534146515Sru
535146515Sru  /* Exception: can nest multiple spans.  */
  /* As written: produce no output when TAG equals the tag on top of the
     stack, except when TAG is "span" and the attribute text is the same
     as that of the top entry.  */
536114472Sru  if (htmlstack
537146515Sru      && STREQ (htmlstack->tag, tag)
538146515Sru      && !(STREQ (tag, "span") && STREQ (old_attribs, formatted_attribs)))
539114472Sru    do_return = 1;
540114472Sru
  /* The push happens even when we are about to return early, which
     keeps the stack balanced with the pop done for the matching close.  */
541114472Sru  if (start_or_end == START)
542146515Sru    push_tag (tag, formatted_attribs);
543114472Sru
544114472Sru  if (do_return)
545114472Sru    return;
546114472Sru
547146515Sru  in_html_elt++;
548146515Sru
549114472Sru  /* texinfo.tex doesn't support more than one font attribute
550114472Sru     at the same time.  */
  /* So when opening, close the enclosing element first, unless it was
     still empty and rollback_empty_tag removed its opening tag.  */
551146515Sru  if ((start_or_end == START) && old_tag && *old_tag
552146515Sru      && !rollback_empty_tag (old_tag))
553146515Sru    add_word_args ("</%s>", old_tag);
554114472Sru
555114472Sru  if (*tag)
556114472Sru    {
557146515Sru      if (start_or_end == START)
558146515Sru        add_word_args (format ? "<%s %s>" : "<%s>", tag, formatted_attribs);
559146515Sru      else if (!rollback_empty_tag (tag))
560146515Sru        /* Insert close tag only if we didn't rollback,
561146515Sru           in which case the opening tag is removed.  */
562146515Sru        add_word_args ("</%s>", tag);
563114472Sru    }
564114472Sru
  /* When closing, open the enclosing element again, with the attribute
     text that was saved for it.  */
565114472Sru  if ((start_or_end != START) && old_tag && *old_tag)
566146515Sru    add_word_args (strlen (old_attribs) > 0 ? "<%s %s>" : "<%s>",
567146515Sru        old_tag, old_attribs);
568146515Sru
569146515Sru  in_html_elt--;
57056160Sru}
57156160Sru
/* Open or close TAG according to START_OR_END, with no attributes.  */
void
insert_html_tag (int start_or_end, char *tag)
{
  char *no_attribute_format = NULL;

  insert_html_tag_with_attribute (start_or_end, tag, no_attribute_format);
}
577116525Sru
/* Output an HTML <link> to the filename for NODENAME, including
   ATTRIBUTES as extra attributes.  NODENAME is also written as the
   link's title, with HTML special characters escaped.  Does nothing if
   NODENAME is null.  */
void
add_link (char *nodename, char *attributes)
{
  char *title;

  if (!nodename)
    return;

  add_html_elt ("<link ");
  add_word_args ("%s", attributes);
  add_word_args (" href=\"");
  add_anchor_name (nodename, 1);

  /* The name goes inside a double-quoted attribute value, so it has to
     be escaped, just like the node name written into <title>.  The copy
     is taken after add_anchor_name, which may canonicalize whitespace
     in NODENAME, so the title text is otherwise unchanged.  */
  title = escape_string (xstrdup (nodename));
  add_word_args ("\" title=\"%s\">\n", title);
  free (title);
}
59256160Sru
/* Output NAME as an anchor name.  Whitespace in NAME is canonicalized
   in place first, and each whitespace character is then written as `-'.

   If OLD is zero, use the new convention: characters that are not URL
   safe are written as _00hh, and `g_t' is prepended when NAME does not
   begin with an ASCII letter.  (See the manual for details on the new
   scheme.)

   If OLD is nonzero, generate the node name with the 4.6-and-earlier
   convention of %hh (and more special characters output as-is, notably
   - and *).  This is only so that external references to old names can
   still work with HTML generated by the new makeinfo; the gcc folks
   needed this.  Our own HTML does not refer to these names.  */

void
add_escaped_anchor_name (char *name, int old)
{
  static const char ascii_letters[]
    = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  char *p;

  canon_white (name);

  /* XHTML does not allow anything but an ASCII letter to start an
     identifier.  Therefore kludge in this constant string if we
     have a nonletter.  */
  if (!old && strchr (ascii_letters, *name) == NULL)
    add_word ("g_t");

  p = name;
  while (*p)
    {
      const char ch = *p;

      if (cr_or_whitespace (ch))
        add_char ('-');
      else if (URL_SAFE_CHAR (ch))
        add_char (ch);
      else if (!old)
        /* Cast so characters with the high bit set are treated as >128,
           for example o-umlaut should be 246, not -10.  */
        add_word_args ("_00%x", (unsigned char) ch);
      else if (OLD_URL_SAFE_CHAR (ch))
        add_char (ch);
      else
        /* Different output convention, but still cast as above.  */
        add_word_args ("%%%x", (unsigned char) ch);

      p++;
    }
}
63456160Sru
63556160Sru/* Insert the text for the name of a reference in an HTML anchor
636146515Sru   appropriate for NODENAME.
637146515Sru
638146515Sru   If HREF is zero, generate text for name= in the new node name
639146515Sru     conversion convention.
640146515Sru   If HREF is negative, generate text for name= in the old convention.
641146515Sru   If HREF is positive, generate the name for an href= attribute, i.e.,
642146515Sru     including the `#' if it's an internal reference.   */
64356160Sruvoid
644146515Sruadd_anchor_name (char *nodename, int href)
64556160Sru{
646146515Sru  if (href > 0)
64793139Sru    {
64893139Sru      if (splitting)
64993139Sru	add_url_name (nodename, href);
65093139Sru      add_char ('#');
65193139Sru    }
65293139Sru  /* Always add NODENAME, so that the reference would pinpoint the
65393139Sru     exact node on its file.  This is so several nodes could share the
65493139Sru     same file, in case of file-name clashes, but also for more
65593139Sru     accurate browser positioning.  */
65693139Sru  if (strcasecmp (nodename, "(dir)") == 0)
65793139Sru    /* Strip the parens, but keep the original letter-case.  */
65893139Sru    add_word_args ("%.3s", nodename + 1);
659146515Sru  else if (strcasecmp (nodename, "top") == 0)
660146515Sru    add_word ("Top");
66193139Sru  else
662146515Sru    add_escaped_anchor_name (nodename, href < 0);
66393139Sru}
66456160Sru
66593139Sru/* Insert the text for the name of a reference in an HTML url, appropriate
66693139Sru   for NODENAME.  This just passes NODENAME and HREF on to
   add_nodename_to_filename.  */
66793139Sruvoid
668146515Sruadd_url_name (char *nodename, int href)
66993139Sru{
67093139Sru    add_nodename_to_filename (nodename, href);
67193139Sru}
67293139Sru
673146515Sru/* Convert non [A-Za-z0-9] to _00xx, where xx means the hexadecimal
674146515Sru   representation of the ASCII character.  Also convert spaces and
675146515Sru   newlines to dashes.  */
676146515Srustatic void
677146515Srufix_filename (char *filename)
678146515Sru{
679146515Sru  int i;
680146515Sru  int len = strlen (filename);
681146515Sru  char *oldname = xstrdup (filename);
68293139Sru
683146515Sru  *filename = '\0';
68493139Sru
685146515Sru  for (i = 0; i < len; i++)
68693139Sru    {
687146515Sru      if (cr_or_whitespace (oldname[i]))
688146515Sru        strcat (filename, "-");
689146515Sru      else if (URL_SAFE_CHAR (oldname[i]))
690146515Sru        strncat (filename, (char *) oldname + i, 1);
691146515Sru      else
692146515Sru        {
693146515Sru          char *hexchar = xmalloc (6 * sizeof (char));
694146515Sru          sprintf (hexchar, "_00%x", (unsigned char) oldname[i]);
695146515Sru          strcat (filename, hexchar);
696146515Sru          free (hexchar);
697146515Sru        }
698146515Sru
699146515Sru      /* Check if we are nearing boundaries.  */
700146515Sru      if (strlen (filename) >= PATH_MAX - 20)
701146515Sru        break;
70293139Sru    }
703146515Sru
704146515Sru  free (oldname);
70593139Sru}
70693139Sru
70793139Sru/* As we can't look-up a (forward-referenced) nodes' html filename
70893139Sru   from the tentry, we take the easy way out.  We assume that
70993139Sru   nodenames are unique, and generate the html filename from the
71093139Sru   nodename, that's always known.  */
71193139Srustatic char *
712146515Srunodename_to_filename_1 (char *nodename, int href)
71393139Sru{
71493139Sru  char *p;
71593139Sru  char *filename;
71693139Sru  char dirname[PATH_MAX];
71793139Sru
71893139Sru  if (strcasecmp (nodename, "Top") == 0)
71993139Sru    {
72093139Sru      /* We want to convert references to the Top node into
72193139Sru	 "index.html#Top".  */
72293139Sru      if (href)
72393139Sru	filename = xstrdup ("index.html"); /* "#Top" is added by our callers */
72493139Sru      else
72593139Sru	filename = xstrdup ("Top");
72693139Sru    }
72793139Sru  else if (strcasecmp (nodename, "(dir)") == 0)
72893139Sru    /* We want to convert references to the (dir) node into
72993139Sru       "../index.html".  */
73093139Sru    filename = xstrdup ("../index.html");
73193139Sru  else
73293139Sru    {
73393139Sru      filename = xmalloc (PATH_MAX);
73493139Sru      dirname[0] = '\0';
73593139Sru      *filename = '\0';
73693139Sru
73793139Sru      /* Check for external reference: ``(info-document)node-name''
73893139Sru	 Assume this node lives at: ``../info-document/node-name.html''
73993139Sru
74093139Sru	 We need to handle the special case (sigh): ``(info-document)'',
74193139Sru	 ie, an external top-node, which should translate to:
74293139Sru	 ``../info-document/info-document.html'' */
74393139Sru
74493139Sru      p = nodename;
74593139Sru      if (*nodename == '(')
74693139Sru	{
74793139Sru	  int length;
74893139Sru
74993139Sru	  p = strchr (nodename, ')');
75093139Sru	  if (p == NULL)
75193139Sru	    {
752114472Sru	      line_error (_("[unexpected] invalid node name: `%s'"), nodename);
753114472Sru	      xexit (1);
75493139Sru	    }
75593139Sru
75693139Sru	  length = p - nodename - 1;
75793139Sru	  if (length > 5 &&
75893139Sru	      FILENAME_CMPN (p - 5, ".info", 5) == 0)
75993139Sru	    length -= 5;
76093139Sru	  /* This is for DOS, and also for Windows and GNU/Linux
76193139Sru	     systems that might have Info files copied from a DOS 8+3
76293139Sru	     filesystem.  */
76393139Sru	  if (length > 4 &&
76493139Sru	      FILENAME_CMPN (p - 4, ".inf", 4) == 0)
76593139Sru	    length -= 4;
76693139Sru	  strcpy (filename, "../");
76793139Sru	  strncpy (dirname, nodename + 1, length);
76893139Sru	  *(dirname + length) = '\0';
76993139Sru	  fix_filename (dirname);
77093139Sru	  strcat (filename, dirname);
77193139Sru	  strcat (filename, "/");
77293139Sru	  p++;
77393139Sru	}
77493139Sru
77593139Sru      /* In the case of just (info-document), there will be nothing
77693139Sru	 remaining, and we will refer to ../info-document/, which will
77793139Sru	 work fine.  */
77893139Sru      strcat (filename, p);
77993139Sru      if (*p)
78093139Sru	{
78193139Sru	  /* Hmm */
78293139Sru	  fix_filename (filename + strlen (filename) - strlen (p));
78393139Sru	  strcat (filename, ".html");
78493139Sru	}
78593139Sru    }
78693139Sru
78793139Sru  /* Produce a file name suitable for the underlying filesystem.  */
78893139Sru  normalize_filename (filename);
78993139Sru
79093139Sru#if 0
79193139Sru  /* We add ``#Nodified-filename'' anchor to external references to be
79293139Sru     prepared for non-split HTML support.  Maybe drop this. */
79393139Sru  if (href && *dirname)
79493139Sru    {
79593139Sru      strcat (filename, "#");
79693139Sru      strcat (filename, p);
79793139Sru      /* Hmm, again */
79893139Sru      fix_filename (filename + strlen (filename) - strlen (p));
79993139Sru    }
80093139Sru#endif
80193139Sru
80293139Sru  return filename;
80393139Sru}
80493139Sru
/* Output the html file name for NODENAME.  The intent is to do so only
   if necessary, ie, if current filename != filename of node; for now
   there is no such check and the file name is always output.  */
void
add_nodename_to_filename (char *nodename, int href)
{
  char *html_file = nodename_to_filename_1 (nodename, href);

  add_word (html_file);
  free (html_file);
}
81593139Sru
/* Return the html file name for NODENAME, as computed by
   nodename_to_filename_1 for an href.  */
char *
nodename_to_filename (char *nodename)
{
  /* The callers of nodename_to_filename use the result to produce
     <a href=, so the last arg of nodename_to_filename_1 is non-zero.  */
  const int for_href = 1;

  return nodename_to_filename_1 (nodename, for_href);
}
823