/* html.c -- html-related utilities.
   $Id: html.c,v 1.28 2004/12/06 01:13:06 karl Exp $

   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 Free Software
   Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

#include "system.h"
#include "cmds.h"
#include "files.h"
#include "html.h"
#include "lang.h"
#include "makeinfo.h"
#include "node.h"
#include "sectioning.h"


/* Append CHAR to BUFFER, (re)allocating as necessary.  We don't handle
   null characters.
*/ 33116525Sru 34116525Srutypedef struct 35116525Sru{ 36116525Sru unsigned size; /* allocated */ 37116525Sru unsigned length; /* used */ 38116525Sru char *buffer; 39116525Sru} buffer_type; 40116525Sru 41116525Srustatic buffer_type * 42146515Sruinit_buffer (void) 43116525Sru{ 44116525Sru buffer_type *buf = xmalloc (sizeof (buffer_type)); 45116525Sru buf->length = 0; 46116525Sru buf->size = 0; 47116525Sru buf->buffer = NULL; 48116525Sru 49116525Sru return buf; 50116525Sru} 51116525Sru 52116525Srustatic void 53146515Sruappend_char (buffer_type *buf, int c) 54116525Sru{ 55116525Sru buf->length++; 56116525Sru if (buf->length >= buf->size) 57116525Sru { 58116525Sru buf->size += 100; 59116525Sru buf->buffer = xrealloc (buf->buffer, buf->size); 60116525Sru } 61116525Sru buf->buffer[buf->length - 1] = c; 62116525Sru buf->buffer[buf->length] = 0; 63116525Sru} 64116525Sru 65116525Sru/* Read the cascading style-sheet file FILENAME. Write out any @import 66116525Sru commands, which must come first, by the definition of css. If the 67116525Sru file contains any actual css code following the @imports, return it; 68116525Sru else return NULL. */ 69116525Srustatic char * 70146515Sruprocess_css_file (char *filename) 71116525Sru{ 72146515Sru int c; 73146515Sru int lastchar = 0; 74116525Sru FILE *f; 75116525Sru buffer_type *import_text = init_buffer (); 76116525Sru buffer_type *inline_text = init_buffer (); 77116525Sru unsigned lineno = 1; 78116525Sru enum { null_state, comment_state, import_state, inline_state } state 79116525Sru = null_state, prev_state; 80116525Sru 81146515Sru prev_state = null_state; 82146515Sru 83116525Sru /* read from stdin if `-' is the filename. */ 84116525Sru f = STREQ (filename, "-") ? stdin : fopen (filename, "r"); 85116525Sru if (!f) 86116525Sru { 87116525Sru error (_("%s: could not open --css-file: %s"), progname, filename); 88116525Sru return NULL; 89116525Sru } 90116525Sru 91116525Sru /* Read the file. 
The @import statements must come at the beginning, 92116525Sru with only whitespace and comments allowed before any inline css code. */ 93116525Sru while ((c = getc (f)) >= 0) 94116525Sru { 95116525Sru if (c == '\n') 96116525Sru lineno++; 97116525Sru 98116525Sru switch (state) 99116525Sru { 100116525Sru case null_state: /* between things */ 101116525Sru if (c == '@') 102146515Sru { /* Only @import and @charset should switch into 103146515Sru import_state, other @-commands, such as @media, should 104146515Sru put us into inline_state. I don't think any other css 105146515Sru @-commands start with `i' or `c', although of course 106146515Sru this will break when such a command is defined. */ 107146515Sru int nextchar = getc (f); 108146515Sru if (nextchar == 'i' || nextchar == 'c') 109146515Sru { 110146515Sru append_char (import_text, c); 111146515Sru state = import_state; 112146515Sru } 113146515Sru else 114146515Sru { 115146515Sru ungetc (nextchar, f); /* wasn't an @import */ 116146515Sru state = inline_state; 117146515Sru } 118116525Sru } 119116525Sru else if (c == '/') 120116525Sru { /* possible start of a comment */ 121116525Sru int nextchar = getc (f); 122116525Sru if (nextchar == '*') 123116525Sru state = comment_state; 124116525Sru else 125116525Sru { 126116525Sru ungetc (nextchar, f); /* wasn't a comment */ 127116525Sru state = inline_state; 128116525Sru } 129116525Sru } 130116525Sru else if (isspace (c)) 131116525Sru ; /* skip whitespace; maybe should use c_isspace? */ 132116525Sru 133116525Sru else 134116525Sru /* not an @import, not a comment, not whitespace: we must 135116525Sru have started the inline text. 
*/ 136116525Sru state = inline_state; 137116525Sru 138116525Sru if (state == inline_state) 139116525Sru append_char (inline_text, c); 140116525Sru 141116525Sru if (state != null_state) 142116525Sru prev_state = null_state; 143116525Sru break; 144116525Sru 145116525Sru case comment_state: 146116525Sru if (c == '/' && lastchar == '*') 147116525Sru state = prev_state; /* end of comment */ 148116525Sru break; /* else ignore this comment char */ 149116525Sru 150116525Sru case import_state: 151116525Sru append_char (import_text, c); /* include this import char */ 152116525Sru if (c == ';') 153116525Sru { /* done with @import */ 154116525Sru append_char (import_text, '\n'); /* make the output nice */ 155116525Sru state = null_state; 156116525Sru prev_state = import_state; 157116525Sru } 158116525Sru break; 159116525Sru 160116525Sru case inline_state: 161116525Sru /* No harm in writing out comments, so don't bother parsing 162116525Sru them out, just append everything. */ 163116525Sru append_char (inline_text, c); 164116525Sru break; 165116525Sru } 166116525Sru 167116525Sru lastchar = c; 168116525Sru } 169116525Sru 170116525Sru /* Reached the end of the file. We should not be still in a comment. */ 171116525Sru if (state == comment_state) 172116525Sru warning (_("%s:%d: --css-file ended in comment"), filename, lineno); 173116525Sru 174116525Sru /* Write the @import text, if any. */ 175116525Sru if (import_text->buffer) 176116525Sru { 177116525Sru add_word (import_text->buffer); 178116525Sru free (import_text->buffer); 179116525Sru free (import_text); 180116525Sru } 181116525Sru 182116525Sru /* We're wasting the buffer struct memory, but so what. */ 183116525Sru return inline_text->buffer; 184116525Sru} 185146515Sru 186146515SruHSTACK *htmlstack = NULL; 187116525Sru 188146515Sru/* See html.h. 
*/ 189146515Sruint html_output_head_p = 0; 190146515Sruint html_title_written = 0; 191116525Sru 192146515Sruvoid 193146515Sruhtml_output_head (void) 194146515Sru{ 195146515Sru static const char *html_title = NULL; 196146515Sru char *encoding; 197146515Sru 198146515Sru if (html_output_head_p) 199146515Sru return; 200146515Sru html_output_head_p = 1; 201146515Sru 202146515Sru encoding = current_document_encoding (); 203146515Sru 204146515Sru /* The <title> should not have markup, so use text_expansion. */ 205146515Sru if (!html_title) 206146515Sru html_title = escape_string (title ? 207146515Sru text_expansion (title) : (char *) _("Untitled")); 208146515Sru 209146515Sru /* Make sure this is the very first string of the output document. */ 210146515Sru output_paragraph_offset = 0; 211146515Sru 212146515Sru add_html_block_elt_args ("<html lang=\"%s\">\n<head>\n", 213146515Sru language_table[language_code].abbrev); 214146515Sru 215146515Sru /* When splitting, add current node's name to title if it's available and not 216146515Sru Top. */ 217146515Sru if (splitting && current_node && !STREQ (current_node, "Top")) 218146515Sru add_word_args ("<title>%s - %s</title>\n", 219146515Sru escape_string (xstrdup (current_node)), html_title); 220146515Sru else 221146515Sru add_word_args ("<title>%s</title>\n", html_title); 222146515Sru 223146515Sru add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html"); 224146515Sru if (encoding && *encoding) 225146515Sru add_word_args ("; charset=%s", encoding); 226146515Sru 227146515Sru add_word ("\">\n"); 228146515Sru 229146515Sru if (!document_description) 230146515Sru document_description = html_title; 231146515Sru 232146515Sru add_word_args ("<meta name=\"description\" content=\"%s\">\n", 233146515Sru document_description); 234146515Sru add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n", 235146515Sru VERSION); 236146515Sru 237146515Sru /* Navigation bar links. 
*/ 238146515Sru if (!splitting) 239146515Sru add_word ("<link title=\"Top\" rel=\"top\" href=\"#Top\">\n"); 240146515Sru else if (tag_table) 241146515Sru { 242146515Sru /* Always put a top link. */ 243146515Sru add_word ("<link title=\"Top\" rel=\"start\" href=\"index.html#Top\">\n"); 244146515Sru 245146515Sru /* We already have a top link, avoid duplication. */ 246146515Sru if (tag_table->up && !STREQ (tag_table->up, "Top")) 247146515Sru add_link (tag_table->up, "rel=\"up\""); 248146515Sru 249146515Sru if (tag_table->prev) 250146515Sru add_link (tag_table->prev, "rel=\"prev\""); 251146515Sru 252146515Sru if (tag_table->next) 253146515Sru add_link (tag_table->next, "rel=\"next\""); 254146515Sru 255146515Sru /* fixxme: Look for a way to put links to various indices in the 256146515Sru document. Also possible candidates to be added here are First and 257146515Sru Last links. */ 258146515Sru } 259146515Sru else 260146515Sru { 261146515Sru /* We are splitting, but we neither have a tag_table. So this must be 262146515Sru index.html. So put a link to Top. */ 263146515Sru add_word ("<link title=\"Top\" rel=\"start\" href=\"#Top\">\n"); 264146515Sru } 265146515Sru 266146515Sru add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \ 267146515Srurel=\"generator-home\" title=\"Texinfo Homepage\">\n"); 268146515Sru 269146515Sru if (copying_text) 270146515Sru { /* It is not ideal that we include the html markup here within 271146515Sru <head>, so we use text_expansion. */ 272146515Sru insert_string ("<!--\n"); 273146515Sru insert_string (text_expansion (copying_text)); 274146515Sru insert_string ("-->\n"); 275146515Sru } 276146515Sru 277146515Sru /* Put the style definitions in a comment for the sake of browsers 278146515Sru that don't support <style>. 
*/ 279146515Sru add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n"); 280146515Sru add_word ("<style type=\"text/css\"><!--\n"); 281146515Sru 282146515Sru { 283146515Sru char *css_inline = NULL; 284146515Sru 285146515Sru if (css_include) 286146515Sru /* This writes out any @import commands from the --css-file, 287146515Sru and returns any actual css code following the imports. */ 288146515Sru css_inline = process_css_file (css_include); 289146515Sru 290146515Sru /* This seems cleaner than adding <br>'s at the end of each line for 291146515Sru these "roman" displays. It's hardly the end of the world if the 292146515Sru browser doesn't do <style>s, in any case; they'll just come out in 293146515Sru typewriter. */ 294146515Sru#define CSS_FONT_INHERIT "font-family:inherit" 295146515Sru add_word_args (" pre.display { %s }\n", CSS_FONT_INHERIT); 296146515Sru add_word_args (" pre.format { %s }\n", CSS_FONT_INHERIT); 297146515Sru 298146515Sru /* Alternatively, we could do <font size=-1> in insertion.c, but this 299146515Sru way makes it easier to override. */ 300146515Sru#define CSS_FONT_SMALLER "font-size:smaller" 301146515Sru add_word_args (" pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT, 302146515Sru CSS_FONT_SMALLER); 303146515Sru add_word_args (" pre.smallformat { %s; %s }\n", CSS_FONT_INHERIT, 304146515Sru CSS_FONT_SMALLER); 305146515Sru add_word_args (" pre.smallexample { %s }\n", CSS_FONT_SMALLER); 306146515Sru add_word_args (" pre.smalllisp { %s }\n", CSS_FONT_SMALLER); 307146515Sru 308146515Sru /* Since HTML doesn't have a sc element, we use span with a bit of 309146515Sru CSS spice instead. */ 310146515Sru#define CSS_FONT_SMALL_CAPS "font-variant:small-caps" 311146515Sru add_word_args (" span.sc { %s }\n", CSS_FONT_SMALL_CAPS); 312146515Sru 313146515Sru /* Roman (default) font class, closest we can come. 
*/ 314146515Sru#define CSS_FONT_ROMAN "font-family:serif; font-weight:normal;" 315146515Sru add_word_args (" span.roman { %s } \n", CSS_FONT_ROMAN); 316146515Sru 317146515Sru /* Sans serif font class. */ 318146515Sru#define CSS_FONT_SANSSERIF "font-family:sans-serif; font-weight:normal;" 319146515Sru add_word_args (" span.sansserif { %s } \n", CSS_FONT_SANSSERIF); 320146515Sru 321146515Sru /* Write out any css code from the user's --css-file. */ 322146515Sru if (css_inline) 323146515Sru insert_string (css_inline); 324146515Sru 325146515Sru add_word ("--></style>\n"); 326146515Sru } 327146515Sru 328146515Sru add_word ("</head>\n<body>\n"); 329146515Sru 330146515Sru if (title && !html_title_written && titlepage_cmd_present) 331146515Sru { 332146515Sru add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title); 333146515Sru html_title_written = 1; 334146515Sru } 335146515Sru 336146515Sru free (encoding); 337146515Sru} 338116525Sru 33956160Sru/* Escape HTML special characters in the string if necessary, 34056160Sru returning a pointer to a possibly newly-allocated one. */ 34156160Sruchar * 342146515Sruescape_string (char *string) 34356160Sru{ 344146515Sru char *newstring; 345146515Sru int i = 0, newlen = 0; 34656160Sru 34756160Sru do 34856160Sru { 34956160Sru /* Find how much to allocate. */ 35056160Sru switch (string[i]) 35156160Sru { 352146515Sru case '"': 353146515Sru newlen += 6; /* `"' */ 354146515Sru break; 35556160Sru case '&': 35656160Sru newlen += 5; /* `&' */ 35756160Sru break; 35856160Sru case '<': 35956160Sru case '>': 36056160Sru newlen += 4; /* `<', `>' */ 36156160Sru break; 36256160Sru default: 36356160Sru newlen++; 36456160Sru } 36556160Sru } 36693139Sru while (string[i++]); 36756160Sru 36856160Sru if (newlen == i) return string; /* Already OK. 
*/ 36956160Sru 37093139Sru newstring = xmalloc (newlen); 37156160Sru i = 0; 37256160Sru do 37356160Sru { 37456160Sru switch (string[i]) 37556160Sru { 376146515Sru case '"': 377146515Sru strcpy (newstring, """); 378146515Sru newstring += 6; 379146515Sru break; 38056160Sru case '&': 38156160Sru strcpy (newstring, "&"); 38256160Sru newstring += 5; 38356160Sru break; 38456160Sru case '<': 38556160Sru strcpy (newstring, "<"); 38656160Sru newstring += 4; 38756160Sru break; 38856160Sru case '>': 38956160Sru strcpy (newstring, ">"); 39056160Sru newstring += 4; 39156160Sru break; 39256160Sru default: 39356160Sru newstring[0] = string[i]; 39456160Sru newstring++; 39556160Sru } 39656160Sru } 39756160Sru while (string[i++]); 39856160Sru free (string); 39993139Sru return newstring - newlen; 40056160Sru} 401114472Sru 402114472Sru/* Save current tag. */ 403146515Srustatic void 404146515Srupush_tag (char *tag, char *attribs) 405114472Sru{ 406114472Sru HSTACK *newstack = xmalloc (sizeof (HSTACK)); 40756160Sru 408114472Sru newstack->tag = tag; 409146515Sru newstack->attribs = xstrdup (attribs); 410114472Sru newstack->next = htmlstack; 411114472Sru htmlstack = newstack; 412114472Sru} 413114472Sru 414114472Sru/* Get last tag. */ 415146515Srustatic void 416146515Srupop_tag (void) 417114472Sru{ 418114472Sru HSTACK *tos = htmlstack; 419114472Sru 420114472Sru if (!tos) 421114472Sru { 422114472Sru line_error (_("[unexpected] no html tag to pop")); 423114472Sru return; 424114472Sru } 425114472Sru 426146515Sru free (htmlstack->attribs); 427146515Sru 428114472Sru htmlstack = htmlstack->next; 429114472Sru free (tos); 430114472Sru} 431114472Sru 432146515Sru/* Check if tag is an empty or a whitespace only element. 433146515Sru If so, remove it, keeping whitespace intact. 
*/ 434146515Sruint 435146515Srurollback_empty_tag (char *tag) 436146515Sru{ 437146515Sru int check_position = output_paragraph_offset; 438146515Sru int taglen = strlen (tag); 439146515Sru int rollback_happened = 0; 440146515Sru char *contents = ""; 441146515Sru char *contents_canon_white = ""; 442146515Sru 443146515Sru /* If output_paragraph is empty, we cannot rollback :-\ */ 444146515Sru if (output_paragraph_offset <= 0) 445146515Sru return 0; 446146515Sru 447146515Sru /* Find the end of the previous tag. */ 448146515Sru while (output_paragraph[check_position-1] != '>' && check_position > 0) 449146515Sru check_position--; 450146515Sru 451146515Sru /* Save stuff between tag's end to output_paragraph's end. */ 452146515Sru if (check_position != output_paragraph_offset) 453146515Sru { 454146515Sru contents = xmalloc (output_paragraph_offset - check_position + 1); 455146515Sru memcpy (contents, output_paragraph + check_position, 456146515Sru output_paragraph_offset - check_position); 457146515Sru 458146515Sru contents[output_paragraph_offset - check_position] = '\0'; 459146515Sru 460146515Sru contents_canon_white = xstrdup (contents); 461146515Sru canon_white (contents_canon_white); 462146515Sru } 463146515Sru 464146515Sru /* Find the start of the previous tag. */ 465146515Sru while (output_paragraph[check_position-1] != '<' && check_position > 0) 466146515Sru check_position--; 467146515Sru 468146515Sru /* Check to see if this is the tag. */ 469146515Sru if (strncmp ((char *) output_paragraph + check_position, tag, taglen) == 0 470146515Sru && (whitespace (output_paragraph[check_position + taglen]) 471146515Sru || output_paragraph[check_position + taglen] == '>')) 472146515Sru { 473146515Sru if (!contents_canon_white || !*contents_canon_white) 474146515Sru { 475146515Sru /* Empty content after whitespace removal, so roll it back. 
*/ 476146515Sru output_paragraph_offset = check_position - 1; 477146515Sru rollback_happened = 1; 478146515Sru 479146515Sru /* Original contents may not be empty (whitespace.) */ 480146515Sru if (contents && *contents) 481146515Sru { 482146515Sru insert_string (contents); 483146515Sru free (contents); 484146515Sru } 485146515Sru } 486146515Sru } 487146515Sru 488146515Sru return rollback_happened; 489146515Sru} 490146515Sru 49156160Sru/* Open or close TAG according to START_OR_END. */ 49256160Sruvoid 493146515Sru#if defined (VA_FPRINTF) && __STDC__ 494146515Sruinsert_html_tag_with_attribute (int start_or_end, char *tag, char *format, ...) 495146515Sru#else 496146515Sruinsert_html_tag_with_attribute (start_or_end, tag, format, va_alist) 49756160Sru int start_or_end; 49856160Sru char *tag; 499146515Sru char *format; 500146515Sru va_dcl 501146515Sru#endif 50256160Sru{ 503114472Sru char *old_tag = NULL; 504146515Sru char *old_attribs = NULL; 505146515Sru char formatted_attribs[2000]; /* xx no fixed limits */ 506114472Sru int do_return = 0; 507146515Sru extern int in_html_elt; 508114472Sru 50956160Sru if (start_or_end != START) 510116525Sru pop_tag (); 511114472Sru 512114472Sru if (htmlstack) 513146515Sru { 514146515Sru old_tag = htmlstack->tag; 515146515Sru old_attribs = htmlstack->attribs; 516146515Sru } 517146515Sru 518146515Sru if (format) 519146515Sru { 520146515Sru#ifdef VA_SPRINTF 521146515Sru va_list ap; 522146515Sru#endif 523114472Sru 524146515Sru VA_START (ap, format); 525146515Sru#ifdef VA_SPRINTF 526146515Sru VA_SPRINTF (formatted_attribs, format, ap); 527146515Sru#else 528146515Sru sprintf (formatted_attribs, format, a1, a2, a3, a4, a5, a6, a7, a8); 529146515Sru#endif 530146515Sru va_end (ap); 531146515Sru } 532146515Sru else 533146515Sru formatted_attribs[0] = '\0'; 534146515Sru 535146515Sru /* Exception: can nest multiple spans. 
*/ 536114472Sru if (htmlstack 537146515Sru && STREQ (htmlstack->tag, tag) 538146515Sru && !(STREQ (tag, "span") && STREQ (old_attribs, formatted_attribs))) 539114472Sru do_return = 1; 540114472Sru 541114472Sru if (start_or_end == START) 542146515Sru push_tag (tag, formatted_attribs); 543114472Sru 544114472Sru if (do_return) 545114472Sru return; 546114472Sru 547146515Sru in_html_elt++; 548146515Sru 549114472Sru /* texinfo.tex doesn't support more than one font attribute 550114472Sru at the same time. */ 551146515Sru if ((start_or_end == START) && old_tag && *old_tag 552146515Sru && !rollback_empty_tag (old_tag)) 553146515Sru add_word_args ("</%s>", old_tag); 554114472Sru 555114472Sru if (*tag) 556114472Sru { 557146515Sru if (start_or_end == START) 558146515Sru add_word_args (format ? "<%s %s>" : "<%s>", tag, formatted_attribs); 559146515Sru else if (!rollback_empty_tag (tag)) 560146515Sru /* Insert close tag only if we didn't rollback, 561146515Sru in which case the opening tag is removed. */ 562146515Sru add_word_args ("</%s>", tag); 563114472Sru } 564114472Sru 565114472Sru if ((start_or_end != START) && old_tag && *old_tag) 566146515Sru add_word_args (strlen (old_attribs) > 0 ? "<%s %s>" : "<%s>", 567146515Sru old_tag, old_attribs); 568146515Sru 569146515Sru in_html_elt--; 57056160Sru} 57156160Sru 572146515Sruvoid 573146515Sruinsert_html_tag (int start_or_end, char *tag) 574146515Sru{ 575146515Sru insert_html_tag_with_attribute (start_or_end, tag, NULL); 576146515Sru} 577116525Sru 57856160Sru/* Output an HTML <link> to the filename for NODE, including the 57956160Sru other string as extra attributes. 
*/ 58056160Sruvoid 581146515Sruadd_link (char *nodename, char *attributes) 58256160Sru{ 58393139Sru if (nodename) 58456160Sru { 58593139Sru add_html_elt ("<link "); 58693139Sru add_word_args ("%s", attributes); 58793139Sru add_word_args (" href=\""); 58893139Sru add_anchor_name (nodename, 1); 589146515Sru add_word_args ("\" title=\"%s\">\n", nodename); 59056160Sru } 59156160Sru} 59256160Sru 59356160Sru/* Output NAME with characters escaped as appropriate for an anchor 594146515Sru name, i.e., escape URL special characters with our _00hh convention 595146515Sru if OLD is zero. (See the manual for details on the new scheme.) 596146515Sru 597146515Sru If OLD is nonzero, generate the node name with the 4.6-and-earlier 598146515Sru convention of %hh (and more special characters output as-is, notably 599146515Sru - and *). This is only so that external references to old names can 600146515Sru still work with HTML generated by the new makeinfo; the gcc folks 601146515Sru needed this. Our own HTML does not refer to these names. */ 602146515Sru 60356160Sruvoid 604146515Sruadd_escaped_anchor_name (char *name, int old) 60556160Sru{ 606146515Sru canon_white (name); 607146515Sru 608146515Sru if (!old && !strchr ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", 609146515Sru *name)) 610146515Sru { /* XHTML does not allow anything but an ASCII letter to start an 611146515Sru identifier. Therefore kludge in this constant string if we 612146515Sru have a nonletter. */ 613146515Sru add_word ("g_t"); 614146515Sru } 615146515Sru 61656160Sru for (; *name; name++) 61756160Sru { 618146515Sru if (cr_or_whitespace (*name)) 619146515Sru add_char ('-'); 620146515Sru 621146515Sru else if (!old && !URL_SAFE_CHAR (*name)) 62256160Sru /* Cast so characters with the high bit set are treated as >128, 62356160Sru for example o-umlaut should be 246, not -10. 
*/ 624146515Sru add_word_args ("_00%x", (unsigned char) *name); 625146515Sru 626146515Sru else if (old && !URL_SAFE_CHAR (*name) && !OLD_URL_SAFE_CHAR (*name)) 627146515Sru /* Different output convention, but still cast as above. */ 62856160Sru add_word_args ("%%%x", (unsigned char) *name); 629146515Sru 63056160Sru else 63156160Sru add_char (*name); 63256160Sru } 63356160Sru} 63456160Sru 63556160Sru/* Insert the text for the name of a reference in an HTML anchor 636146515Sru appropriate for NODENAME. 637146515Sru 638146515Sru If HREF is zero, generate text for name= in the new node name 639146515Sru conversion convention. 640146515Sru If HREF is negative, generate text for name= in the old convention. 641146515Sru If HREF is positive, generate the name for an href= attribute, i.e., 642146515Sru including the `#' if it's an internal reference. */ 64356160Sruvoid 644146515Sruadd_anchor_name (char *nodename, int href) 64556160Sru{ 646146515Sru if (href > 0) 64793139Sru { 64893139Sru if (splitting) 64993139Sru add_url_name (nodename, href); 65093139Sru add_char ('#'); 65193139Sru } 65293139Sru /* Always add NODENAME, so that the reference would pinpoint the 65393139Sru exact node on its file. This is so several nodes could share the 65493139Sru same file, in case of file-name clashes, but also for more 65593139Sru accurate browser positioning. */ 65693139Sru if (strcasecmp (nodename, "(dir)") == 0) 65793139Sru /* Strip the parens, but keep the original letter-case. 
*/ 65893139Sru add_word_args ("%.3s", nodename + 1); 659146515Sru else if (strcasecmp (nodename, "top") == 0) 660146515Sru add_word ("Top"); 66193139Sru else 662146515Sru add_escaped_anchor_name (nodename, href < 0); 66393139Sru} 66456160Sru 66593139Sru/* Insert the text for the name of a reference in an HTML url, aprropriate 66693139Sru for NODENAME */ 66793139Sruvoid 668146515Sruadd_url_name (char *nodename, int href) 66993139Sru{ 67093139Sru add_nodename_to_filename (nodename, href); 67156160Sru} 67293139Sru 673146515Sru/* Convert non [A-Za-z0-9] to _00xx, where xx means the hexadecimal 674146515Sru representation of the ASCII character. Also convert spaces and 675146515Sru newlines to dashes. */ 676146515Srustatic void 677146515Srufix_filename (char *filename) 678146515Sru{ 679146515Sru int i; 680146515Sru int len = strlen (filename); 681146515Sru char *oldname = xstrdup (filename); 68293139Sru 683146515Sru *filename = '\0'; 68493139Sru 685146515Sru for (i = 0; i < len; i++) 68693139Sru { 687146515Sru if (cr_or_whitespace (oldname[i])) 688146515Sru strcat (filename, "-"); 689146515Sru else if (URL_SAFE_CHAR (oldname[i])) 690146515Sru strncat (filename, (char *) oldname + i, 1); 691146515Sru else 692146515Sru { 693146515Sru char *hexchar = xmalloc (6 * sizeof (char)); 694146515Sru sprintf (hexchar, "_00%x", (unsigned char) oldname[i]); 695146515Sru strcat (filename, hexchar); 696146515Sru free (hexchar); 697146515Sru } 698146515Sru 699146515Sru /* Check if we are nearing boundaries. */ 700146515Sru if (strlen (filename) >= PATH_MAX - 20) 701146515Sru break; 70293139Sru } 703146515Sru 704146515Sru free (oldname); 70593139Sru} 70693139Sru 70793139Sru/* As we can't look-up a (forward-referenced) nodes' html filename 70893139Sru from the tentry, we take the easy way out. We assume that 70993139Sru nodenames are unique, and generate the html filename from the 71093139Sru nodename, that's always known. 
*/ 71193139Srustatic char * 712146515Srunodename_to_filename_1 (char *nodename, int href) 71393139Sru{ 71493139Sru char *p; 71593139Sru char *filename; 71693139Sru char dirname[PATH_MAX]; 71793139Sru 71893139Sru if (strcasecmp (nodename, "Top") == 0) 71993139Sru { 72093139Sru /* We want to convert references to the Top node into 72193139Sru "index.html#Top". */ 72293139Sru if (href) 72393139Sru filename = xstrdup ("index.html"); /* "#Top" is added by our callers */ 72493139Sru else 72593139Sru filename = xstrdup ("Top"); 72693139Sru } 72793139Sru else if (strcasecmp (nodename, "(dir)") == 0) 72893139Sru /* We want to convert references to the (dir) node into 72993139Sru "../index.html". */ 73093139Sru filename = xstrdup ("../index.html"); 73193139Sru else 73293139Sru { 73393139Sru filename = xmalloc (PATH_MAX); 73493139Sru dirname[0] = '\0'; 73593139Sru *filename = '\0'; 73693139Sru 73793139Sru /* Check for external reference: ``(info-document)node-name'' 73893139Sru Assume this node lives at: ``../info-document/node-name.html'' 73993139Sru 74093139Sru We need to handle the special case (sigh): ``(info-document)'', 74193139Sru ie, an external top-node, which should translate to: 74293139Sru ``../info-document/info-document.html'' */ 74393139Sru 74493139Sru p = nodename; 74593139Sru if (*nodename == '(') 74693139Sru { 74793139Sru int length; 74893139Sru 74993139Sru p = strchr (nodename, ')'); 75093139Sru if (p == NULL) 75193139Sru { 752114472Sru line_error (_("[unexpected] invalid node name: `%s'"), nodename); 753114472Sru xexit (1); 75493139Sru } 75593139Sru 75693139Sru length = p - nodename - 1; 75793139Sru if (length > 5 && 75893139Sru FILENAME_CMPN (p - 5, ".info", 5) == 0) 75993139Sru length -= 5; 76093139Sru /* This is for DOS, and also for Windows and GNU/Linux 76193139Sru systems that might have Info files copied from a DOS 8+3 76293139Sru filesystem. 
*/ 76393139Sru if (length > 4 && 76493139Sru FILENAME_CMPN (p - 4, ".inf", 4) == 0) 76593139Sru length -= 4; 76693139Sru strcpy (filename, "../"); 76793139Sru strncpy (dirname, nodename + 1, length); 76893139Sru *(dirname + length) = '\0'; 76993139Sru fix_filename (dirname); 77093139Sru strcat (filename, dirname); 77193139Sru strcat (filename, "/"); 77293139Sru p++; 77393139Sru } 77493139Sru 77593139Sru /* In the case of just (info-document), there will be nothing 77693139Sru remaining, and we will refer to ../info-document/, which will 77793139Sru work fine. */ 77893139Sru strcat (filename, p); 77993139Sru if (*p) 78093139Sru { 78193139Sru /* Hmm */ 78293139Sru fix_filename (filename + strlen (filename) - strlen (p)); 78393139Sru strcat (filename, ".html"); 78493139Sru } 78593139Sru } 78693139Sru 78793139Sru /* Produce a file name suitable for the underlying filesystem. */ 78893139Sru normalize_filename (filename); 78993139Sru 79093139Sru#if 0 79193139Sru /* We add ``#Nodified-filename'' anchor to external references to be 79293139Sru prepared for non-split HTML support. Maybe drop this. */ 79393139Sru if (href && *dirname) 79493139Sru { 79593139Sru strcat (filename, "#"); 79693139Sru strcat (filename, p); 79793139Sru /* Hmm, again */ 79893139Sru fix_filename (filename + strlen (filename) - strlen (p)); 79993139Sru } 80093139Sru#endif 80193139Sru 80293139Sru return filename; 80393139Sru} 80493139Sru 80593139Sru/* If necessary, ie, if current filename != filename of node, output 80693139Sru the node name. 
*/
void
add_nodename_to_filename (char *nodename, int href)
{
  char *html_file;

  /* No comparison against the current file is made for now: the file
     name is always written out.  */
  html_file = nodename_to_filename_1 (nodename, href);
  add_word (html_file);
  free (html_file);
}

/* Return the newly allocated HTML file name for NODENAME.  Callers use
   the result to build <a href=, hence the nonzero HREF argument passed
   on to nodename_to_filename_1.  */
char *
nodename_to_filename (char *nodename)
{
  char *html_file = nodename_to_filename_1 (nodename, 1);

  return html_file;
}