/* html.c revision 116525 */
1/* html.c -- html-related utilities. 2 $Id: html.c,v 1.18 2003/06/02 12:32:29 karl Exp $ 3 4 Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software Foundation, 18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 19 20#include "system.h" 21#include "cmds.h" 22#include "html.h" 23#include "lang.h" 24#include "makeinfo.h" 25#include "sectioning.h" 26 27HSTACK *htmlstack = NULL; 28 29static char *process_css_file (/* char * */); 30 31/* See html.h. */ 32int html_output_head_p = 0; 33int html_title_written = 0; 34 35 36void 37html_output_head () 38{ 39 static const char *html_title = NULL; 40 41 if (html_output_head_p) 42 return; 43 html_output_head_p = 1; 44 45 /* The <title> should not have markup, so use text_expansion. */ 46 if (!html_title) 47 html_title = title ? 
text_expansion (title) : _("Untitled"); 48 49 add_word_args ("<html lang=\"%s\">\n<head>\n<title>%s</title>\n", 50 language_table[language_code].abbrev, html_title); 51 52 add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html"); 53 if (document_encoding_code != no_encoding) 54 add_word_args ("; charset=%s", 55 encoding_table[document_encoding_code].encname); 56 add_word ("\">\n"); 57 58 if (!document_description) 59 document_description = html_title; 60 61 add_word_args ("<meta name=\"description\" content=\"%s\">\n", 62 document_description); 63 add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n", 64 VERSION); 65#if 0 66 /* let's not do this now, since it causes mozilla to put up a 67 navigation bar. */ 68 add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \ 69rel=\"generator-home\">\n"); 70#endif 71 72 if (copying_text) 73 { /* copying_text has already been fully expanded in 74 begin_insertion (by full_expansion), so use insert_ rather than 75 add_. It is not ideal that we include the html markup here within 76 <head>, but the alternative is to have yet more and different 77 expansions of the copying text. Yuck. */ 78 insert_string ("<!--\n"); 79 insert_string (copying_text); 80 insert_string ("-->\n"); 81 } 82 83 /* Put the style definitions in a comment for the sake of browsers 84 that don't support <style>. */ 85 add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n"); 86 add_word ("<style type=\"text/css\"><!--\n"); 87 88 { 89 char *css_inline = NULL; 90 91 if (css_include) 92 /* This writes out any @import commands from the --css-file, 93 and returns any actual css code following the imports. */ 94 css_inline = process_css_file (css_include); 95 96 /* This seems cleaner than adding <br>'s at the end of each line for 97 these "roman" displays. It's hardly the end of the world if the 98 browser doesn't do <style>s, in any case; they'll just come out in 99 typewriter. 
*/ 100#define CSS_FONT_INHERIT "font-family:inherit" 101 add_word_args (" pre.display { %s }\n", CSS_FONT_INHERIT); 102 add_word_args (" pre.format { %s }\n", CSS_FONT_INHERIT); 103 104 /* Alternatively, we could do <font size=-1> in insertion.c, but this 105 way makes it easier to override. */ 106#define CSS_FONT_SMALLER "font-size:smaller" 107 add_word_args (" pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT, 108 CSS_FONT_SMALLER); 109 add_word_args (" pre.smallformat { %s; %s }\n", CSS_FONT_INHERIT, 110 CSS_FONT_SMALLER); 111 add_word_args (" pre.smallexample { %s }\n", CSS_FONT_SMALLER); 112 add_word_args (" pre.smalllisp { %s }\n", CSS_FONT_SMALLER); 113 114 /* Write out any css code from the user's --css-file. */ 115 if (css_inline) 116 add_word (css_inline); 117 118 add_word ("--></style>\n"); 119 } 120 121 add_word ("</head>\n<body>\n"); 122 123 if (title && !html_title_written && titlepage_cmd_present) 124 { 125 add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title); 126 html_title_written = 1; 127 } 128} 129 130 131 132/* Append CHAR to BUFFER, (re)allocating as necessary. We don't handle 133 null characters. */ 134 135typedef struct 136{ 137 unsigned size; /* allocated */ 138 unsigned length; /* used */ 139 char *buffer; 140} buffer_type; 141 142 143static buffer_type * 144init_buffer () 145{ 146 buffer_type *buf = xmalloc (sizeof (buffer_type)); 147 buf->length = 0; 148 buf->size = 0; 149 buf->buffer = NULL; 150 151 return buf; 152} 153 154 155static void 156append_char (buf, c) 157 buffer_type *buf; 158 int c; 159{ 160 buf->length++; 161 if (buf->length >= buf->size) 162 { 163 buf->size += 100; 164 buf->buffer = xrealloc (buf->buffer, buf->size); 165 } 166 buf->buffer[buf->length - 1] = c; 167 buf->buffer[buf->length] = 0; 168} 169 170 171/* Read the cascading style-sheet file FILENAME. Write out any @import 172 commands, which must come first, by the definition of css. 
If the 173 file contains any actual css code following the @imports, return it; 174 else return NULL. */ 175 176static char * 177process_css_file (filename) 178 char *filename; 179{ 180 int c, lastchar; 181 FILE *f; 182 buffer_type *import_text = init_buffer (); 183 buffer_type *inline_text = init_buffer (); 184 unsigned lineno = 1; 185 enum { null_state, comment_state, import_state, inline_state } state 186 = null_state, prev_state; 187 188 /* read from stdin if `-' is the filename. */ 189 f = STREQ (filename, "-") ? stdin : fopen (filename, "r"); 190 if (!f) 191 { 192 error (_("%s: could not open --css-file: %s"), progname, filename); 193 return NULL; 194 } 195 196 /* Read the file. The @import statements must come at the beginning, 197 with only whitespace and comments allowed before any inline css code. */ 198 while ((c = getc (f)) >= 0) 199 { 200 if (c == '\n') 201 lineno++; 202 203 switch (state) 204 { 205 case null_state: /* between things */ 206 if (c == '@') 207 { 208 /* If there's some other @command, just call it an 209 import, it's all the same to us. So don't bother 210 looking for the `import'. */ 211 append_char (import_text, c); 212 state = import_state; 213 } 214 else if (c == '/') 215 { /* possible start of a comment */ 216 int nextchar = getc (f); 217 if (nextchar == '*') 218 state = comment_state; 219 else 220 { 221 ungetc (nextchar, f); /* wasn't a comment */ 222 state = inline_state; 223 } 224 } 225 else if (isspace (c)) 226 ; /* skip whitespace; maybe should use c_isspace? */ 227 228 else 229 /* not an @import, not a comment, not whitespace: we must 230 have started the inline text. 
*/ 231 state = inline_state; 232 233 if (state == inline_state) 234 append_char (inline_text, c); 235 236 if (state != null_state) 237 prev_state = null_state; 238 break; 239 240 case comment_state: 241 if (c == '/' && lastchar == '*') 242 state = prev_state; /* end of comment */ 243 break; /* else ignore this comment char */ 244 245 case import_state: 246 append_char (import_text, c); /* include this import char */ 247 if (c == ';') 248 { /* done with @import */ 249 append_char (import_text, '\n'); /* make the output nice */ 250 state = null_state; 251 prev_state = import_state; 252 } 253 break; 254 255 case inline_state: 256 /* No harm in writing out comments, so don't bother parsing 257 them out, just append everything. */ 258 append_char (inline_text, c); 259 break; 260 } 261 262 lastchar = c; 263 } 264 265 /* Reached the end of the file. We should not be still in a comment. */ 266 if (state == comment_state) 267 warning (_("%s:%d: --css-file ended in comment"), filename, lineno); 268 269 /* Write the @import text, if any. */ 270 if (import_text->buffer) 271 { 272 add_word (import_text->buffer); 273 free (import_text->buffer); 274 free (import_text); 275 } 276 277 /* We're wasting the buffer struct memory, but so what. */ 278 return inline_text->buffer; 279} 280 281 282 283/* Escape HTML special characters in the string if necessary, 284 returning a pointer to a possibly newly-allocated one. */ 285char * 286escape_string (string) 287 char * string; 288{ 289 int i=0, newlen=0; 290 char * newstring; 291 292 do 293 { 294 /* Find how much to allocate. */ 295 switch (string[i]) 296 { 297 case '&': 298 newlen += 5; /* `&' */ 299 break; 300 case '<': 301 case '>': 302 newlen += 4; /* `<', `>' */ 303 break; 304 default: 305 newlen++; 306 } 307 } 308 while (string[i++]); 309 310 if (newlen == i) return string; /* Already OK. 
*/ 311 312 newstring = xmalloc (newlen); 313 i = 0; 314 do 315 { 316 switch (string[i]) 317 { 318 case '&': 319 strcpy (newstring, "&"); 320 newstring += 5; 321 break; 322 case '<': 323 strcpy (newstring, "<"); 324 newstring += 4; 325 break; 326 case '>': 327 strcpy (newstring, ">"); 328 newstring += 4; 329 break; 330 default: 331 newstring[0] = string[i]; 332 newstring++; 333 } 334 } 335 while (string[i++]); 336 free (string); 337 return newstring - newlen; 338} 339 340 341 342/* Save current tag. */ 343void 344push_tag (tag) 345 char *tag; 346{ 347 HSTACK *newstack = xmalloc (sizeof (HSTACK)); 348 349 newstack->tag = tag; 350 newstack->next = htmlstack; 351 htmlstack = newstack; 352} 353 354/* Get last tag. */ 355void 356pop_tag () 357{ 358 HSTACK *tos = htmlstack; 359 360 if (!tos) 361 { 362 line_error (_("[unexpected] no html tag to pop")); 363 return; 364 } 365 366 htmlstack = htmlstack->next; 367 free (tos); 368} 369 370/* Open or close TAG according to START_OR_END. */ 371void 372insert_html_tag (start_or_end, tag) 373 int start_or_end; 374 char *tag; 375{ 376 char *old_tag = NULL; 377 int do_return = 0; 378 379 if (!paragraph_is_open && (start_or_end == START)) 380 { 381 /* Need to compensate for the <p> we are about to insert, or 382 else cm_xxx functions that call us will get wrong text 383 between START and END. */ 384 adjust_braces_following (output_paragraph_offset, 3); 385 add_word ("<p>"); 386 } 387 388 if (start_or_end != START) 389 pop_tag (); 390 391 if (htmlstack) 392 old_tag = htmlstack->tag; 393 394 if (htmlstack 395 && (strcmp (htmlstack->tag, tag) == 0)) 396 do_return = 1; 397 398 if (start_or_end == START) 399 push_tag (tag); 400 401 if (do_return) 402 return; 403 404 /* texinfo.tex doesn't support more than one font attribute 405 at the same time. 
*/ 406 if ((start_or_end == START) && old_tag && *old_tag) 407 { 408 add_word ("</"); 409 add_word (old_tag); 410 add_char ('>'); 411 } 412 413 if (*tag) 414 { 415 add_char ('<'); 416 if (start_or_end != START) 417 add_char ('/'); 418 add_word (tag); 419 add_char ('>'); 420 } 421 422 if ((start_or_end != START) && old_tag && *old_tag) 423 { 424 add_char ('<'); 425 add_word (old_tag); 426 add_char ('>'); 427 } 428} 429 430 431 432/* Output an HTML <link> to the filename for NODE, including the 433 other string as extra attributes. */ 434void 435add_link (nodename, attributes) 436 char *nodename, *attributes; 437{ 438 if (nodename) 439 { 440 add_html_elt ("<link "); 441 add_word_args ("%s", attributes); 442 add_word_args (" href=\""); 443 add_anchor_name (nodename, 1); 444 add_word ("\">\n"); 445 } 446} 447 448/* Output NAME with characters escaped as appropriate for an anchor 449 name, i.e., escape URL special characters as %<n>. */ 450void 451add_escaped_anchor_name (name) 452 char *name; 453{ 454 for (; *name; name++) 455 { 456 if (*name == '&') 457 add_word ("&"); 458 else if (! URL_SAFE_CHAR (*name)) 459 /* Cast so characters with the high bit set are treated as >128, 460 for example o-umlaut should be 246, not -10. */ 461 add_word_args ("%%%x", (unsigned char) *name); 462 else 463 add_char (*name); 464 } 465} 466 467/* Insert the text for the name of a reference in an HTML anchor 468 appropriate for NODENAME. If HREF is nonzero, it will be 469 appropriate for a href= attribute, rather than name= i.e., including 470 the `#' if it's an internal reference. */ 471void 472add_anchor_name (nodename, href) 473 char *nodename; 474 int href; 475{ 476 if (href) 477 { 478 if (splitting) 479 add_url_name (nodename, href); 480 add_char ('#'); 481 } 482 /* Always add NODENAME, so that the reference would pinpoint the 483 exact node on its file. 
This is so several nodes could share the 484 same file, in case of file-name clashes, but also for more 485 accurate browser positioning. */ 486 if (strcasecmp (nodename, "(dir)") == 0) 487 /* Strip the parens, but keep the original letter-case. */ 488 add_word_args ("%.3s", nodename + 1); 489 else 490 add_escaped_anchor_name (nodename); 491} 492 493/* Insert the text for the name of a reference in an HTML url, aprropriate 494 for NODENAME */ 495void 496add_url_name (nodename, href) 497 char *nodename; 498 int href; 499{ 500 add_nodename_to_filename (nodename, href); 501} 502 503/* Only allow [-0-9a-zA-Z_.] when nodifying filenames. This may 504 result in filename clashes; e.g., 505 506 @node Foo ],,, 507 @node Foo [,,, 508 509 both map to Foo--.html. If that happens, cm_node will put all 510 the nodes whose file names clash on the same file. */ 511void 512fix_filename (filename) 513 char *filename; 514{ 515 char *p; 516 for (p = filename; *p; p++) 517 { 518 if (!(isalnum (*p) || strchr ("-._", *p))) 519 *p = '-'; 520 } 521} 522 523/* As we can't look-up a (forward-referenced) nodes' html filename 524 from the tentry, we take the easy way out. We assume that 525 nodenames are unique, and generate the html filename from the 526 nodename, that's always known. */ 527static char * 528nodename_to_filename_1 (nodename, href) 529 char *nodename; 530 int href; 531{ 532 char *p; 533 char *filename; 534 char dirname[PATH_MAX]; 535 536 if (strcasecmp (nodename, "Top") == 0) 537 { 538 /* We want to convert references to the Top node into 539 "index.html#Top". */ 540 if (href) 541 filename = xstrdup ("index.html"); /* "#Top" is added by our callers */ 542 else 543 filename = xstrdup ("Top"); 544 } 545 else if (strcasecmp (nodename, "(dir)") == 0) 546 /* We want to convert references to the (dir) node into 547 "../index.html". 
*/ 548 filename = xstrdup ("../index.html"); 549 else 550 { 551 filename = xmalloc (PATH_MAX); 552 dirname[0] = '\0'; 553 *filename = '\0'; 554 555 /* Check for external reference: ``(info-document)node-name'' 556 Assume this node lives at: ``../info-document/node-name.html'' 557 558 We need to handle the special case (sigh): ``(info-document)'', 559 ie, an external top-node, which should translate to: 560 ``../info-document/info-document.html'' */ 561 562 p = nodename; 563 if (*nodename == '(') 564 { 565 int length; 566 567 p = strchr (nodename, ')'); 568 if (p == NULL) 569 { 570 line_error (_("[unexpected] invalid node name: `%s'"), nodename); 571 xexit (1); 572 } 573 574 length = p - nodename - 1; 575 if (length > 5 && 576 FILENAME_CMPN (p - 5, ".info", 5) == 0) 577 length -= 5; 578 /* This is for DOS, and also for Windows and GNU/Linux 579 systems that might have Info files copied from a DOS 8+3 580 filesystem. */ 581 if (length > 4 && 582 FILENAME_CMPN (p - 4, ".inf", 4) == 0) 583 length -= 4; 584 strcpy (filename, "../"); 585 strncpy (dirname, nodename + 1, length); 586 *(dirname + length) = '\0'; 587 fix_filename (dirname); 588 strcat (filename, dirname); 589 strcat (filename, "/"); 590 p++; 591 } 592 593 /* In the case of just (info-document), there will be nothing 594 remaining, and we will refer to ../info-document/, which will 595 work fine. */ 596 strcat (filename, p); 597 if (*p) 598 { 599 /* Hmm */ 600 fix_filename (filename + strlen (filename) - strlen (p)); 601 strcat (filename, ".html"); 602 } 603 } 604 605 /* Produce a file name suitable for the underlying filesystem. */ 606 normalize_filename (filename); 607 608#if 0 609 /* We add ``#Nodified-filename'' anchor to external references to be 610 prepared for non-split HTML support. Maybe drop this. 
*/ 611 if (href && *dirname) 612 { 613 strcat (filename, "#"); 614 strcat (filename, p); 615 /* Hmm, again */ 616 fix_filename (filename + strlen (filename) - strlen (p)); 617 } 618#endif 619 620 return filename; 621} 622 623/* If necessary, ie, if current filename != filename of node, output 624 the node name. */ 625void 626add_nodename_to_filename (nodename, href) 627 char *nodename; 628 int href; 629{ 630 /* for now, don't check: always output filename */ 631 char *filename = nodename_to_filename_1 (nodename, href); 632 add_word (filename); 633 free (filename); 634} 635 636char * 637nodename_to_filename (nodename) 638 char *nodename; 639{ 640 /* The callers of nodename_to_filename use the result to produce 641 <a href=, so call nodename_to_filename_1 with last arg non-zero. */ 642 return nodename_to_filename_1 (nodename, 1); 643} 644