1/* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*- 2 3Copyright (C) 1984 The Regents of the University of California 4 5Redistribution and use in source and binary forms, with or without 6modification, are permitted provided that the following conditions are 7met: 81. Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 102. Redistributions in binary form must reproduce the above copyright 11 notice, this list of conditions and the following disclaimer in the 12 documentation and/or other materials provided with the 13 distribution. 143. Neither the name of the University nor the names of its 15 contributors may be used to endorse or promote products derived 16 from this software without specific prior written permission. 17 18THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' 19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS 22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 25BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 27OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 28IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 31Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999, 32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 33 Free Software Foundation, Inc. 34 35This file is not considered part of GNU Emacs. 

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */


/* NB To comply with the above BSD license, copyright information is
reproduced in etc/ETAGS.README. That file should be updated when the
above notices are.

To the best of our knowledge, this code was originally based on the
ctags.c distributed with BSD4.2, which was copyrighted by the
University of California, as described above. */


/*
 * Authors:
 * Ctags originally by Ken Arnold.
 * Fortran added by Jim Kleckner.
 * Ed Pelegri-Llopart added C typedefs.
 * Gnu Emacs TAGS format and modifications by RMS?
 * 1989 Sam Kendall added C++.
 * 1992 Joseph B. Wells improved C and C++ parsing.
 * 1993 Francesco Potortì reorganised C and C++.
 * 1994 Line-by-line regexp tags by Tom Tromey.
 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
 * 2002 #line directives by Francesco Potortì.
 *
 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
 */

/*
 * If you want to add support for a new language, start by looking at the LUA
 * language, which is the simplest. Alternatively, consider shipping a
 * configuration file containing regexp definitions for etags.
81 */ 82 83char pot_etags_version[] = "@(#) pot revision number is 17.26"; 84 85#define TRUE 1 86#define FALSE 0 87 88#ifdef DEBUG 89# undef DEBUG 90# define DEBUG TRUE 91#else 92# define DEBUG FALSE 93# define NDEBUG /* disable assert */ 94#endif 95 96#ifdef HAVE_CONFIG_H 97# include <config.h> 98 /* On some systems, Emacs defines static as nothing for the sake 99 of unexec. We don't want that here since we don't use unexec. */ 100# undef static 101# ifndef PTR /* for XEmacs */ 102# define PTR void * 103# endif 104# ifndef __P /* for XEmacs */ 105# define __P(args) args 106# endif 107#else /* no config.h */ 108# if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C)) 109# define __P(args) args /* use prototypes */ 110# define PTR void * /* for generic pointers */ 111# else /* not standard C */ 112# define __P(args) () /* no prototypes */ 113# define const /* remove const for old compilers' sake */ 114# define PTR long * /* don't use void* */ 115# endif 116#endif /* !HAVE_CONFIG_H */ 117 118#ifndef _GNU_SOURCE 119# define _GNU_SOURCE 1 /* enables some compiler checks on GNU */ 120#endif 121 122/* WIN32_NATIVE is for XEmacs. 123 MSDOS, WINDOWSNT, DOS_NT are for Emacs. 
*/ 124#ifdef WIN32_NATIVE 125# undef MSDOS 126# undef WINDOWSNT 127# define WINDOWSNT 128#endif /* WIN32_NATIVE */ 129 130#ifdef MSDOS 131# undef MSDOS 132# define MSDOS TRUE 133# include <fcntl.h> 134# include <sys/param.h> 135# include <io.h> 136# ifndef HAVE_CONFIG_H 137# define DOS_NT 138# include <sys/config.h> 139# endif 140#else 141# define MSDOS FALSE 142#endif /* MSDOS */ 143 144#ifdef WINDOWSNT 145# include <stdlib.h> 146# include <fcntl.h> 147# include <string.h> 148# include <direct.h> 149# include <io.h> 150# define MAXPATHLEN _MAX_PATH 151# undef HAVE_NTGUI 152# undef DOS_NT 153# define DOS_NT 154# ifndef HAVE_GETCWD 155# define HAVE_GETCWD 156# endif /* undef HAVE_GETCWD */ 157#else /* not WINDOWSNT */ 158# ifdef STDC_HEADERS 159# include <stdlib.h> 160# include <string.h> 161# else /* no standard C headers */ 162 extern char *getenv (); 163# ifdef VMS 164# define EXIT_SUCCESS 1 165# define EXIT_FAILURE 0 166# else /* no VMS */ 167# define EXIT_SUCCESS 0 168# define EXIT_FAILURE 1 169# endif 170# endif 171#endif /* !WINDOWSNT */ 172 173#ifdef HAVE_UNISTD_H 174# include <unistd.h> 175#else 176# if defined (HAVE_GETCWD) && !defined (WINDOWSNT) 177 extern char *getcwd (char *buf, size_t size); 178# endif 179#endif /* HAVE_UNISTD_H */ 180 181#include <stdio.h> 182#include <ctype.h> 183#include <errno.h> 184#ifndef errno 185 extern int errno; 186#endif 187#include <sys/types.h> 188#include <sys/stat.h> 189 190#include <assert.h> 191#ifdef NDEBUG 192# undef assert /* some systems have a buggy assert.h */ 193# define assert(x) ((void) 0) 194#endif 195 196#if !defined (S_ISREG) && defined (S_IFREG) 197# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) 198#endif 199 200#ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */ 201# define NO_LONG_OPTIONS TRUE 202# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr) 203 extern char *optarg; 204 extern int optind, opterr; 205#else 206# define NO_LONG_OPTIONS FALSE 207# include 
<getopt.h> 208#endif /* NO_LONG_OPTIONS */ 209 210#ifndef HAVE_CONFIG_H /* this is a standalone compilation */ 211# ifdef __CYGWIN__ /* compiling on Cygwin */ 212 !!! NOTICE !!! 213 the regex.h distributed with Cygwin is not compatible with etags, alas! 214If you want regular expression support, you should delete this notice and 215 arrange to use the GNU regex.h and regex.c. 216# endif 217#endif 218#include <regex.h> 219 220/* Define CTAGS to make the program "ctags" compatible with the usual one. 221 Leave it undefined to make the program "etags", which makes emacs-style 222 tag tables and tags typedefs, #defines and struct/union/enum by default. */ 223#ifdef CTAGS 224# undef CTAGS 225# define CTAGS TRUE 226#else 227# define CTAGS FALSE 228#endif 229 230#define streq(s,t) (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t)) 231#define strcaseeq(s,t) (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t)) 232#define strneq(s,t,n) (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n)) 233#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n)) 234 235#define CHARS 256 /* 2^sizeof(char) */ 236#define CHAR(x) ((unsigned int)(x) & (CHARS - 1)) 237#define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */ 238#define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */ 239#define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */ 240#define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */ 241#define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */ 242 243#define ISALNUM(c) isalnum (CHAR(c)) 244#define ISALPHA(c) isalpha (CHAR(c)) 245#define ISDIGIT(c) isdigit (CHAR(c)) 246#define ISLOWER(c) islower (CHAR(c)) 247 248#define lowcase(c) tolower (CHAR(c)) 249#define upcase(c) toupper (CHAR(c)) 250 251 252/* 253 * xnew, xrnew -- allocate, reallocate storage 254 * 255 * SYNOPSIS: Type *xnew (int n, Type); 256 * void xrnew (OldPointer, int n, Type); 257 */ 258#if DEBUG 259# include 
"chkmalloc.h" 260# define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \ 261 (n) * sizeof (Type))) 262# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \ 263 (char *) (op), (n) * sizeof (Type))) 264#else 265# define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type))) 266# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \ 267 (char *) (op), (n) * sizeof (Type))) 268#endif 269 270#define bool int 271 272typedef void Lang_function __P((FILE *)); 273 274typedef struct 275{ 276 char *suffix; /* file name suffix for this compressor */ 277 char *command; /* takes one arg and decompresses to stdout */ 278} compressor; 279 280typedef struct 281{ 282 char *name; /* language name */ 283 char *help; /* detailed help for the language */ 284 Lang_function *function; /* parse function */ 285 char **suffixes; /* name suffixes of this language's files */ 286 char **filenames; /* names of this language's files */ 287 char **interpreters; /* interpreters for this language */ 288 bool metasource; /* source used to generate other sources */ 289} language; 290 291typedef struct fdesc 292{ 293 struct fdesc *next; /* for the linked list */ 294 char *infname; /* uncompressed input file name */ 295 char *infabsname; /* absolute uncompressed input file name */ 296 char *infabsdir; /* absolute dir of input file */ 297 char *taggedfname; /* file name to write in tagfile */ 298 language *lang; /* language of file */ 299 char *prop; /* file properties to write in tagfile */ 300 bool usecharno; /* etags tags shall contain char number */ 301 bool written; /* entry written in the tags file */ 302} fdesc; 303 304typedef struct node_st 305{ /* sorting structure */ 306 struct node_st *left, *right; /* left and right sons */ 307 fdesc *fdp; /* description of file to whom tag belongs */ 308 char *name; /* tag name */ 309 char *regex; /* search regexp */ 310 bool valid; /* write this tag on the tag file */ 311 bool is_func; /* function tag: use regexp in 
CTAGS mode */
  bool been_warned; /* warning already given for duplicated tag */
  int lno; /* line number tag is on */
  long cno; /* character number line starts on */
} node;

/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size; /* size of BUFFER */
  int len; /* length of the string currently held in BUFFER */
  char *buffer; /* the text itself */
} linebuffer;

/* Used to support mixing of --lang and file names.
   One element describes one command-line item; ARG_TYPE tells which of
   the other members are meaningful, and an element of type at_end
   terminates the list. */
typedef struct
{
  enum {
    at_language, /* a language specification */
    at_regexp, /* a regular expression */
    at_filename, /* a file name */
    at_stdin, /* read from stdin here */
    at_end /* stop parsing the list */
  } arg_type; /* argument type */
  language *lang; /* language associated with the argument */
  char *what; /* the argument itself */
} argument;

/* Structure defining a regular expression.
 */
typedef struct regexp
{
  struct regexp *p_next; /* pointer to next in list */
  language *lang; /* if set, use only for this language */
  char *pattern; /* the regexp pattern */
  char *name; /* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs; /* re registers */
  bool error_signaled; /* already signaled for this regexp */
  bool force_explicit_name; /* do not allow implicit tag name */
  bool ignore_case; /* ignore case when matching */
  bool multi_line; /* do a multi-line match on the whole file */
} regexp;


/* Many compilers barf on this:
 Lang_function Ada_funcs;
 so let's write it this way */
/* Parse functions.  All of them have the Lang_function signature except
   C_entries, which takes an additional leading int (c_ext). */
static void Ada_funcs __P((FILE *));
static void Asm_labels __P((FILE *));
static void C_entries __P((int c_ext, FILE *));
static void default_C_entries __P((FILE *));
static void plain_C_entries __P((FILE *));
static void Cjava_entries __P((FILE *));
static void Cobol_paragraphs __P((FILE *));
static void Cplusplus_entries __P((FILE *));
static void Cstar_entries __P((FILE *));
static void Erlang_functions __P((FILE *));
static void Forth_words __P((FILE *));
static void Fortran_functions __P((FILE *));
static void HTML_labels __P((FILE *));
static void Lisp_functions __P((FILE *));
static void Lua_functions __P((FILE *));
static void Makefile_targets __P((FILE *));
static void Pascal_functions __P((FILE *));
static void Perl_functions __P((FILE *));
static void PHP_functions __P((FILE *));
static void PS_functions __P((FILE *));
static void Prolog_functions __P((FILE *));
static void Python_functions __P((FILE *));
static void Scheme_functions __P((FILE *));
static void TeX_commands __P((FILE *));
static void Texinfo_nodes __P((FILE *));
static void Yacc_entries __P((FILE *));
static void just_read_file __P((FILE *));

static void print_language_names __P((void));
static void
print_version __P((void));
static void print_help __P((argument *));
int main __P((int, char **));

/* Forward declarations of the file's helper functions.  The __P wrapper
   drops the parameter lists for compilers without prototype support. */
static compressor *get_compressor_from_suffix __P((char *, char **));
static language *get_language_from_langname __P((const char *));
static language *get_language_from_interpreter __P((char *));
static language *get_language_from_filename __P((char *, bool));
static void readline __P((linebuffer *, FILE *));
static long readline_internal __P((linebuffer *, FILE *));
static bool nocase_tail __P((char *));
static void get_tag __P((char *, char **));

static void analyse_regex __P((char *));
static void free_regexps __P((void));
static void regex_tag_multiline __P((void));
static void error __P((const char *, const char *));
static void suggest_asking_for_help __P((void));
/* Unlike its neighbours, fatal is declared without `static'. */
void fatal __P((char *, char *));
static void pfatal __P((char *));
static void add_node __P((node *, node **));

static void init __P((void));
static void process_file_name __P((char *, language *));
static void process_file __P((FILE *, char *, language *));
static void find_entries __P((FILE *));
static void free_tree __P((node *));
static void free_fdesc __P((fdesc *));
static void pfnote __P((char *, bool, char *, int, int, long));
static void make_tag __P((char *, int, bool, char *, int, int, long));
static void invalidate_nodes __P((fdesc *, node **));
static void put_entries __P((node *));

static char *concat __P((char *, char *, char *));
static char *skip_spaces __P((char *));
static char *skip_non_spaces __P((char *));
static char *savenstr __P((char *, int));
static char *savestr __P((char *));
static char *etags_strchr __P((const char *, int));
static char *etags_strrchr __P((const char *, int));
static int etags_strcasecmp __P((const char *, const char *));
static int etags_strncasecmp __P((const char *, const char *, int));
static char *etags_getcwd
__P((void));
static char *relative_filename __P((char *, char *));
static char *absolute_filename __P((char *, char *));
static char *absolute_dirname __P((char *, char *));
static bool filename_is_absolute __P((char *f));
static void canonicalize_filename __P((char *));
static void linebuffer_init __P((linebuffer *));
static void linebuffer_setlen __P((linebuffer *, int));
/* Allocators behind the xnew and xrnew macros in non-DEBUG builds. */
static PTR xmalloc __P((unsigned int));
static PTR xrealloc __P((char *, unsigned int));


static char searchar = '/'; /* use /.../ searches */

static char *tagfile; /* output file */
static char *progname; /* name this program was invoked with */
static char *cwd; /* current working directory */
static char *tagfiledir; /* directory of tagfile */
static FILE *tagf; /* ioptr for tags file */

static fdesc *fdhead; /* head of file description list */
static fdesc *curfdp; /* current file description */
static int lineno; /* line number of current line */
static long charno; /* current character number */
static long linecharno; /* charno of start of current line */
static char *dbp; /* pointer to start of current tag */

/* NOTE(review): presumably a sentinel meaning "no valid character
   number" -- confirm against the places that use it. */
static const int invalidcharno = -1;

static node *nodehead; /* the head of the binary tree of tags */
static node *last_node; /* the last node created */

static linebuffer lb; /* the current line */
static linebuffer filebuf; /* a buffer containing the whole file */
static linebuffer token_name; /* a buffer containing a tag name */

/* boolean "functions" (see init) */
/* These tables are read through the iswhite, notinname, intoken,
   begtoken and endtoken macros, which index them with CHAR(c). */
static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
static char
  /* white chars */
  *white = " \f\t\n\r\v",
  /* not in a name */
  *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying!
 */
  /* token ending chars */
  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
  /* token starting chars */
  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  /* valid in-token chars */
  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";

/* Option state.  Several of these variables are written directly through
   the flag pointers stored in the longopts table. */
static bool append_to_tagfile; /* -a: append to tags */
/* The next four default to TRUE for etags, but to FALSE for ctags. */
static bool typedefs; /* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 /* 0 struct/enum/union decls, and C++ */
 /* member functions. */
static bool constantypedefs; /* -d: create tags for C #define, enum */
 /* constants and variables. */
 /* -D: opposite of -d. Default under ctags. */
static bool globals; /* create tags for global variables */
static bool members; /* create tags for C member variables */
static bool declarations; /* --declarations: tag them and extern in C&Co*/
static bool no_line_directive; /* ignore #line directives (undocumented) */
static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
static bool update; /* -u: update tags */
static bool vgrind_style; /* -v: create vgrind style index output */
static bool no_warnings; /* -w: suppress warnings (undocumented) */
static bool cxref_style; /* -x: create cxref style output */
static bool cplusplus; /* .[hc] means C++, not C */
static bool ignoreindent; /* -I: ignore indentation in C */
static bool packages_only; /* --packages-only: in Ada, only tag packages*/

/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
# undef STDIN
#endif

/* The value lies above the range of the single-character option codes
   used elsewhere in the option table. */
#define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
static bool parsing_stdin; /* --parse-stdin used */

static regexp *p_head; /* list of all regexps */
static bool need_filebuf; /* some regexes are multi-line */

static
/* Long option table handed to getopt_long.  An entry whose third field
   is a variable address stores its fourth field into that variable; an
   entry with NULL there makes getopt_long return the fourth field.  The
   CTAGS build and the etags build each add their own entries, and the
   all-zero entry ends the table.
   NOTE(review): "help" is listed twice, with values 'h' and 'H'; confirm
   which entry getopt_long selects and whether abbreviations of --help
   are reported as ambiguous because of it. */
struct option longopts[] =
{
  { "append", no_argument, NULL, 'a' },
  { "packages-only", no_argument, &packages_only, TRUE },
  { "c++", no_argument, NULL, 'C' },
  { "declarations", no_argument, &declarations, TRUE },
  { "no-line-directive", no_argument, &no_line_directive, TRUE },
  { "no-duplicates", no_argument, &no_duplicates, TRUE },
  { "help", no_argument, NULL, 'h' },
  { "help", no_argument, NULL, 'H' },
  { "ignore-indentation", no_argument, NULL, 'I' },
  { "language", required_argument, NULL, 'l' },
  { "members", no_argument, &members, TRUE },
  { "no-members", no_argument, &members, FALSE },
  { "output", required_argument, NULL, 'o' },
  { "regex", required_argument, NULL, 'r' },
  { "no-regex", no_argument, NULL, 'R' },
  { "ignore-case-regex", required_argument, NULL, 'c' },
  { "parse-stdin", required_argument, NULL, STDIN },
  { "version", no_argument, NULL, 'V' },

#if CTAGS /* Ctags options */
  { "backward-search", no_argument, NULL, 'B' },
  { "cxref", no_argument, NULL, 'x' },
  { "defines", no_argument, NULL, 'd' },
  { "globals", no_argument, &globals, TRUE },
  { "typedefs", no_argument, NULL, 't' },
  { "typedefs-and-c++", no_argument, NULL, 'T' },
  { "update", no_argument, NULL, 'u' },
  { "vgrind", no_argument, NULL, 'v' },
  { "no-warn", no_argument, NULL, 'w' },

#else /* Etags options */
  { "no-defines", no_argument, NULL, 'D' },
  { "no-globals", no_argument, &globals, FALSE },
  { "include", required_argument, NULL, 'i' },
#endif
  { NULL }
};

/* Known compressed-file suffixes and the command that decompresses each
   to standard output.  The entry with a NULL suffix ends the table. */
static compressor compressors[] =
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { NULL }
};

/*
 * Language stuff.
 *
 * For each language there is a NULL-terminated list of file name
 * suffixes (written without the dot) and, usually, a help text.
 */

/* Ada code */
static char *Ada_suffixes [] =
  { "ads", "adb", "ada", NULL };
static char Ada_help [] =
"In Ada code, functions, procedures, packages, tasks and types are\n\
tags. Use the `--packages-only' option to create tags for\n\
packages only.\n\
Ada tag names have suffixes indicating the type of entity:\n\
 Entity type: Qualifier:\n\
 ------------ ----------\n\
 function /f\n\
 procedure /p\n\
 package spec /s\n\
 package body /b\n\
 type /t\n\
 task /k\n\
Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
will just search for any tag `bidule'.";

/* Assembly code */
static char *Asm_suffixes [] =
  { "a", /* Unix assembler */
    "asm", /* Microcontroller assembly */
    "def", /* BSO/Tasking definition includes */
    "inc", /* Microcontroller include files */
    "ins", /* Microcontroller include files */
    "s", "sa", /* Unix assembler */
    "S", /* cpp-processed Unix assembler */
    "src", /* BSO/Tasking C compiler output */
    NULL
  };
static char Asm_help [] =
"In assembler code, labels appearing at the beginning of a line,\n\
followed by a colon, are tags.";


/* Note that .c and .h can be considered C++, if the --c++ flag was
 given, or if the `class' or `template' keywords are met inside the file.
 That is why default_C_entries is called for these. */
static char *default_C_suffixes [] =
  { "c", "h", NULL };
static char default_C_help [] =
"In C code, any C function or typedef is a tag, and so are\n\
definitions of `struct', `union' and `enum'. `#define' macro\n\
definitions and `enum' constants are tags unless you specify\n\
`--no-defines'. Global variables are tags unless you specify\n\
`--no-globals' and so are struct members unless you specify\n\
`--no-members'.
Use of `--no-globals', `--no-defines' and\n\
`--no-members' can make the tags table file much smaller.\n\
You can tag function declarations and external variables by\n\
using `--declarations'.";

/* C++ code */
static char *Cplusplus_suffixes [] =
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
    "M", /* Objective C++ */
    "pdb", /* Postscript with C syntax */
    NULL };
static char Cplusplus_help [] =
"In C++ code, all the tag constructs of C code are tagged. (Use\n\
--help --lang=c --lang=c++ for full help.)\n\
In addition to C tags, member functions are also recognized. Member\n\
variables are recognized unless you use the `--no-members' option.\n\
Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
`operator+'.";

/* Java code */
static char *Cjava_suffixes [] =
  { "java", NULL };
static char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged.
(Use --help --lang=c --lang=c++ --lang=java for full help.)"; 644 645 646static char *Cobol_suffixes [] = 647 { "COB", "cob", NULL }; 648static char Cobol_help [] = 649"In Cobol code, tags are paragraph names; that is, any word\n\ 650starting in column 8 and followed by a period."; 651 652static char *Cstar_suffixes [] = 653 { "cs", "hs", NULL }; 654 655static char *Erlang_suffixes [] = 656 { "erl", "hrl", NULL }; 657static char Erlang_help [] = 658"In Erlang code, the tags are the functions, records and macros\n\ 659defined in the file."; 660 661char *Forth_suffixes [] = 662 { "fth", "tok", NULL }; 663static char Forth_help [] = 664"In Forth code, tags are words defined by `:',\n\ 665constant, code, create, defer, value, variable, buffer:, field."; 666 667static char *Fortran_suffixes [] = 668 { "F", "f", "f90", "for", NULL }; 669static char Fortran_help [] = 670"In Fortran code, functions, subroutines and block data are tags."; 671 672static char *HTML_suffixes [] = 673 { "htm", "html", "shtml", NULL }; 674static char HTML_help [] = 675"In HTML input files, the tags are the `title' and the `h1', `h2',\n\ 676`h3' headers. 
Also, tags are `name=' in anchors and all\n\
occurrences of `id='.";

/* Lisp code */
static char *Lisp_suffixes [] =
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
static char Lisp_help [] =
"In Lisp code, any function defined with `defun', any variable\n\
defined with `defvar' or `defconst', and in general the first\n\
argument of any expression that starts with `(def' in column zero\n\
is a tag.";

/* Lua scripts */
static char *Lua_suffixes [] =
  { "lua", "LUA", NULL };
static char Lua_help [] =
"In Lua scripts, all functions are tags.";

/* Makefiles: listed by file name, not by suffix. */
static char *Makefile_filenames [] =
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
static char Makefile_help [] =
"In makefiles, targets are tags; additionally, variables are tags\n\
unless you specify `--no-globals'.";

/* Objective C code */
static char *Objc_suffixes [] =
  { "lm", /* Objective lex file */
    "m", /* Objective C file */
    NULL };
static char Objc_help [] =
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols. Tags for variables and\n\
functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
(Use --help --lang=c --lang=objc --lang=java for full help.)";

/* Pascal code */
static char *Pascal_suffixes [] =
  { "p", "pas", NULL };
static char Pascal_help [] =
"In Pascal code, the tags are the functions and procedures defined\n\
in the file.";
/* " // this is for working around an Emacs highlighting bug... */

/* Perl code: has a list of interpreter names as well as suffixes. */
static char *Perl_suffixes [] =
  { "pl", "pm", NULL };
static char *Perl_interpreters [] =
  { "perl", "@PERL@", NULL };
static char Perl_help [] =
"In Perl code, the tags are the packages, subroutines and variables\n\
defined by the `package', `sub', `my' and `local' keywords. Use\n\
`--globals' if you want to tag global variables. Tags for\n\
subroutines are named `PACKAGE::SUB'.
The name for subroutines\n\
defined in the default package is `main::SUB'.";

/* PHP code */
static char *PHP_suffixes [] =
  { "php", "php3", "php4", NULL };
static char PHP_help [] =
"In PHP code, tags are functions, classes and defines. Unless you use\n\
the `--no-members' option, vars are tags too.";

/* Suffixes only; no help text of its own is defined here. */
static char *plain_C_suffixes [] =
  { "pc", /* Pro*C file */
    NULL };

/* PostScript code */
static char *PS_suffixes [] =
  { "ps", "psw", NULL }; /* .psw is for PSWrap */
static char PS_help [] =
"In PostScript code, the tags are the functions.";

/* Prolog code */
static char *Prolog_suffixes [] =
  { "prolog", NULL };
static char Prolog_help [] =
"In Prolog code, tags are predicates and rules at the beginning of\n\
line.";

/* Python code */
static char *Python_suffixes [] =
  { "py", NULL };
static char Python_help [] =
"In Python code, `def' or `class' at the beginning of a line\n\
generate a tag.";

/* Scheme code */
/* Can't do the `SCM' or `scm' prefix with a version number. */
static char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
static char Scheme_help [] =
"In Scheme code, tags include anything defined with `def' or with a\n\
construct whose name starts with `def'. They also include\n\
variables set with `set!'
at top level in the file.";

/* TeX and LaTeX text */
static char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
static char TeX_help [] =
"In LaTeX text, the argument of any of the commands `\\chapter',\n\
`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
`\\newenvironment' or `\\renewenvironment' is a tag.\n\
\n\
Other commands can be specified by setting the environment variable\n\
`TEXTAGS' to a colon-separated list like, for example,\n\
 TEXTAGS=\"mycommand:myothercommand\".";


/* Texinfo files */
static char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };
static char Texinfo_help [] =
"for texinfo files, lines starting with @node are tagged.";

/* Bison and Yacc input files */
static char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
static char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs. The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";

/* Help texts for the two pseudo-languages `auto' and `none'. */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files base on file name suffix and file contents.";

static char none_help [] =
"`none' is not a real language, it indicates to only do\n\
regexp processing on files.";

/* Help text for languages that define none of their own. */
static char no_lang_help [] =
"No detailed help available for this language.";


/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name. I just didn't.
805 */ 806 807static language lang_names [] = 808{ 809 { "ada", Ada_help, Ada_funcs, Ada_suffixes }, 810 { "asm", Asm_help, Asm_labels, Asm_suffixes }, 811 { "c", default_C_help, default_C_entries, default_C_suffixes }, 812 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes }, 813 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes }, 814 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes }, 815 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes }, 816 { "forth", Forth_help, Forth_words, Forth_suffixes }, 817 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes }, 818 { "html", HTML_help, HTML_labels, HTML_suffixes }, 819 { "java", Cjava_help, Cjava_entries, Cjava_suffixes }, 820 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes }, 821 { "lua", Lua_help, Lua_functions, Lua_suffixes }, 822 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames}, 823 { "objc", Objc_help, plain_C_entries, Objc_suffixes }, 824 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes }, 825 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters}, 826 { "php", PHP_help, PHP_functions, PHP_suffixes }, 827 { "postscript",PS_help, PS_functions, PS_suffixes }, 828 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes }, 829 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes }, 830 { "python", Python_help, Python_functions, Python_suffixes }, 831 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes }, 832 { "tex", TeX_help, TeX_commands, TeX_suffixes }, 833 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes }, 834 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE}, 835 { "auto", auto_help }, /* default guessing scheme */ 836 { "none", none_help, just_read_file }, /* regexp matching only */ 837 { NULL } /* end of list */ 838}; 839 840 841static void 842print_language_names () 843{ 844 language *lang; 845 char **name, **ext; 846 847 puts ("\nThese are the currently supported languages, 
along with the\n\ 848default file names and dot suffixes:"); 849 for (lang = lang_names; lang->name != NULL; lang++) 850 { 851 printf (" %-*s", 10, lang->name); 852 if (lang->filenames != NULL) 853 for (name = lang->filenames; *name != NULL; name++) 854 printf (" %s", *name); 855 if (lang->suffixes != NULL) 856 for (ext = lang->suffixes; *ext != NULL; ext++) 857 printf (" .%s", *ext); 858 puts (""); 859 } 860 puts ("where `auto' means use default language for files based on file\n\ 861name suffix, and `none' means only do regexp processing on files.\n\ 862If no language is specified and no matching suffix is found,\n\ 863the first line of the file is read for a sharp-bang (#!) sequence\n\ 864followed by the name of an interpreter. If no such sequence is found,\n\ 865Fortran is tried first; if no tags are found, C is tried next.\n\ 866When parsing any C file, a \"class\" or \"template\" keyword\n\ 867switches to C++."); 868 puts ("Compressed files are supported using gzip and bzip2.\n\ 869\n\ 870For detailed help on a given language use, for example,\n\ 871etags --help --lang=ada."); 872} 873 874#ifndef EMACS_NAME 875# define EMACS_NAME "standalone" 876#endif 877#ifndef VERSION 878# define VERSION "version" 879#endif 880static void 881print_version () 882{ 883 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION); 884 puts ("Copyright (C) 2007 Free Software Foundation, Inc."); 885 puts ("This program is distributed under the terms in ETAGS.README"); 886 887 exit (EXIT_SUCCESS); 888} 889 890static void 891print_help (argbuffer) 892 argument *argbuffer; 893{ 894 bool help_for_lang = FALSE; 895 896 for (; argbuffer->arg_type != at_end; argbuffer++) 897 if (argbuffer->arg_type == at_language) 898 { 899 if (help_for_lang) 900 puts (""); 901 puts (argbuffer->lang->help); 902 help_for_lang = TRUE; 903 } 904 905 if (help_for_lang) 906 exit (EXIT_SUCCESS); 907 908 printf ("Usage: %s [options] [[regex-option ...] 
file-name] ...\n\ 909\n\ 910These are the options accepted by %s.\n", progname, progname); 911 if (NO_LONG_OPTIONS) 912 puts ("WARNING: long option names do not work with this executable,\n\ 913as it is not linked with GNU getopt."); 914 else 915 puts ("You may use unambiguous abbreviations for the long option names."); 916 puts (" A - as file name means read names from stdin (one per line).\n\ 917Absolute names are stored in the output file as they are.\n\ 918Relative ones are stored relative to the output file's directory.\n"); 919 920 puts ("-a, --append\n\ 921 Append tag entries to existing tags file."); 922 923 puts ("--packages-only\n\ 924 For Ada files, only generate tags for packages."); 925 926 if (CTAGS) 927 puts ("-B, --backward-search\n\ 928 Write the search commands for the tag entries using '?', the\n\ 929 backward-search command instead of '/', the forward-search command."); 930 931 /* This option is mostly obsolete, because etags can now automatically 932 detect C++. Retained for backward compatibility and for debugging and 933 experimentation. In principle, we could want to tag as C++ even 934 before any "class" or "template" keyword. 
935 puts ("-C, --c++\n\ 936 Treat files whose name suffix defaults to C language as C++ files."); 937 */ 938 939 puts ("--declarations\n\ 940 In C and derived languages, create tags for function declarations,"); 941 if (CTAGS) 942 puts ("\tand create tags for extern variables if --globals is used."); 943 else 944 puts 945 ("\tand create tags for extern variables unless --no-globals is used."); 946 947 if (CTAGS) 948 puts ("-d, --defines\n\ 949 Create tag entries for C #define constants and enum constants, too."); 950 else 951 puts ("-D, --no-defines\n\ 952 Don't create tag entries for C #define constants and enum constants.\n\ 953 This makes the tags file smaller."); 954 955 if (!CTAGS) 956 puts ("-i FILE, --include=FILE\n\ 957 Include a note in tag file indicating that, when searching for\n\ 958 a tag, one should also consult the tags file FILE after\n\ 959 checking the current file."); 960 961 puts ("-l LANG, --language=LANG\n\ 962 Force the following files to be considered as written in the\n\ 963 named language up to the next --language=LANG option."); 964 965 if (CTAGS) 966 puts ("--globals\n\ 967 Create tag entries for global variables in some languages."); 968 else 969 puts ("--no-globals\n\ 970 Do not create tag entries for global variables in some\n\ 971 languages. This makes the tags file smaller."); 972 puts ("--no-members\n\ 973 Do not create tag entries for members of structures\n\ 974 in some languages."); 975 976 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\ 977 Make a tag for each line matching a regular expression pattern\n\ 978 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\ 979 files only. REGEXFILE is a file containing one REGEXP per line.\n\ 980 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\ 981 optional. 
The TAGREGEXP pattern is anchored (as if preceded by ^)."); 982 puts (" If TAGNAME/ is present, the tags created are named.\n\ 983 For example Tcl named tags can be created with:\n\ 984 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\ 985 MODS are optional one-letter modifiers: `i' means to ignore case,\n\ 986 `m' means to allow multi-line matches, `s' implies `m' and\n\ 987 causes dot to match any character, including newline."); 988 puts ("-R, --no-regex\n\ 989 Don't create tags from regexps for the following files."); 990 puts ("-I, --ignore-indentation\n\ 991 In C and C++ do not assume that a closing brace in the first\n\ 992 column is the final brace of a function or structure definition."); 993 puts ("-o FILE, --output=FILE\n\ 994 Write the tags to FILE."); 995 puts ("--parse-stdin=NAME\n\ 996 Read from standard input and record tags as belonging to file NAME."); 997 998 if (CTAGS) 999 { 1000 puts ("-t, --typedefs\n\ 1001 Generate tag entries for C and Ada typedefs."); 1002 puts ("-T, --typedefs-and-c++\n\ 1003 Generate tag entries for C typedefs, C struct/enum/union tags,\n\ 1004 and C++ member functions."); 1005 } 1006 1007 if (CTAGS) 1008 puts ("-u, --update\n\ 1009 Update the tag entries for the given files, leaving tag\n\ 1010 entries for other files in place. Currently, this is\n\ 1011 implemented by deleting the existing entries for the given\n\ 1012 files and then rewriting the new entries at the end of the\n\ 1013 tags file. It is often faster to simply rebuild the entire\n\ 1014 tag file than to use this."); 1015 1016 if (CTAGS) 1017 { 1018 puts ("-v, --vgrind\n\ 1019 Print on the standard output an index of items intended for\n\ 1020 human consumption, similar to the output of vgrind. 
The index\n\ 1021 is sorted, and gives the page number of each item."); 1022# if PRINT_UNDOCUMENTED_OPTIONS_HELP 1023 puts ("-w, --no-duplicates\n\ 1024 Do not create duplicate tag entries, for compatibility with\n\ 1025 traditional ctags."); 1026 puts ("-w, --no-warn\n\ 1027 Suppress warning messages about duplicate tag entries."); 1028# endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */ 1029 puts ("-x, --cxref\n\ 1030 Like --vgrind, but in the style of cxref, rather than vgrind.\n\ 1031 The output uses line numbers instead of page numbers, but\n\ 1032 beyond that the differences are cosmetic; try both to see\n\ 1033 which you like."); 1034 } 1035 1036 puts ("-V, --version\n\ 1037 Print the version of the program.\n\ 1038-h, --help\n\ 1039 Print this help message.\n\ 1040 Followed by one or more `--language' options prints detailed\n\ 1041 help about tag generation for the specified languages."); 1042 1043 print_language_names (); 1044 1045 puts (""); 1046 puts ("Report bugs to bug-gnu-emacs@gnu.org"); 1047 1048 exit (EXIT_SUCCESS); 1049} 1050 1051 1052#ifdef VMS /* VMS specific functions */ 1053 1054#define EOS '\0' 1055 1056/* This is a BUG! ANY arbitrary limit is a BUG! 1057 Won't someone please fix this? */ 1058#define MAX_FILE_SPEC_LEN 255 1059typedef struct { 1060 short curlen; 1061 char body[MAX_FILE_SPEC_LEN + 1]; 1062} vspec; 1063 1064/* 1065 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names 1066 returning in each successive call the next file name matching the input 1067 spec. The function expects that each in_spec passed 1068 to it will be processed to completion; in particular, up to and 1069 including the call following that in which the last matching name 1070 is returned, the function ignores the value of in_spec, and will 1071 only start processing a new spec with the following call. 1072 If an error occurs, on return out_spec contains the value 1073 of in_spec when the error occurred. 
1074 1075 With each successive file name returned in out_spec, the 1076 function's return value is one. When there are no more matching 1077 names the function returns zero. If on the first call no file 1078 matches in_spec, or there is any other error, -1 is returned. 1079*/ 1080 1081#include <rmsdef.h> 1082#include <descrip.h> 1083#define OUTSIZE MAX_FILE_SPEC_LEN 1084static short 1085fn_exp (out, in) 1086 vspec *out; 1087 char *in; 1088{ 1089 static long context = 0; 1090 static struct dsc$descriptor_s o; 1091 static struct dsc$descriptor_s i; 1092 static bool pass1 = TRUE; 1093 long status; 1094 short retval; 1095 1096 if (pass1) 1097 { 1098 pass1 = FALSE; 1099 o.dsc$a_pointer = (char *) out; 1100 o.dsc$w_length = (short)OUTSIZE; 1101 i.dsc$a_pointer = in; 1102 i.dsc$w_length = (short)strlen(in); 1103 i.dsc$b_dtype = DSC$K_DTYPE_T; 1104 i.dsc$b_class = DSC$K_CLASS_S; 1105 o.dsc$b_dtype = DSC$K_DTYPE_VT; 1106 o.dsc$b_class = DSC$K_CLASS_VS; 1107 } 1108 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL) 1109 { 1110 out->body[out->curlen] = EOS; 1111 return 1; 1112 } 1113 else if (status == RMS$_NMF) 1114 retval = 0; 1115 else 1116 { 1117 strcpy(out->body, in); 1118 retval = -1; 1119 } 1120 lib$find_file_end(&context); 1121 pass1 = TRUE; 1122 return retval; 1123} 1124 1125/* 1126 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the 1127 name of each file specified by the provided arg expanding wildcards. 1128*/ 1129static char * 1130gfnames (arg, p_error) 1131 char *arg; 1132 bool *p_error; 1133{ 1134 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"}; 1135 1136 switch (fn_exp (&filename, arg)) 1137 { 1138 case 1: 1139 *p_error = FALSE; 1140 return filename.body; 1141 case 0: 1142 *p_error = FALSE; 1143 return NULL; 1144 default: 1145 *p_error = TRUE; 1146 return filename.body; 1147 } 1148} 1149 1150#ifndef OLD /* Newer versions of VMS do provide `system'. 
*/ 1151system (cmd) 1152 char *cmd; 1153{ 1154 error ("%s", "system() function not implemented under VMS"); 1155} 1156#endif 1157 1158#define VERSION_DELIM ';' 1159char *massage_name (s) 1160 char *s; 1161{ 1162 char *start = s; 1163 1164 for ( ; *s; s++) 1165 if (*s == VERSION_DELIM) 1166 { 1167 *s = EOS; 1168 break; 1169 } 1170 else 1171 *s = lowcase (*s); 1172 return start; 1173} 1174#endif /* VMS */ 1175 1176 1177int 1178main (argc, argv) 1179 int argc; 1180 char *argv[]; 1181{ 1182 int i; 1183 unsigned int nincluded_files; 1184 char **included_files; 1185 argument *argbuffer; 1186 int current_arg, file_count; 1187 linebuffer filename_lb; 1188 bool help_asked = FALSE; 1189#ifdef VMS 1190 bool got_err; 1191#endif 1192 char *optstring; 1193 int opt; 1194 1195 1196#ifdef DOS_NT 1197 _fmode = O_BINARY; /* all of files are treated as binary files */ 1198#endif /* DOS_NT */ 1199 1200 progname = argv[0]; 1201 nincluded_files = 0; 1202 included_files = xnew (argc, char *); 1203 current_arg = 0; 1204 file_count = 0; 1205 1206 /* Allocate enough no matter what happens. Overkill, but each one 1207 is small. */ 1208 argbuffer = xnew (argc, argument); 1209 1210 /* 1211 * If etags, always find typedefs and structure tags. Why not? 1212 * Also default to find macro constants, enum constants, struct 1213 * members and global variables. 1214 */ 1215 if (!CTAGS) 1216 { 1217 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE; 1218 globals = TRUE; 1219 } 1220 1221 /* When the optstring begins with a '-' getopt_long does not rearrange the 1222 non-options arguments to be at the end, but leaves them alone. */ 1223 optstring = concat (NO_LONG_OPTIONS ? "" : "-", 1224 "ac:Cf:Il:o:r:RSVhH", 1225 (CTAGS) ? "BxdtTuvw" : "Di:"); 1226 1227 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF) 1228 switch (opt) 1229 { 1230 case 0: 1231 /* If getopt returns 0, then it has already processed a 1232 long-named option. We should do nothing. 
*/ 1233 break; 1234 1235 case 1: 1236 /* This means that a file name has been seen. Record it. */ 1237 argbuffer[current_arg].arg_type = at_filename; 1238 argbuffer[current_arg].what = optarg; 1239 ++current_arg; 1240 ++file_count; 1241 break; 1242 1243 case STDIN: 1244 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */ 1245 argbuffer[current_arg].arg_type = at_stdin; 1246 argbuffer[current_arg].what = optarg; 1247 ++current_arg; 1248 ++file_count; 1249 if (parsing_stdin) 1250 fatal ("cannot parse standard input more than once", (char *)NULL); 1251 parsing_stdin = TRUE; 1252 break; 1253 1254 /* Common options. */ 1255 case 'a': append_to_tagfile = TRUE; break; 1256 case 'C': cplusplus = TRUE; break; 1257 case 'f': /* for compatibility with old makefiles */ 1258 case 'o': 1259 if (tagfile) 1260 { 1261 error ("-o option may only be given once.", (char *)NULL); 1262 suggest_asking_for_help (); 1263 /* NOTREACHED */ 1264 } 1265 tagfile = optarg; 1266 break; 1267 case 'I': 1268 case 'S': /* for backward compatibility */ 1269 ignoreindent = TRUE; 1270 break; 1271 case 'l': 1272 { 1273 language *lang = get_language_from_langname (optarg); 1274 if (lang != NULL) 1275 { 1276 argbuffer[current_arg].lang = lang; 1277 argbuffer[current_arg].arg_type = at_language; 1278 ++current_arg; 1279 } 1280 } 1281 break; 1282 case 'c': 1283 /* Backward compatibility: support obsolete --ignore-case-regexp. 
*/ 1284 optarg = concat (optarg, "i", ""); /* memory leak here */ 1285 /* FALLTHRU */ 1286 case 'r': 1287 argbuffer[current_arg].arg_type = at_regexp; 1288 argbuffer[current_arg].what = optarg; 1289 ++current_arg; 1290 break; 1291 case 'R': 1292 argbuffer[current_arg].arg_type = at_regexp; 1293 argbuffer[current_arg].what = NULL; 1294 ++current_arg; 1295 break; 1296 case 'V': 1297 print_version (); 1298 break; 1299 case 'h': 1300 case 'H': 1301 help_asked = TRUE; 1302 break; 1303 1304 /* Etags options */ 1305 case 'D': constantypedefs = FALSE; break; 1306 case 'i': included_files[nincluded_files++] = optarg; break; 1307 1308 /* Ctags options. */ 1309 case 'B': searchar = '?'; break; 1310 case 'd': constantypedefs = TRUE; break; 1311 case 't': typedefs = TRUE; break; 1312 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break; 1313 case 'u': update = TRUE; break; 1314 case 'v': vgrind_style = TRUE; /*FALLTHRU*/ 1315 case 'x': cxref_style = TRUE; break; 1316 case 'w': no_warnings = TRUE; break; 1317 default: 1318 suggest_asking_for_help (); 1319 /* NOTREACHED */ 1320 } 1321 1322 /* No more options. Store the rest of arguments. */ 1323 for (; optind < argc; optind++) 1324 { 1325 argbuffer[current_arg].arg_type = at_filename; 1326 argbuffer[current_arg].what = argv[optind]; 1327 ++current_arg; 1328 ++file_count; 1329 } 1330 1331 argbuffer[current_arg].arg_type = at_end; 1332 1333 if (help_asked) 1334 print_help (argbuffer); 1335 /* NOTREACHED */ 1336 1337 if (nincluded_files == 0 && file_count == 0) 1338 { 1339 error ("no input files specified.", (char *)NULL); 1340 suggest_asking_for_help (); 1341 /* NOTREACHED */ 1342 } 1343 1344 if (tagfile == NULL) 1345 tagfile = CTAGS ? "tags" : "TAGS"; 1346 cwd = etags_getcwd (); /* the current working directory */ 1347 if (cwd[strlen (cwd) - 1] != '/') 1348 { 1349 char *oldcwd = cwd; 1350 cwd = concat (oldcwd, "/", ""); 1351 free (oldcwd); 1352 } 1353 /* Relative file names are made relative to the current directory. 
*/ 1354 if (streq (tagfile, "-") 1355 || strneq (tagfile, "/dev/", 5)) 1356 tagfiledir = cwd; 1357 else 1358 tagfiledir = absolute_dirname (tagfile, cwd); 1359 1360 init (); /* set up boolean "functions" */ 1361 1362 linebuffer_init (&lb); 1363 linebuffer_init (&filename_lb); 1364 linebuffer_init (&filebuf); 1365 linebuffer_init (&token_name); 1366 1367 if (!CTAGS) 1368 { 1369 if (streq (tagfile, "-")) 1370 { 1371 tagf = stdout; 1372#ifdef DOS_NT 1373 /* Switch redirected `stdout' to binary mode (setting `_fmode' 1374 doesn't take effect until after `stdout' is already open). */ 1375 if (!isatty (fileno (stdout))) 1376 setmode (fileno (stdout), O_BINARY); 1377#endif /* DOS_NT */ 1378 } 1379 else 1380 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w"); 1381 if (tagf == NULL) 1382 pfatal (tagfile); 1383 } 1384 1385 /* 1386 * Loop through files finding functions. 1387 */ 1388 for (i = 0; i < current_arg; i++) 1389 { 1390 static language *lang; /* non-NULL if language is forced */ 1391 char *this_file; 1392 1393 switch (argbuffer[i].arg_type) 1394 { 1395 case at_language: 1396 lang = argbuffer[i].lang; 1397 break; 1398 case at_regexp: 1399 analyse_regex (argbuffer[i].what); 1400 break; 1401 case at_filename: 1402#ifdef VMS 1403 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL) 1404 { 1405 if (got_err) 1406 { 1407 error ("can't find file %s\n", this_file); 1408 argc--, argv++; 1409 } 1410 else 1411 { 1412 this_file = massage_name (this_file); 1413 } 1414#else 1415 this_file = argbuffer[i].what; 1416#endif 1417 /* Input file named "-" means read file names from stdin 1418 (one per line) and use them. 
*/ 1419 if (streq (this_file, "-")) 1420 { 1421 if (parsing_stdin) 1422 fatal ("cannot parse standard input AND read file names from it", 1423 (char *)NULL); 1424 while (readline_internal (&filename_lb, stdin) > 0) 1425 process_file_name (filename_lb.buffer, lang); 1426 } 1427 else 1428 process_file_name (this_file, lang); 1429#ifdef VMS 1430 } 1431#endif 1432 break; 1433 case at_stdin: 1434 this_file = argbuffer[i].what; 1435 process_file (stdin, this_file, lang); 1436 break; 1437 } 1438 } 1439 1440 free_regexps (); 1441 free (lb.buffer); 1442 free (filebuf.buffer); 1443 free (token_name.buffer); 1444 1445 if (!CTAGS || cxref_style) 1446 { 1447 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */ 1448 put_entries (nodehead); 1449 free_tree (nodehead); 1450 nodehead = NULL; 1451 if (!CTAGS) 1452 { 1453 fdesc *fdp; 1454 1455 /* Output file entries that have no tags. */ 1456 for (fdp = fdhead; fdp != NULL; fdp = fdp->next) 1457 if (!fdp->written) 1458 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname); 1459 1460 while (nincluded_files-- > 0) 1461 fprintf (tagf, "\f\n%s,include\n", *included_files++); 1462 1463 if (fclose (tagf) == EOF) 1464 pfatal (tagfile); 1465 } 1466 1467 exit (EXIT_SUCCESS); 1468 } 1469 1470 if (update) 1471 { 1472 char cmd[BUFSIZ]; 1473 for (i = 0; i < current_arg; ++i) 1474 { 1475 switch (argbuffer[i].arg_type) 1476 { 1477 case at_filename: 1478 case at_stdin: 1479 break; 1480 default: 1481 continue; /* the for loop */ 1482 } 1483 sprintf (cmd, 1484 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS", 1485 tagfile, argbuffer[i].what, tagfile); 1486 if (system (cmd) != EXIT_SUCCESS) 1487 fatal ("failed to execute shell command", (char *)NULL); 1488 } 1489 append_to_tagfile = TRUE; 1490 } 1491 1492 tagf = fopen (tagfile, append_to_tagfile ? 
"a" : "w"); 1493 if (tagf == NULL) 1494 pfatal (tagfile); 1495 put_entries (nodehead); /* write all the tags (CTAGS) */ 1496 free_tree (nodehead); 1497 nodehead = NULL; 1498 if (fclose (tagf) == EOF) 1499 pfatal (tagfile); 1500 1501 if (CTAGS) 1502 if (append_to_tagfile || update) 1503 { 1504 char cmd[2*BUFSIZ+20]; 1505 /* Maybe these should be used: 1506 setenv ("LC_COLLATE", "C", 1); 1507 setenv ("LC_ALL", "C", 1); */ 1508 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile); 1509 exit (system (cmd)); 1510 } 1511 return EXIT_SUCCESS; 1512} 1513 1514 1515/* 1516 * Return a compressor given the file name. If EXTPTR is non-zero, 1517 * return a pointer into FILE where the compressor-specific 1518 * extension begins. If no compressor is found, NULL is returned 1519 * and EXTPTR is not significant. 1520 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998) 1521 */ 1522static compressor * 1523get_compressor_from_suffix (file, extptr) 1524 char *file; 1525 char **extptr; 1526{ 1527 compressor *compr; 1528 char *slash, *suffix; 1529 1530 /* This relies on FN to be after canonicalize_filename, 1531 so we don't need to consider backslashes on DOS_NT. */ 1532 slash = etags_strrchr (file, '/'); 1533 suffix = etags_strrchr (file, '.'); 1534 if (suffix == NULL || suffix < slash) 1535 return NULL; 1536 if (extptr != NULL) 1537 *extptr = suffix; 1538 suffix += 1; 1539 /* Let those poor souls who live with DOS 8+3 file name limits get 1540 some solace by treating foo.cgz as if it were foo.c.gz, etc. 1541 Only the first do loop is run if not MSDOS */ 1542 do 1543 { 1544 for (compr = compressors; compr->suffix != NULL; compr++) 1545 if (streq (compr->suffix, suffix)) 1546 return compr; 1547 if (!MSDOS) 1548 break; /* do it only once: not really a loop */ 1549 if (extptr != NULL) 1550 *extptr = ++suffix; 1551 } while (*suffix != '\0'); 1552 return NULL; 1553} 1554 1555 1556 1557/* 1558 * Return a language given the name. 
1559 */ 1560static language * 1561get_language_from_langname (name) 1562 const char *name; 1563{ 1564 language *lang; 1565 1566 if (name == NULL) 1567 error ("empty language name", (char *)NULL); 1568 else 1569 { 1570 for (lang = lang_names; lang->name != NULL; lang++) 1571 if (streq (name, lang->name)) 1572 return lang; 1573 error ("unknown language \"%s\"", name); 1574 } 1575 1576 return NULL; 1577} 1578 1579 1580/* 1581 * Return a language given the interpreter name. 1582 */ 1583static language * 1584get_language_from_interpreter (interpreter) 1585 char *interpreter; 1586{ 1587 language *lang; 1588 char **iname; 1589 1590 if (interpreter == NULL) 1591 return NULL; 1592 for (lang = lang_names; lang->name != NULL; lang++) 1593 if (lang->interpreters != NULL) 1594 for (iname = lang->interpreters; *iname != NULL; iname++) 1595 if (streq (*iname, interpreter)) 1596 return lang; 1597 1598 return NULL; 1599} 1600 1601 1602 1603/* 1604 * Return a language given the file name. 1605 */ 1606static language * 1607get_language_from_filename (file, case_sensitive) 1608 char *file; 1609 bool case_sensitive; 1610{ 1611 language *lang; 1612 char **name, **ext, *suffix; 1613 1614 /* Try whole file name first. */ 1615 for (lang = lang_names; lang->name != NULL; lang++) 1616 if (lang->filenames != NULL) 1617 for (name = lang->filenames; *name != NULL; name++) 1618 if ((case_sensitive) 1619 ? streq (*name, file) 1620 : strcaseeq (*name, file)) 1621 return lang; 1622 1623 /* If not found, try suffix after last dot. */ 1624 suffix = etags_strrchr (file, '.'); 1625 if (suffix == NULL) 1626 return NULL; 1627 suffix += 1; 1628 for (lang = lang_names; lang->name != NULL; lang++) 1629 if (lang->suffixes != NULL) 1630 for (ext = lang->suffixes; *ext != NULL; ext++) 1631 if ((case_sensitive) 1632 ? streq (*ext, suffix) 1633 : strcaseeq (*ext, suffix)) 1634 return lang; 1635 return NULL; 1636} 1637 1638 1639/* 1640 * This routine is called on each file argument. 
1641 */ 1642static void 1643process_file_name (file, lang) 1644 char *file; 1645 language *lang; 1646{ 1647 struct stat stat_buf; 1648 FILE *inf; 1649 fdesc *fdp; 1650 compressor *compr; 1651 char *compressed_name, *uncompressed_name; 1652 char *ext, *real_name; 1653 int retval; 1654 1655 canonicalize_filename (file); 1656 if (streq (file, tagfile) && !streq (tagfile, "-")) 1657 { 1658 error ("skipping inclusion of %s in self.", file); 1659 return; 1660 } 1661 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL) 1662 { 1663 compressed_name = NULL; 1664 real_name = uncompressed_name = savestr (file); 1665 } 1666 else 1667 { 1668 real_name = compressed_name = savestr (file); 1669 uncompressed_name = savenstr (file, ext - file); 1670 } 1671 1672 /* If the canonicalized uncompressed name 1673 has already been dealt with, skip it silently. */ 1674 for (fdp = fdhead; fdp != NULL; fdp = fdp->next) 1675 { 1676 assert (fdp->infname != NULL); 1677 if (streq (uncompressed_name, fdp->infname)) 1678 goto cleanup; 1679 } 1680 1681 if (stat (real_name, &stat_buf) != 0) 1682 { 1683 /* Reset real_name and try with a different name. 
*/ 1684 real_name = NULL; 1685 if (compressed_name != NULL) /* try with the given suffix */ 1686 { 1687 if (stat (uncompressed_name, &stat_buf) == 0) 1688 real_name = uncompressed_name; 1689 } 1690 else /* try all possible suffixes */ 1691 { 1692 for (compr = compressors; compr->suffix != NULL; compr++) 1693 { 1694 compressed_name = concat (file, ".", compr->suffix); 1695 if (stat (compressed_name, &stat_buf) != 0) 1696 { 1697 if (MSDOS) 1698 { 1699 char *suf = compressed_name + strlen (file); 1700 size_t suflen = strlen (compr->suffix) + 1; 1701 for ( ; suf[1]; suf++, suflen--) 1702 { 1703 memmove (suf, suf + 1, suflen); 1704 if (stat (compressed_name, &stat_buf) == 0) 1705 { 1706 real_name = compressed_name; 1707 break; 1708 } 1709 } 1710 if (real_name != NULL) 1711 break; 1712 } /* MSDOS */ 1713 free (compressed_name); 1714 compressed_name = NULL; 1715 } 1716 else 1717 { 1718 real_name = compressed_name; 1719 break; 1720 } 1721 } 1722 } 1723 if (real_name == NULL) 1724 { 1725 perror (file); 1726 goto cleanup; 1727 } 1728 } /* try with a different name */ 1729 1730 if (!S_ISREG (stat_buf.st_mode)) 1731 { 1732 error ("skipping %s: it is not a regular file.", real_name); 1733 goto cleanup; 1734 } 1735 if (real_name == compressed_name) 1736 { 1737 char *cmd = concat (compr->command, " ", real_name); 1738 inf = (FILE *) popen (cmd, "r"); 1739 free (cmd); 1740 } 1741 else 1742 inf = fopen (real_name, "r"); 1743 if (inf == NULL) 1744 { 1745 perror (real_name); 1746 goto cleanup; 1747 } 1748 1749 process_file (inf, uncompressed_name, lang); 1750 1751 if (real_name == compressed_name) 1752 retval = pclose (inf); 1753 else 1754 retval = fclose (inf); 1755 if (retval < 0) 1756 pfatal (file); 1757 1758 cleanup: 1759 if (compressed_name) free (compressed_name); 1760 if (uncompressed_name) free (uncompressed_name); 1761 last_node = NULL; 1762 curfdp = NULL; 1763 return; 1764} 1765 1766static void 1767process_file (fh, fn, lang) 1768 FILE *fh; 1769 char *fn; 1770 language 
*lang; 1771{ 1772 static const fdesc emptyfdesc; 1773 fdesc *fdp; 1774 1775 /* Create a new input file description entry. */ 1776 fdp = xnew (1, fdesc); 1777 *fdp = emptyfdesc; 1778 fdp->next = fdhead; 1779 fdp->infname = savestr (fn); 1780 fdp->lang = lang; 1781 fdp->infabsname = absolute_filename (fn, cwd); 1782 fdp->infabsdir = absolute_dirname (fn, cwd); 1783 if (filename_is_absolute (fn)) 1784 { 1785 /* An absolute file name. Canonicalize it. */ 1786 fdp->taggedfname = absolute_filename (fn, NULL); 1787 } 1788 else 1789 { 1790 /* A file name relative to cwd. Make it relative 1791 to the directory of the tags file. */ 1792 fdp->taggedfname = relative_filename (fn, tagfiledir); 1793 } 1794 fdp->usecharno = TRUE; /* use char position when making tags */ 1795 fdp->prop = NULL; 1796 fdp->written = FALSE; /* not written on tags file yet */ 1797 1798 fdhead = fdp; 1799 curfdp = fdhead; /* the current file description */ 1800 1801 find_entries (fh); 1802 1803 /* If not Ctags, and if this is not metasource and if it contained no #line 1804 directives, we can write the tags and free all nodes pointing to 1805 curfdp. */ 1806 if (!CTAGS 1807 && curfdp->usecharno /* no #line directives in this file */ 1808 && !curfdp->lang->metasource) 1809 { 1810 node *np, *prev; 1811 1812 /* Look for the head of the sublist relative to this file. See add_node 1813 for the structure of the node tree. */ 1814 prev = NULL; 1815 for (np = nodehead; np != NULL; prev = np, np = np->left) 1816 if (np->fdp == curfdp) 1817 break; 1818 1819 /* If we generated tags for this file, write and delete them. */ 1820 if (np != NULL) 1821 { 1822 /* This is the head of the last sublist, if any. The following 1823 instructions depend on this being true. 
*/ 1824 assert (np->left == NULL); 1825 1826 assert (fdhead == curfdp); 1827 assert (last_node->fdp == curfdp); 1828 put_entries (np); /* write tags for file curfdp->taggedfname */ 1829 free_tree (np); /* remove the written nodes */ 1830 if (prev == NULL) 1831 nodehead = NULL; /* no nodes left */ 1832 else 1833 prev->left = NULL; /* delete the pointer to the sublist */ 1834 } 1835 } 1836} 1837 1838/* 1839 * This routine sets up the boolean pseudo-functions which work 1840 * by setting boolean flags dependent upon the corresponding character. 1841 * Every char which is NOT in that string is not a white char. Therefore, 1842 * all of the array "_wht" is set to FALSE, and then the elements 1843 * subscripted by the chars in "white" are set to TRUE. Thus "_wht" 1844 * of a char is TRUE if it is the string "white", else FALSE. 1845 */ 1846static void 1847init () 1848{ 1849 register char *sp; 1850 register int i; 1851 1852 for (i = 0; i < CHARS; i++) 1853 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE; 1854 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE; 1855 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE; 1856 notinname('\0') = notinname('\n'); 1857 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE; 1858 begtoken('\0') = begtoken('\n'); 1859 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE; 1860 intoken('\0') = intoken('\n'); 1861 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE; 1862 endtoken('\0') = endtoken('\n'); 1863} 1864 1865/* 1866 * This routine opens the specified file and calls the function 1867 * which finds the function and type definitions. 1868 */ 1869static void 1870find_entries (inf) 1871 FILE *inf; 1872{ 1873 char *cp; 1874 language *lang = curfdp->lang; 1875 Lang_function *parser = NULL; 1876 1877 /* If user specified a language, use it. 
*/ 1878 if (lang != NULL && lang->function != NULL) 1879 { 1880 parser = lang->function; 1881 } 1882 1883 /* Else try to guess the language given the file name. */ 1884 if (parser == NULL) 1885 { 1886 lang = get_language_from_filename (curfdp->infname, TRUE); 1887 if (lang != NULL && lang->function != NULL) 1888 { 1889 curfdp->lang = lang; 1890 parser = lang->function; 1891 } 1892 } 1893 1894 /* Else look for sharp-bang as the first two characters. */ 1895 if (parser == NULL 1896 && readline_internal (&lb, inf) > 0 1897 && lb.len >= 2 1898 && lb.buffer[0] == '#' 1899 && lb.buffer[1] == '!') 1900 { 1901 char *lp; 1902 1903 /* Set lp to point at the first char after the last slash in the 1904 line or, if no slashes, at the first nonblank. Then set cp to 1905 the first successive blank and terminate the string. */ 1906 lp = etags_strrchr (lb.buffer+2, '/'); 1907 if (lp != NULL) 1908 lp += 1; 1909 else 1910 lp = skip_spaces (lb.buffer + 2); 1911 cp = skip_non_spaces (lp); 1912 *cp = '\0'; 1913 1914 if (strlen (lp) > 0) 1915 { 1916 lang = get_language_from_interpreter (lp); 1917 if (lang != NULL && lang->function != NULL) 1918 { 1919 curfdp->lang = lang; 1920 parser = lang->function; 1921 } 1922 } 1923 } 1924 1925 /* We rewind here, even if inf may be a pipe. We fail if the 1926 length of the first line is longer than the pipe block size, 1927 which is unlikely. */ 1928 rewind (inf); 1929 1930 /* Else try to guess the language given the case insensitive file name. */ 1931 if (parser == NULL) 1932 { 1933 lang = get_language_from_filename (curfdp->infname, FALSE); 1934 if (lang != NULL && lang->function != NULL) 1935 { 1936 curfdp->lang = lang; 1937 parser = lang->function; 1938 } 1939 } 1940 1941 /* Else try Fortran or C. */ 1942 if (parser == NULL) 1943 { 1944 node *old_last_node = last_node; 1945 1946 curfdp->lang = get_language_from_langname ("fortran"); 1947 find_entries (inf); 1948 1949 if (old_last_node == last_node) 1950 /* No Fortran entries found. Try C. 
*/ 1951 { 1952 /* We do not tag if rewind fails. 1953 Only the file name will be recorded in the tags file. */ 1954 rewind (inf); 1955 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c"); 1956 find_entries (inf); 1957 } 1958 return; 1959 } 1960 1961 if (!no_line_directive 1962 && curfdp->lang != NULL && curfdp->lang->metasource) 1963 /* It may be that this is a bingo.y file, and we already parsed a bingo.c 1964 file, or anyway we parsed a file that is automatically generated from 1965 this one. If this is the case, the bingo.c file contained #line 1966 directives that generated tags pointing to this file. Let's delete 1967 them all before parsing this file, which is the real source. */ 1968 { 1969 fdesc **fdpp = &fdhead; 1970 while (*fdpp != NULL) 1971 if (*fdpp != curfdp 1972 && streq ((*fdpp)->taggedfname, curfdp->taggedfname)) 1973 /* We found one of those! We must delete both the file description 1974 and all tags referring to it. */ 1975 { 1976 fdesc *badfdp = *fdpp; 1977 1978 /* Delete the tags referring to badfdp->taggedfname 1979 that were obtained from badfdp->infname. */ 1980 invalidate_nodes (badfdp, &nodehead); 1981 1982 *fdpp = badfdp->next; /* remove the bad description from the list */ 1983 free_fdesc (badfdp); 1984 } 1985 else 1986 fdpp = &(*fdpp)->next; /* advance the list pointer */ 1987 } 1988 1989 assert (parser != NULL); 1990 1991 /* Generic initialisations before reading from file. */ 1992 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */ 1993 1994 /* Generic initialisations before parsing file with readline. */ 1995 lineno = 0; /* reset global line number */ 1996 charno = 0; /* reset global char number */ 1997 linecharno = 0; /* reset global char number of line start */ 1998 1999 parser (inf); 2000 2001 regex_tag_multiline (); 2002} 2003 2004 2005/* 2006 * Check whether an implicitly named tag should be created, 2007 * then call `pfnote'. 2008 * NAME is a string that is internally copied by this function. 
2009 * 2010 * TAGS format specification 2011 * Idea by Sam Kendall <kendall@mv.mv.com> (1997) 2012 * The following is explained in some more detail in etc/ETAGS.EBNF. 2013 * 2014 * make_tag creates tags with "implicit tag names" (unnamed tags) 2015 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;": 2016 * 1. NAME does not contain any of the characters in NONAM; 2017 * 2. LINESTART contains name as either a rightmost, or rightmost but 2018 * one character, substring; 2019 * 3. the character, if any, immediately before NAME in LINESTART must 2020 * be a character in NONAM; 2021 * 4. the character, if any, immediately after NAME in LINESTART must 2022 * also be a character in NONAM. 2023 * 2024 * The implementation uses the notinname() macro, which recognises the 2025 * characters stored in the string `nonam'. 2026 * etags.el needs to use the same characters that are in NONAM. 2027 */ 2028static void 2029make_tag (name, namelen, is_func, linestart, linelen, lno, cno) 2030 char *name; /* tag name, or NULL if unnamed */ 2031 int namelen; /* tag length */ 2032 bool is_func; /* tag is a function */ 2033 char *linestart; /* start of the line where tag is */ 2034 int linelen; /* length of the line where tag is */ 2035 int lno; /* line number */ 2036 long cno; /* character number */ 2037{ 2038 bool named = (name != NULL && namelen > 0); 2039 2040 if (!CTAGS && named) /* maybe set named to false */ 2041 /* Let's try to make an implicit tag name, that is, create an unnamed tag 2042 such that etags.el can guess a name from it. 
*/ 2043 { 2044 int i; 2045 register char *cp = name; 2046 2047 for (i = 0; i < namelen; i++) 2048 if (notinname (*cp++)) 2049 break; 2050 if (i == namelen) /* rule #1 */ 2051 { 2052 cp = linestart + linelen - namelen; 2053 if (notinname (linestart[linelen-1])) 2054 cp -= 1; /* rule #4 */ 2055 if (cp >= linestart /* rule #2 */ 2056 && (cp == linestart 2057 || notinname (cp[-1])) /* rule #3 */ 2058 && strneq (name, cp, namelen)) /* rule #2 */ 2059 named = FALSE; /* use implicit tag name */ 2060 } 2061 } 2062 2063 if (named) 2064 name = savenstr (name, namelen); 2065 else 2066 name = NULL; 2067 pfnote (name, is_func, linestart, linelen, lno, cno); 2068} 2069 2070/* Record a tag. */ 2071static void 2072pfnote (name, is_func, linestart, linelen, lno, cno) 2073 char *name; /* tag name, or NULL if unnamed */ 2074 bool is_func; /* tag is a function */ 2075 char *linestart; /* start of the line where tag is */ 2076 int linelen; /* length of the line where tag is */ 2077 int lno; /* line number */ 2078 long cno; /* character number */ 2079{ 2080 register node *np; 2081 2082 assert (name == NULL || name[0] != '\0'); 2083 if (CTAGS && name == NULL) 2084 return; 2085 2086 np = xnew (1, node); 2087 2088 /* If ctags mode, change name "main" to M<thisfilename>. */ 2089 if (CTAGS && !cxref_style && streq (name, "main")) 2090 { 2091 register char *fp = etags_strrchr (curfdp->taggedfname, '/'); 2092 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, ""); 2093 fp = etags_strrchr (np->name, '.'); 2094 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0') 2095 fp[0] = '\0'; 2096 } 2097 else 2098 np->name = name; 2099 np->valid = TRUE; 2100 np->been_warned = FALSE; 2101 np->fdp = curfdp; 2102 np->is_func = is_func; 2103 np->lno = lno; 2104 if (np->fdp->usecharno) 2105 /* Our char numbers are 0-base, because of C language tradition? 2106 ctags compatibility? old versions compatibility? I don't know. 
2107 Anyway, since emacs's are 1-base we expect etags.el to take care 2108 of the difference. If we wanted to have 1-based numbers, we would 2109 uncomment the +1 below. */ 2110 np->cno = cno /* + 1 */ ; 2111 else 2112 np->cno = invalidcharno; 2113 np->left = np->right = NULL; 2114 if (CTAGS && !cxref_style) 2115 { 2116 if (strlen (linestart) < 50) 2117 np->regex = concat (linestart, "$", ""); 2118 else 2119 np->regex = savenstr (linestart, 50); 2120 } 2121 else 2122 np->regex = savenstr (linestart, linelen); 2123 2124 add_node (np, &nodehead); 2125} 2126 2127/* 2128 * free_tree () 2129 * recurse on left children, iterate on right children. 2130 */ 2131static void 2132free_tree (np) 2133 register node *np; 2134{ 2135 while (np) 2136 { 2137 register node *node_right = np->right; 2138 free_tree (np->left); 2139 if (np->name != NULL) 2140 free (np->name); 2141 free (np->regex); 2142 free (np); 2143 np = node_right; 2144 } 2145} 2146 2147/* 2148 * free_fdesc () 2149 * delete a file description 2150 */ 2151static void 2152free_fdesc (fdp) 2153 register fdesc *fdp; 2154{ 2155 if (fdp->infname != NULL) free (fdp->infname); 2156 if (fdp->infabsname != NULL) free (fdp->infabsname); 2157 if (fdp->infabsdir != NULL) free (fdp->infabsdir); 2158 if (fdp->taggedfname != NULL) free (fdp->taggedfname); 2159 if (fdp->prop != NULL) free (fdp->prop); 2160 free (fdp); 2161} 2162 2163/* 2164 * add_node () 2165 * Adds a node to the tree of nodes. In etags mode, sort by file 2166 * name. In ctags mode, sort by tag name. Make no attempt at 2167 * balancing. 2168 * 2169 * add_node is the only function allowed to add nodes, so it can 2170 * maintain state. 
2171 */ 2172static void 2173add_node (np, cur_node_p) 2174 node *np, **cur_node_p; 2175{ 2176 register int dif; 2177 register node *cur_node = *cur_node_p; 2178 2179 if (cur_node == NULL) 2180 { 2181 *cur_node_p = np; 2182 last_node = np; 2183 return; 2184 } 2185 2186 if (!CTAGS) 2187 /* Etags Mode */ 2188 { 2189 /* For each file name, tags are in a linked sublist on the right 2190 pointer. The first tags of different files are a linked list 2191 on the left pointer. last_node points to the end of the last 2192 used sublist. */ 2193 if (last_node != NULL && last_node->fdp == np->fdp) 2194 { 2195 /* Let's use the same sublist as the last added node. */ 2196 assert (last_node->right == NULL); 2197 last_node->right = np; 2198 last_node = np; 2199 } 2200 else if (cur_node->fdp == np->fdp) 2201 { 2202 /* Scanning the list we found the head of a sublist which is 2203 good for us. Let's scan this sublist. */ 2204 add_node (np, &cur_node->right); 2205 } 2206 else 2207 /* The head of this sublist is not good for us. Let's try the 2208 next one. */ 2209 add_node (np, &cur_node->left); 2210 } /* if ETAGS mode */ 2211 2212 else 2213 { 2214 /* Ctags Mode */ 2215 dif = strcmp (np->name, cur_node->name); 2216 2217 /* 2218 * If this tag name matches an existing one, then 2219 * do not add the node, but maybe print a warning. 2220 */ 2221 if (no_duplicates && !dif) 2222 { 2223 if (np->fdp == cur_node->fdp) 2224 { 2225 if (!no_warnings) 2226 { 2227 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n", 2228 np->fdp->infname, lineno, np->name); 2229 fprintf (stderr, "Second entry ignored\n"); 2230 } 2231 } 2232 else if (!cur_node->been_warned && !no_warnings) 2233 { 2234 fprintf 2235 (stderr, 2236 "Duplicate entry in files %s and %s: %s (Warning only)\n", 2237 np->fdp->infname, cur_node->fdp->infname, np->name); 2238 cur_node->been_warned = TRUE; 2239 } 2240 return; 2241 } 2242 2243 /* Actually add the node */ 2244 add_node (np, dif < 0 ? 
&cur_node->left : &cur_node->right); 2245 } /* if CTAGS mode */ 2246} 2247 2248/* 2249 * invalidate_nodes () 2250 * Scan the node tree and invalidate all nodes pointing to the 2251 * given file description (CTAGS case) or free them (ETAGS case). 2252 */ 2253static void 2254invalidate_nodes (badfdp, npp) 2255 fdesc *badfdp; 2256 node **npp; 2257{ 2258 node *np = *npp; 2259 2260 if (np == NULL) 2261 return; 2262 2263 if (CTAGS) 2264 { 2265 if (np->left != NULL) 2266 invalidate_nodes (badfdp, &np->left); 2267 if (np->fdp == badfdp) 2268 np->valid = FALSE; 2269 if (np->right != NULL) 2270 invalidate_nodes (badfdp, &np->right); 2271 } 2272 else 2273 { 2274 assert (np->fdp != NULL); 2275 if (np->fdp == badfdp) 2276 { 2277 *npp = np->left; /* detach the sublist from the list */ 2278 np->left = NULL; /* isolate it */ 2279 free_tree (np); /* free it */ 2280 invalidate_nodes (badfdp, npp); 2281 } 2282 else 2283 invalidate_nodes (badfdp, &np->left); 2284 } 2285} 2286 2287 2288static int total_size_of_entries __P((node *)); 2289static int number_len __P((long)); 2290 2291/* Length of a non-negative number's decimal representation. */ 2292static int 2293number_len (num) 2294 long num; 2295{ 2296 int len = 1; 2297 while ((num /= 10) > 0) 2298 len += 1; 2299 return len; 2300} 2301 2302/* 2303 * Return total number of characters that put_entries will output for 2304 * the nodes in the linked list at the right of the specified node. 2305 * This count is irrelevant with etags.el since emacs 19.34 at least, 2306 * but is still supplied for backward compatibility. 
2307 */ 2308static int 2309total_size_of_entries (np) 2310 register node *np; 2311{ 2312 register int total = 0; 2313 2314 for (; np != NULL; np = np->right) 2315 if (np->valid) 2316 { 2317 total += strlen (np->regex) + 1; /* pat\177 */ 2318 if (np->name != NULL) 2319 total += strlen (np->name) + 1; /* name\001 */ 2320 total += number_len ((long) np->lno) + 1; /* lno, */ 2321 if (np->cno != invalidcharno) /* cno */ 2322 total += number_len (np->cno); 2323 total += 1; /* newline */ 2324 } 2325 2326 return total; 2327} 2328 2329static void 2330put_entries (np) 2331 register node *np; 2332{ 2333 register char *sp; 2334 static fdesc *fdp = NULL; 2335 2336 if (np == NULL) 2337 return; 2338 2339 /* Output subentries that precede this one */ 2340 if (CTAGS) 2341 put_entries (np->left); 2342 2343 /* Output this entry */ 2344 if (np->valid) 2345 { 2346 if (!CTAGS) 2347 { 2348 /* Etags mode */ 2349 if (fdp != np->fdp) 2350 { 2351 fdp = np->fdp; 2352 fprintf (tagf, "\f\n%s,%d\n", 2353 fdp->taggedfname, total_size_of_entries (np)); 2354 fdp->written = TRUE; 2355 } 2356 fputs (np->regex, tagf); 2357 fputc ('\177', tagf); 2358 if (np->name != NULL) 2359 { 2360 fputs (np->name, tagf); 2361 fputc ('\001', tagf); 2362 } 2363 fprintf (tagf, "%d,", np->lno); 2364 if (np->cno != invalidcharno) 2365 fprintf (tagf, "%ld", np->cno); 2366 fputs ("\n", tagf); 2367 } 2368 else 2369 { 2370 /* Ctags mode */ 2371 if (np->name == NULL) 2372 error ("internal error: NULL name in ctags mode.", (char *)NULL); 2373 2374 if (cxref_style) 2375 { 2376 if (vgrind_style) 2377 fprintf (stdout, "%s %s %d\n", 2378 np->name, np->fdp->taggedfname, (np->lno + 63) / 64); 2379 else 2380 fprintf (stdout, "%-16s %3d %-16s %s\n", 2381 np->name, np->lno, np->fdp->taggedfname, np->regex); 2382 } 2383 else 2384 { 2385 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname); 2386 2387 if (np->is_func) 2388 { /* function or #define macro with args */ 2389 putc (searchar, tagf); 2390 putc ('^', tagf); 2391 2392 for 
(sp = np->regex; *sp; sp++) 2393 { 2394 if (*sp == '\\' || *sp == searchar) 2395 putc ('\\', tagf); 2396 putc (*sp, tagf); 2397 } 2398 putc (searchar, tagf); 2399 } 2400 else 2401 { /* anything else; text pattern inadequate */ 2402 fprintf (tagf, "%d", np->lno); 2403 } 2404 putc ('\n', tagf); 2405 } 2406 } 2407 } /* if this node contains a valid tag */ 2408 2409 /* Output subentries that follow this one */ 2410 put_entries (np->right); 2411 if (!CTAGS) 2412 put_entries (np->left); 2413} 2414 2415 2416/* C extensions. */ 2417#define C_EXT 0x00fff /* C extensions */ 2418#define C_PLAIN 0x00000 /* C */ 2419#define C_PLPL 0x00001 /* C++ */ 2420#define C_STAR 0x00003 /* C* */ 2421#define C_JAVA 0x00005 /* JAVA */ 2422#define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */ 2423#define YACC 0x10000 /* yacc file */ 2424 2425/* 2426 * The C symbol tables. 2427 */ 2428enum sym_type 2429{ 2430 st_none, 2431 st_C_objprot, st_C_objimpl, st_C_objend, 2432 st_C_gnumacro, 2433 st_C_ignore, st_C_attribute, 2434 st_C_javastruct, 2435 st_C_operator, 2436 st_C_class, st_C_template, 2437 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef 2438}; 2439 2440static unsigned int hash __P((const char *, unsigned int)); 2441static struct C_stab_entry * in_word_set __P((const char *, unsigned int)); 2442static enum sym_type C_symtype __P((char *, int, int)); 2443 2444/* Feed stuff between (but not including) %[ and %] lines to: 2445 gperf -m 5 2446%[ 2447%compare-strncmp 2448%enum 2449%struct-type 2450struct C_stab_entry { char *name; int c_ext; enum sym_type type; } 2451%% 2452if, 0, st_C_ignore 2453for, 0, st_C_ignore 2454while, 0, st_C_ignore 2455switch, 0, st_C_ignore 2456return, 0, st_C_ignore 2457__attribute__, 0, st_C_attribute 2458@interface, 0, st_C_objprot 2459@protocol, 0, st_C_objprot 2460@implementation,0, st_C_objimpl 2461@end, 0, st_C_objend 2462import, (C_JAVA & ~C_PLPL), st_C_ignore 2463package, (C_JAVA & ~C_PLPL), st_C_ignore 2464friend, C_PLPL, 
st_C_ignore 2465extends, (C_JAVA & ~C_PLPL), st_C_javastruct 2466implements, (C_JAVA & ~C_PLPL), st_C_javastruct 2467interface, (C_JAVA & ~C_PLPL), st_C_struct 2468class, 0, st_C_class 2469namespace, C_PLPL, st_C_struct 2470domain, C_STAR, st_C_struct 2471union, 0, st_C_struct 2472struct, 0, st_C_struct 2473extern, 0, st_C_extern 2474enum, 0, st_C_enum 2475typedef, 0, st_C_typedef 2476define, 0, st_C_define 2477undef, 0, st_C_define 2478operator, C_PLPL, st_C_operator 2479template, 0, st_C_template 2480# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach). 2481DEFUN, 0, st_C_gnumacro 2482SYSCALL, 0, st_C_gnumacro 2483ENTRY, 0, st_C_gnumacro 2484PSEUDO, 0, st_C_gnumacro 2485# These are defined inside C functions, so currently they are not met. 2486# EXFUN used in glibc, DEFVAR_* in emacs. 2487#EXFUN, 0, st_C_gnumacro 2488#DEFVAR_, 0, st_C_gnumacro 2489%] 2490and replace lines between %< and %> with its output, then: 2491 - remove the #if characterset check 2492 - make in_word_set static and not inline. 
*/ 2493/*%<*/ 2494/* C code produced by gperf version 3.0.1 */ 2495/* Command-line: gperf -m 5 */ 2496/* Computed positions: -k'2-3' */ 2497 2498struct C_stab_entry { char *name; int c_ext; enum sym_type type; }; 2499/* maximum key range = 33, duplicates = 0 */ 2500 2501#ifdef __GNUC__ 2502__inline 2503#else 2504#ifdef __cplusplus 2505inline 2506#endif 2507#endif 2508static unsigned int 2509hash (str, len) 2510 register const char *str; 2511 register unsigned int len; 2512{ 2513 static unsigned char asso_values[] = 2514 { 2515 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2516 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2517 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2518 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2519 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2520 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2521 35, 35, 35, 35, 35, 35, 35, 35, 35, 15, 2522 14, 35, 35, 35, 35, 35, 35, 35, 14, 35, 2523 35, 35, 35, 12, 13, 35, 35, 35, 35, 12, 2524 35, 35, 35, 35, 35, 1, 35, 16, 35, 6, 2525 23, 0, 0, 35, 22, 0, 35, 35, 5, 0, 2526 0, 15, 1, 35, 6, 35, 8, 19, 35, 16, 2527 4, 5, 35, 35, 35, 35, 35, 35, 35, 35, 2528 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2529 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2530 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2531 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2532 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2533 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2534 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2535 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2536 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2537 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2538 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2539 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2540 35, 35, 35, 35, 35, 35 2541 }; 2542 register int hval = len; 2543 2544 switch (hval) 2545 { 2546 default: 2547 hval += asso_values[(unsigned char)str[2]]; 2548 /*FALLTHROUGH*/ 2549 case 2: 2550 hval += asso_values[(unsigned char)str[1]]; 2551 break; 2552 } 2553 return hval; 2554} 2555 2556static struct C_stab_entry * 2557in_word_set (str, len) 2558 
register const char *str; 2559 register unsigned int len; 2560{ 2561 enum 2562 { 2563 TOTAL_KEYWORDS = 32, 2564 MIN_WORD_LENGTH = 2, 2565 MAX_WORD_LENGTH = 15, 2566 MIN_HASH_VALUE = 2, 2567 MAX_HASH_VALUE = 34 2568 }; 2569 2570 static struct C_stab_entry wordlist[] = 2571 { 2572 {""}, {""}, 2573 {"if", 0, st_C_ignore}, 2574 {""}, 2575 {"@end", 0, st_C_objend}, 2576 {"union", 0, st_C_struct}, 2577 {"define", 0, st_C_define}, 2578 {"import", (C_JAVA & ~C_PLPL), st_C_ignore}, 2579 {"template", 0, st_C_template}, 2580 {"operator", C_PLPL, st_C_operator}, 2581 {"@interface", 0, st_C_objprot}, 2582 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct}, 2583 {"friend", C_PLPL, st_C_ignore}, 2584 {"typedef", 0, st_C_typedef}, 2585 {"return", 0, st_C_ignore}, 2586 {"@implementation",0, st_C_objimpl}, 2587 {"@protocol", 0, st_C_objprot}, 2588 {"interface", (C_JAVA & ~C_PLPL), st_C_struct}, 2589 {"extern", 0, st_C_extern}, 2590 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct}, 2591 {"struct", 0, st_C_struct}, 2592 {"domain", C_STAR, st_C_struct}, 2593 {"switch", 0, st_C_ignore}, 2594 {"enum", 0, st_C_enum}, 2595 {"for", 0, st_C_ignore}, 2596 {"namespace", C_PLPL, st_C_struct}, 2597 {"class", 0, st_C_class}, 2598 {"while", 0, st_C_ignore}, 2599 {"undef", 0, st_C_define}, 2600 {"package", (C_JAVA & ~C_PLPL), st_C_ignore}, 2601 {"__attribute__", 0, st_C_attribute}, 2602 {"SYSCALL", 0, st_C_gnumacro}, 2603 {"ENTRY", 0, st_C_gnumacro}, 2604 {"PSEUDO", 0, st_C_gnumacro}, 2605 {"DEFUN", 0, st_C_gnumacro} 2606 }; 2607 2608 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) 2609 { 2610 register int key = hash (str, len); 2611 2612 if (key <= MAX_HASH_VALUE && key >= 0) 2613 { 2614 register const char *s = wordlist[key].name; 2615 2616 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0') 2617 return &wordlist[key]; 2618 } 2619 } 2620 return 0; 2621} 2622/*%>*/ 2623 2624static enum sym_type 2625C_symtype (str, len, c_ext) 2626 char *str; 2627 int len; 2628 int c_ext; 
2629{ 2630 register struct C_stab_entry *se = in_word_set (str, len); 2631 2632 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext))) 2633 return st_none; 2634 return se->type; 2635} 2636 2637 2638/* 2639 * Ignoring __attribute__ ((list)) 2640 */ 2641static bool inattribute; /* looking at an __attribute__ construct */ 2642 2643/* 2644 * C functions and variables are recognized using a simple 2645 * finite automaton. fvdef is its state variable. 2646 */ 2647static enum 2648{ 2649 fvnone, /* nothing seen */ 2650 fdefunkey, /* Emacs DEFUN keyword seen */ 2651 fdefunname, /* Emacs DEFUN name seen */ 2652 foperator, /* func: operator keyword seen (cplpl) */ 2653 fvnameseen, /* function or variable name seen */ 2654 fstartlist, /* func: just after open parenthesis */ 2655 finlist, /* func: in parameter list */ 2656 flistseen, /* func: after parameter list */ 2657 fignore, /* func: before open brace */ 2658 vignore /* var-like: ignore until ';' */ 2659} fvdef; 2660 2661static bool fvextern; /* func or var: extern keyword seen; */ 2662 2663/* 2664 * typedefs are recognized using a simple finite automaton. 2665 * typdef is its state variable. 2666 */ 2667static enum 2668{ 2669 tnone, /* nothing seen */ 2670 tkeyseen, /* typedef keyword seen */ 2671 ttypeseen, /* defined type seen */ 2672 tinbody, /* inside typedef body */ 2673 tend, /* just before typedef tag */ 2674 tignore /* junk after typedef tag */ 2675} typdef; 2676 2677/* 2678 * struct-like structures (enum, struct and union) are recognized 2679 * using another simple finite automaton. `structdef' is its state 2680 * variable. 2681 */ 2682static enum 2683{ 2684 snone, /* nothing seen yet, 2685 or in struct body if bracelev > 0 */ 2686 skeyseen, /* struct-like keyword seen */ 2687 stagseen, /* struct-like tag seen */ 2688 scolonseen /* colon seen after struct-like tag */ 2689} structdef; 2690 2691/* 2692 * When objdef is different from onone, objtag is the name of the class. 
2693 */ 2694static char *objtag = "<uninited>"; 2695 2696/* 2697 * Yet another little state machine to deal with preprocessor lines. 2698 */ 2699static enum 2700{ 2701 dnone, /* nothing seen */ 2702 dsharpseen, /* '#' seen as first char on line */ 2703 ddefineseen, /* '#' and 'define' seen */ 2704 dignorerest /* ignore rest of line */ 2705} definedef; 2706 2707/* 2708 * State machine for Objective C protocols and implementations. 2709 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995) 2710 */ 2711static enum 2712{ 2713 onone, /* nothing seen */ 2714 oprotocol, /* @interface or @protocol seen */ 2715 oimplementation, /* @implementations seen */ 2716 otagseen, /* class name seen */ 2717 oparenseen, /* parenthesis before category seen */ 2718 ocatseen, /* category name seen */ 2719 oinbody, /* in @implementation body */ 2720 omethodsign, /* in @implementation body, after +/- */ 2721 omethodtag, /* after method name */ 2722 omethodcolon, /* after method colon */ 2723 omethodparm, /* after method parameter */ 2724 oignore /* wait for @end */ 2725} objdef; 2726 2727 2728/* 2729 * Use this structure to keep info about the token read, and how it 2730 * should be tagged. Used by the make_C_tag function to build a tag. 2731 */ 2732static struct tok 2733{ 2734 char *line; /* string containing the token */ 2735 int offset; /* where the token starts in LINE */ 2736 int length; /* token length */ 2737 /* 2738 The previous members can be used to pass strings around for generic 2739 purposes. The following ones specifically refer to creating tags. In this 2740 case the token contained here is the pattern that will be used to create a 2741 tag. 
2742 */ 2743 bool valid; /* do not create a tag; the token should be 2744 invalidated whenever a state machine is 2745 reset prematurely */ 2746 bool named; /* create a named tag */ 2747 int lineno; /* source line number of tag */ 2748 long linepos; /* source char number of tag */ 2749} token; /* latest token read */ 2750 2751/* 2752 * Variables and functions for dealing with nested structures. 2753 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001) 2754 */ 2755static void pushclass_above __P((int, char *, int)); 2756static void popclass_above __P((int)); 2757static void write_classname __P((linebuffer *, char *qualifier)); 2758 2759static struct { 2760 char **cname; /* nested class names */ 2761 int *bracelev; /* nested class brace level */ 2762 int nl; /* class nesting level (elements used) */ 2763 int size; /* length of the array */ 2764} cstack; /* stack for nested declaration tags */ 2765/* Current struct nesting depth (namespace, class, struct, union, enum). */ 2766#define nestlev (cstack.nl) 2767/* After struct keyword or in struct body, not inside a nested function. */ 2768#define instruct (structdef == snone && nestlev > 0 \ 2769 && bracelev == cstack.bracelev[nestlev-1] + 1) 2770 2771static void 2772pushclass_above (bracelev, str, len) 2773 int bracelev; 2774 char *str; 2775 int len; 2776{ 2777 int nl; 2778 2779 popclass_above (bracelev); 2780 nl = cstack.nl; 2781 if (nl >= cstack.size) 2782 { 2783 int size = cstack.size *= 2; 2784 xrnew (cstack.cname, size, char *); 2785 xrnew (cstack.bracelev, size, int); 2786 } 2787 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev); 2788 cstack.cname[nl] = (str == NULL) ? 
NULL : savenstr (str, len); 2789 cstack.bracelev[nl] = bracelev; 2790 cstack.nl = nl + 1; 2791} 2792 2793static void 2794popclass_above (bracelev) 2795 int bracelev; 2796{ 2797 int nl; 2798 2799 for (nl = cstack.nl - 1; 2800 nl >= 0 && cstack.bracelev[nl] >= bracelev; 2801 nl--) 2802 { 2803 if (cstack.cname[nl] != NULL) 2804 free (cstack.cname[nl]); 2805 cstack.nl = nl; 2806 } 2807} 2808 2809static void 2810write_classname (cn, qualifier) 2811 linebuffer *cn; 2812 char *qualifier; 2813{ 2814 int i, len; 2815 int qlen = strlen (qualifier); 2816 2817 if (cstack.nl == 0 || cstack.cname[0] == NULL) 2818 { 2819 len = 0; 2820 cn->len = 0; 2821 cn->buffer[0] = '\0'; 2822 } 2823 else 2824 { 2825 len = strlen (cstack.cname[0]); 2826 linebuffer_setlen (cn, len); 2827 strcpy (cn->buffer, cstack.cname[0]); 2828 } 2829 for (i = 1; i < cstack.nl; i++) 2830 { 2831 char *s; 2832 int slen; 2833 2834 s = cstack.cname[i]; 2835 if (s == NULL) 2836 continue; 2837 slen = strlen (s); 2838 len += slen + qlen; 2839 linebuffer_setlen (cn, len); 2840 strncat (cn->buffer, qualifier, qlen); 2841 strncat (cn->buffer, s, slen); 2842 } 2843} 2844 2845 2846static bool consider_token __P((char *, int, int, int *, int, int, bool *)); 2847static void make_C_tag __P((bool)); 2848 2849/* 2850 * consider_token () 2851 * checks to see if the current token is at the start of a 2852 * function or variable, or corresponds to a typedef, or 2853 * is a struct/union/enum tag, or #define, or an enum constant. 2854 * 2855 * *IS_FUNC gets TRUE iff the token is a function or #define macro 2856 * with args. C_EXTP points to which language we are looking at. 
2857 * 2858 * Globals 2859 * fvdef IN OUT 2860 * structdef IN OUT 2861 * definedef IN OUT 2862 * typdef IN OUT 2863 * objdef IN OUT 2864 */ 2865 2866static bool 2867consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var) 2868 register char *str; /* IN: token pointer */ 2869 register int len; /* IN: token length */ 2870 register int c; /* IN: first char after the token */ 2871 int *c_extp; /* IN, OUT: C extensions mask */ 2872 int bracelev; /* IN: brace level */ 2873 int parlev; /* IN: parenthesis level */ 2874 bool *is_func_or_var; /* OUT: function or variable found */ 2875{ 2876 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0, 2877 structtype is the type of the preceding struct-like keyword, and 2878 structbracelev is the brace level where it has been seen. */ 2879 static enum sym_type structtype; 2880 static int structbracelev; 2881 static enum sym_type toktype; 2882 2883 2884 toktype = C_symtype (str, len, *c_extp); 2885 2886 /* 2887 * Skip __attribute__ 2888 */ 2889 if (toktype == st_C_attribute) 2890 { 2891 inattribute = TRUE; 2892 return FALSE; 2893 } 2894 2895 /* 2896 * Advance the definedef state machine. 2897 */ 2898 switch (definedef) 2899 { 2900 case dnone: 2901 /* We're not on a preprocessor line. */ 2902 if (toktype == st_C_gnumacro) 2903 { 2904 fvdef = fdefunkey; 2905 return FALSE; 2906 } 2907 break; 2908 case dsharpseen: 2909 if (toktype == st_C_define) 2910 { 2911 definedef = ddefineseen; 2912 } 2913 else 2914 { 2915 definedef = dignorerest; 2916 } 2917 return FALSE; 2918 case ddefineseen: 2919 /* 2920 * Make a tag for any macro, unless it is a constant 2921 * and constantypedefs is FALSE. 
2922 */ 2923 definedef = dignorerest; 2924 *is_func_or_var = (c == '('); 2925 if (!*is_func_or_var && !constantypedefs) 2926 return FALSE; 2927 else 2928 return TRUE; 2929 case dignorerest: 2930 return FALSE; 2931 default: 2932 error ("internal error: definedef value.", (char *)NULL); 2933 } 2934 2935 /* 2936 * Now typedefs 2937 */ 2938 switch (typdef) 2939 { 2940 case tnone: 2941 if (toktype == st_C_typedef) 2942 { 2943 if (typedefs) 2944 typdef = tkeyseen; 2945 fvextern = FALSE; 2946 fvdef = fvnone; 2947 return FALSE; 2948 } 2949 break; 2950 case tkeyseen: 2951 switch (toktype) 2952 { 2953 case st_none: 2954 case st_C_class: 2955 case st_C_struct: 2956 case st_C_enum: 2957 typdef = ttypeseen; 2958 } 2959 break; 2960 case ttypeseen: 2961 if (structdef == snone && fvdef == fvnone) 2962 { 2963 fvdef = fvnameseen; 2964 return TRUE; 2965 } 2966 break; 2967 case tend: 2968 switch (toktype) 2969 { 2970 case st_C_class: 2971 case st_C_struct: 2972 case st_C_enum: 2973 return FALSE; 2974 } 2975 return TRUE; 2976 } 2977 2978 /* 2979 * This structdef business is NOT invoked when we are ctags and the 2980 * file is plain C. This is because a struct tag may have the same 2981 * name as another tag, and this loses with ctags. 
2982 */ 2983 switch (toktype) 2984 { 2985 case st_C_javastruct: 2986 if (structdef == stagseen) 2987 structdef = scolonseen; 2988 return FALSE; 2989 case st_C_template: 2990 case st_C_class: 2991 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */ 2992 && bracelev == 0 2993 && definedef == dnone && structdef == snone 2994 && typdef == tnone && fvdef == fvnone) 2995 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO; 2996 if (toktype == st_C_template) 2997 break; 2998 /* FALLTHRU */ 2999 case st_C_struct: 3000 case st_C_enum: 3001 if (parlev == 0 3002 && fvdef != vignore 3003 && (typdef == tkeyseen 3004 || (typedefs_or_cplusplus && structdef == snone))) 3005 { 3006 structdef = skeyseen; 3007 structtype = toktype; 3008 structbracelev = bracelev; 3009 if (fvdef == fvnameseen) 3010 fvdef = fvnone; 3011 } 3012 return FALSE; 3013 } 3014 3015 if (structdef == skeyseen) 3016 { 3017 structdef = stagseen; 3018 return TRUE; 3019 } 3020 3021 if (typdef != tnone) 3022 definedef = dnone; 3023 3024 /* Detect Objective C constructs. */ 3025 switch (objdef) 3026 { 3027 case onone: 3028 switch (toktype) 3029 { 3030 case st_C_objprot: 3031 objdef = oprotocol; 3032 return FALSE; 3033 case st_C_objimpl: 3034 objdef = oimplementation; 3035 return FALSE; 3036 } 3037 break; 3038 case oimplementation: 3039 /* Save the class tag for functions or variables defined inside. */ 3040 objtag = savenstr (str, len); 3041 objdef = oinbody; 3042 return FALSE; 3043 case oprotocol: 3044 /* Save the class tag for categories. 
*/ 3045 objtag = savenstr (str, len); 3046 objdef = otagseen; 3047 *is_func_or_var = TRUE; 3048 return TRUE; 3049 case oparenseen: 3050 objdef = ocatseen; 3051 *is_func_or_var = TRUE; 3052 return TRUE; 3053 case oinbody: 3054 break; 3055 case omethodsign: 3056 if (parlev == 0) 3057 { 3058 fvdef = fvnone; 3059 objdef = omethodtag; 3060 linebuffer_setlen (&token_name, len); 3061 strncpy (token_name.buffer, str, len); 3062 token_name.buffer[len] = '\0'; 3063 return TRUE; 3064 } 3065 return FALSE; 3066 case omethodcolon: 3067 if (parlev == 0) 3068 objdef = omethodparm; 3069 return FALSE; 3070 case omethodparm: 3071 if (parlev == 0) 3072 { 3073 fvdef = fvnone; 3074 objdef = omethodtag; 3075 linebuffer_setlen (&token_name, token_name.len + len); 3076 strncat (token_name.buffer, str, len); 3077 return TRUE; 3078 } 3079 return FALSE; 3080 case oignore: 3081 if (toktype == st_C_objend) 3082 { 3083 /* Memory leakage here: the string pointed by objtag is 3084 never released, because many tests would be needed to 3085 avoid breaking on incorrect input code. The amount of 3086 memory leaked here is the sum of the lengths of the 3087 class tags. 3088 free (objtag); */ 3089 objdef = onone; 3090 } 3091 return FALSE; 3092 } 3093 3094 /* A function, variable or enum constant? 
*/ 3095 switch (toktype) 3096 { 3097 case st_C_extern: 3098 fvextern = TRUE; 3099 switch (fvdef) 3100 { 3101 case finlist: 3102 case flistseen: 3103 case fignore: 3104 case vignore: 3105 break; 3106 default: 3107 fvdef = fvnone; 3108 } 3109 return FALSE; 3110 case st_C_ignore: 3111 fvextern = FALSE; 3112 fvdef = vignore; 3113 return FALSE; 3114 case st_C_operator: 3115 fvdef = foperator; 3116 *is_func_or_var = TRUE; 3117 return TRUE; 3118 case st_none: 3119 if (constantypedefs 3120 && structdef == snone 3121 && structtype == st_C_enum && bracelev > structbracelev) 3122 return TRUE; /* enum constant */ 3123 switch (fvdef) 3124 { 3125 case fdefunkey: 3126 if (bracelev > 0) 3127 break; 3128 fvdef = fdefunname; /* GNU macro */ 3129 *is_func_or_var = TRUE; 3130 return TRUE; 3131 case fvnone: 3132 switch (typdef) 3133 { 3134 case ttypeseen: 3135 return FALSE; 3136 case tnone: 3137 if ((strneq (str, "asm", 3) && endtoken (str[3])) 3138 || (strneq (str, "__asm__", 7) && endtoken (str[7]))) 3139 { 3140 fvdef = vignore; 3141 return FALSE; 3142 } 3143 break; 3144 } 3145 /* FALLTHRU */ 3146 case fvnameseen: 3147 if (len >= 10 && strneq (str+len-10, "::operator", 10)) 3148 { 3149 if (*c_extp & C_AUTO) /* automatic detection of C++ */ 3150 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO; 3151 fvdef = foperator; 3152 *is_func_or_var = TRUE; 3153 return TRUE; 3154 } 3155 if (bracelev > 0 && !instruct) 3156 break; 3157 fvdef = fvnameseen; /* function or variable */ 3158 *is_func_or_var = TRUE; 3159 return TRUE; 3160 } 3161 break; 3162 } 3163 3164 return FALSE; 3165} 3166 3167 3168/* 3169 * C_entries often keeps pointers to tokens or lines which are older than 3170 * the line currently read. By keeping two line buffers, and switching 3171 * them at end of line, it is possible to use those pointers. 
3172 */ 3173static struct 3174{ 3175 long linepos; 3176 linebuffer lb; 3177} lbs[2]; 3178 3179#define current_lb_is_new (newndx == curndx) 3180#define switch_line_buffers() (curndx = 1 - curndx) 3181 3182#define curlb (lbs[curndx].lb) 3183#define newlb (lbs[newndx].lb) 3184#define curlinepos (lbs[curndx].linepos) 3185#define newlinepos (lbs[newndx].linepos) 3186 3187#define plainc ((c_ext & C_EXT) == C_PLAIN) 3188#define cplpl (c_ext & C_PLPL) 3189#define cjava ((c_ext & C_JAVA) == C_JAVA) 3190 3191#define CNL_SAVE_DEFINEDEF() \ 3192do { \ 3193 curlinepos = charno; \ 3194 readline (&curlb, inf); \ 3195 lp = curlb.buffer; \ 3196 quotednl = FALSE; \ 3197 newndx = curndx; \ 3198} while (0) 3199 3200#define CNL() \ 3201do { \ 3202 CNL_SAVE_DEFINEDEF(); \ 3203 if (savetoken.valid) \ 3204 { \ 3205 token = savetoken; \ 3206 savetoken.valid = FALSE; \ 3207 } \ 3208 definedef = dnone; \ 3209} while (0) 3210 3211 3212static void 3213make_C_tag (isfun) 3214 bool isfun; 3215{ 3216 /* This function is never called when token.valid is FALSE, but 3217 we must protect against invalid input or internal errors. */ 3218 if (!DEBUG && !token.valid) 3219 return; 3220 3221 if (token.valid) 3222 make_tag (token_name.buffer, token_name.len, isfun, token.line, 3223 token.offset+token.length+1, token.lineno, token.linepos); 3224 else /* this case is optimised away if !DEBUG */ 3225 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""), 3226 token_name.len + 17, isfun, token.line, 3227 token.offset+token.length+1, token.lineno, token.linepos); 3228 3229 token.valid = FALSE; 3230} 3231 3232 3233/* 3234 * C_entries () 3235 * This routine finds functions, variables, typedefs, 3236 * #define's, enum constants and struct/union/enum definitions in 3237 * C syntax and adds them to the list. 
3238 */ 3239static void 3240C_entries (c_ext, inf) 3241 int c_ext; /* extension of C */ 3242 FILE *inf; /* input file */ 3243{ 3244 register char c; /* latest char read; '\0' for end of line */ 3245 register char *lp; /* pointer one beyond the character `c' */ 3246 int curndx, newndx; /* indices for current and new lb */ 3247 register int tokoff; /* offset in line of start of current token */ 3248 register int toklen; /* length of current token */ 3249 char *qualifier; /* string used to qualify names */ 3250 int qlen; /* length of qualifier */ 3251 int bracelev; /* current brace level */ 3252 int bracketlev; /* current bracket level */ 3253 int parlev; /* current parenthesis level */ 3254 int attrparlev; /* __attribute__ parenthesis level */ 3255 int templatelev; /* current template level */ 3256 int typdefbracelev; /* bracelev where a typedef struct body begun */ 3257 bool incomm, inquote, inchar, quotednl, midtoken; 3258 bool yacc_rules; /* in the rules part of a yacc file */ 3259 struct tok savetoken = {0}; /* token saved during preprocessor handling */ 3260 3261 3262 linebuffer_init (&lbs[0].lb); 3263 linebuffer_init (&lbs[1].lb); 3264 if (cstack.size == 0) 3265 { 3266 cstack.size = (DEBUG) ? 
1 : 4; 3267 cstack.nl = 0; 3268 cstack.cname = xnew (cstack.size, char *); 3269 cstack.bracelev = xnew (cstack.size, int); 3270 } 3271 3272 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */ 3273 curndx = newndx = 0; 3274 lp = curlb.buffer; 3275 *lp = 0; 3276 3277 fvdef = fvnone; fvextern = FALSE; typdef = tnone; 3278 structdef = snone; definedef = dnone; objdef = onone; 3279 yacc_rules = FALSE; 3280 midtoken = inquote = inchar = incomm = quotednl = FALSE; 3281 token.valid = savetoken.valid = FALSE; 3282 bracelev = bracketlev = parlev = attrparlev = templatelev = 0; 3283 if (cjava) 3284 { qualifier = "."; qlen = 1; } 3285 else 3286 { qualifier = "::"; qlen = 2; } 3287 3288 3289 while (!feof (inf)) 3290 { 3291 c = *lp++; 3292 if (c == '\\') 3293 { 3294 /* If we are at the end of the line, the next character is a 3295 '\0'; do not skip it, because it is what tells us 3296 to read the next line. */ 3297 if (*lp == '\0') 3298 { 3299 quotednl = TRUE; 3300 continue; 3301 } 3302 lp++; 3303 c = ' '; 3304 } 3305 else if (incomm) 3306 { 3307 switch (c) 3308 { 3309 case '*': 3310 if (*lp == '/') 3311 { 3312 c = *lp++; 3313 incomm = FALSE; 3314 } 3315 break; 3316 case '\0': 3317 /* Newlines inside comments do not end macro definitions in 3318 traditional cpp. */ 3319 CNL_SAVE_DEFINEDEF (); 3320 break; 3321 } 3322 continue; 3323 } 3324 else if (inquote) 3325 { 3326 switch (c) 3327 { 3328 case '"': 3329 inquote = FALSE; 3330 break; 3331 case '\0': 3332 /* Newlines inside strings do not end macro definitions 3333 in traditional cpp, even though compilers don't 3334 usually accept them. */ 3335 CNL_SAVE_DEFINEDEF (); 3336 break; 3337 } 3338 continue; 3339 } 3340 else if (inchar) 3341 { 3342 switch (c) 3343 { 3344 case '\0': 3345 /* Hmmm, something went wrong. 
*/ 3346 CNL (); 3347 /* FALLTHRU */ 3348 case '\'': 3349 inchar = FALSE; 3350 break; 3351 } 3352 continue; 3353 } 3354 else if (bracketlev > 0) 3355 { 3356 switch (c) 3357 { 3358 case ']': 3359 if (--bracketlev > 0) 3360 continue; 3361 break; 3362 case '\0': 3363 CNL_SAVE_DEFINEDEF (); 3364 break; 3365 } 3366 continue; 3367 } 3368 else switch (c) 3369 { 3370 case '"': 3371 inquote = TRUE; 3372 if (inattribute) 3373 break; 3374 switch (fvdef) 3375 { 3376 case fdefunkey: 3377 case fstartlist: 3378 case finlist: 3379 case fignore: 3380 case vignore: 3381 break; 3382 default: 3383 fvextern = FALSE; 3384 fvdef = fvnone; 3385 } 3386 continue; 3387 case '\'': 3388 inchar = TRUE; 3389 if (inattribute) 3390 break; 3391 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore) 3392 { 3393 fvextern = FALSE; 3394 fvdef = fvnone; 3395 } 3396 continue; 3397 case '/': 3398 if (*lp == '*') 3399 { 3400 lp++; 3401 incomm = TRUE; 3402 continue; 3403 } 3404 else if (/* cplpl && */ *lp == '/') 3405 { 3406 c = '\0'; 3407 break; 3408 } 3409 else 3410 break; 3411 case '%': 3412 if ((c_ext & YACC) && *lp == '%') 3413 { 3414 /* Entering or exiting rules section in yacc file. */ 3415 lp++; 3416 definedef = dnone; fvdef = fvnone; fvextern = FALSE; 3417 typdef = tnone; structdef = snone; 3418 midtoken = inquote = inchar = incomm = quotednl = FALSE; 3419 bracelev = 0; 3420 yacc_rules = !yacc_rules; 3421 continue; 3422 } 3423 else 3424 break; 3425 case '#': 3426 if (definedef == dnone) 3427 { 3428 char *cp; 3429 bool cpptoken = TRUE; 3430 3431 /* Look back on this line. If all blanks, or nonblanks 3432 followed by an end of comment, this is a preprocessor 3433 token. 
*/ 3434 for (cp = newlb.buffer; cp < lp-1; cp++) 3435 if (!iswhite (*cp)) 3436 { 3437 if (*cp == '*' && *(cp+1) == '/') 3438 { 3439 cp++; 3440 cpptoken = TRUE; 3441 } 3442 else 3443 cpptoken = FALSE; 3444 } 3445 if (cpptoken) 3446 definedef = dsharpseen; 3447 } /* if (definedef == dnone) */ 3448 continue; 3449 case '[': 3450 bracketlev++; 3451 continue; 3452 } /* switch (c) */ 3453 3454 3455 /* Consider token only if some involved conditions are satisfied. */ 3456 if (typdef != tignore 3457 && definedef != dignorerest 3458 && fvdef != finlist 3459 && templatelev == 0 3460 && (definedef != dnone 3461 || structdef != scolonseen) 3462 && !inattribute) 3463 { 3464 if (midtoken) 3465 { 3466 if (endtoken (c)) 3467 { 3468 if (c == ':' && *lp == ':' && begtoken (lp[1])) 3469 /* This handles :: in the middle, 3470 but not at the beginning of an identifier. 3471 Also, space-separated :: is not recognised. */ 3472 { 3473 if (c_ext & C_AUTO) /* automatic detection of C++ */ 3474 c_ext = (c_ext | C_PLPL) & ~C_AUTO; 3475 lp += 2; 3476 toklen += 2; 3477 c = lp[-1]; 3478 goto still_in_token; 3479 } 3480 else 3481 { 3482 bool funorvar = FALSE; 3483 3484 if (yacc_rules 3485 || consider_token (newlb.buffer + tokoff, toklen, c, 3486 &c_ext, bracelev, parlev, 3487 &funorvar)) 3488 { 3489 if (fvdef == foperator) 3490 { 3491 char *oldlp = lp; 3492 lp = skip_spaces (lp-1); 3493 if (*lp != '\0') 3494 lp += 1; 3495 while (*lp != '\0' 3496 && !iswhite (*lp) && *lp != '(') 3497 lp += 1; 3498 c = *lp++; 3499 toklen += lp - oldlp; 3500 } 3501 token.named = FALSE; 3502 if (!plainc 3503 && nestlev > 0 && definedef == dnone) 3504 /* in struct body */ 3505 { 3506 write_classname (&token_name, qualifier); 3507 linebuffer_setlen (&token_name, 3508 token_name.len+qlen+toklen); 3509 strcat (token_name.buffer, qualifier); 3510 strncat (token_name.buffer, 3511 newlb.buffer + tokoff, toklen); 3512 token.named = TRUE; 3513 } 3514 else if (objdef == ocatseen) 3515 /* Objective C category */ 3516 { 3517 int 
len = strlen (objtag) + 2 + toklen; 3518 linebuffer_setlen (&token_name, len); 3519 strcpy (token_name.buffer, objtag); 3520 strcat (token_name.buffer, "("); 3521 strncat (token_name.buffer, 3522 newlb.buffer + tokoff, toklen); 3523 strcat (token_name.buffer, ")"); 3524 token.named = TRUE; 3525 } 3526 else if (objdef == omethodtag 3527 || objdef == omethodparm) 3528 /* Objective C method */ 3529 { 3530 token.named = TRUE; 3531 } 3532 else if (fvdef == fdefunname) 3533 /* GNU DEFUN and similar macros */ 3534 { 3535 bool defun = (newlb.buffer[tokoff] == 'F'); 3536 int off = tokoff; 3537 int len = toklen; 3538 3539 /* Rewrite the tag so that emacs lisp DEFUNs 3540 can be found by their elisp name */ 3541 if (defun) 3542 { 3543 off += 1; 3544 len -= 1; 3545 } 3546 linebuffer_setlen (&token_name, len); 3547 strncpy (token_name.buffer, 3548 newlb.buffer + off, len); 3549 token_name.buffer[len] = '\0'; 3550 if (defun) 3551 while (--len >= 0) 3552 if (token_name.buffer[len] == '_') 3553 token_name.buffer[len] = '-'; 3554 token.named = defun; 3555 } 3556 else 3557 { 3558 linebuffer_setlen (&token_name, toklen); 3559 strncpy (token_name.buffer, 3560 newlb.buffer + tokoff, toklen); 3561 token_name.buffer[toklen] = '\0'; 3562 /* Name macros and members. 
*/ 3563 token.named = (structdef == stagseen 3564 || typdef == ttypeseen 3565 || typdef == tend 3566 || (funorvar 3567 && definedef == dignorerest) 3568 || (funorvar 3569 && definedef == dnone 3570 && structdef == snone 3571 && bracelev > 0)); 3572 } 3573 token.lineno = lineno; 3574 token.offset = tokoff; 3575 token.length = toklen; 3576 token.line = newlb.buffer; 3577 token.linepos = newlinepos; 3578 token.valid = TRUE; 3579 3580 if (definedef == dnone 3581 && (fvdef == fvnameseen 3582 || fvdef == foperator 3583 || structdef == stagseen 3584 || typdef == tend 3585 || typdef == ttypeseen 3586 || objdef != onone)) 3587 { 3588 if (current_lb_is_new) 3589 switch_line_buffers (); 3590 } 3591 else if (definedef != dnone 3592 || fvdef == fdefunname 3593 || instruct) 3594 make_C_tag (funorvar); 3595 } 3596 else /* not yacc and consider_token failed */ 3597 { 3598 if (inattribute && fvdef == fignore) 3599 { 3600 /* We have just met __attribute__ after a 3601 function parameter list: do not tag the 3602 function again. */ 3603 fvdef = fvnone; 3604 } 3605 } 3606 midtoken = FALSE; 3607 } 3608 } /* if (endtoken (c)) */ 3609 else if (intoken (c)) 3610 still_in_token: 3611 { 3612 toklen++; 3613 continue; 3614 } 3615 } /* if (midtoken) */ 3616 else if (begtoken (c)) 3617 { 3618 switch (definedef) 3619 { 3620 case dnone: 3621 switch (fvdef) 3622 { 3623 case fstartlist: 3624 /* This prevents tagging fb in 3625 void (__attribute__((noreturn)) *fb) (void); 3626 Fixing this is not easy and not very important. 
*/ 3627 fvdef = finlist; 3628 continue; 3629 case flistseen: 3630 if (plainc || declarations) 3631 { 3632 make_C_tag (TRUE); /* a function */ 3633 fvdef = fignore; 3634 } 3635 break; 3636 } 3637 if (structdef == stagseen && !cjava) 3638 { 3639 popclass_above (bracelev); 3640 structdef = snone; 3641 } 3642 break; 3643 case dsharpseen: 3644 savetoken = token; 3645 break; 3646 } 3647 if (!yacc_rules || lp == newlb.buffer + 1) 3648 { 3649 tokoff = lp - 1 - newlb.buffer; 3650 toklen = 1; 3651 midtoken = TRUE; 3652 } 3653 continue; 3654 } /* if (begtoken) */ 3655 } /* if must look at token */ 3656 3657 3658 /* Detect end of line, colon, comma, semicolon and various braces 3659 after having handled a token.*/ 3660 switch (c) 3661 { 3662 case ':': 3663 if (inattribute) 3664 break; 3665 if (yacc_rules && token.offset == 0 && token.valid) 3666 { 3667 make_C_tag (FALSE); /* a yacc function */ 3668 break; 3669 } 3670 if (definedef != dnone) 3671 break; 3672 switch (objdef) 3673 { 3674 case otagseen: 3675 objdef = oignore; 3676 make_C_tag (TRUE); /* an Objective C class */ 3677 break; 3678 case omethodtag: 3679 case omethodparm: 3680 objdef = omethodcolon; 3681 linebuffer_setlen (&token_name, token_name.len + 1); 3682 strcat (token_name.buffer, ":"); 3683 break; 3684 } 3685 if (structdef == stagseen) 3686 { 3687 structdef = scolonseen; 3688 break; 3689 } 3690 /* Should be useless, but may be work as a safety net. 
*/ 3691 if (cplpl && fvdef == flistseen) 3692 { 3693 make_C_tag (TRUE); /* a function */ 3694 fvdef = fignore; 3695 break; 3696 } 3697 break; 3698 case ';': 3699 if (definedef != dnone || inattribute) 3700 break; 3701 switch (typdef) 3702 { 3703 case tend: 3704 case ttypeseen: 3705 make_C_tag (FALSE); /* a typedef */ 3706 typdef = tnone; 3707 fvdef = fvnone; 3708 break; 3709 case tnone: 3710 case tinbody: 3711 case tignore: 3712 switch (fvdef) 3713 { 3714 case fignore: 3715 if (typdef == tignore || cplpl) 3716 fvdef = fvnone; 3717 break; 3718 case fvnameseen: 3719 if ((globals && bracelev == 0 && (!fvextern || declarations)) 3720 || (members && instruct)) 3721 make_C_tag (FALSE); /* a variable */ 3722 fvextern = FALSE; 3723 fvdef = fvnone; 3724 token.valid = FALSE; 3725 break; 3726 case flistseen: 3727 if ((declarations 3728 && (cplpl || !instruct) 3729 && (typdef == tnone || (typdef != tignore && instruct))) 3730 || (members 3731 && plainc && instruct)) 3732 make_C_tag (TRUE); /* a function */ 3733 /* FALLTHRU */ 3734 default: 3735 fvextern = FALSE; 3736 fvdef = fvnone; 3737 if (declarations 3738 && cplpl && structdef == stagseen) 3739 make_C_tag (FALSE); /* forward declaration */ 3740 else 3741 token.valid = FALSE; 3742 } /* switch (fvdef) */ 3743 /* FALLTHRU */ 3744 default: 3745 if (!instruct) 3746 typdef = tnone; 3747 } 3748 if (structdef == stagseen) 3749 structdef = snone; 3750 break; 3751 case ',': 3752 if (definedef != dnone || inattribute) 3753 break; 3754 switch (objdef) 3755 { 3756 case omethodtag: 3757 case omethodparm: 3758 make_C_tag (TRUE); /* an Objective C method */ 3759 objdef = oinbody; 3760 break; 3761 } 3762 switch (fvdef) 3763 { 3764 case fdefunkey: 3765 case foperator: 3766 case fstartlist: 3767 case finlist: 3768 case fignore: 3769 case vignore: 3770 break; 3771 case fdefunname: 3772 fvdef = fignore; 3773 break; 3774 case fvnameseen: 3775 if (parlev == 0 3776 && ((globals 3777 && bracelev == 0 3778 && templatelev == 0 3779 && (!fvextern || 
declarations)) 3780 || (members && instruct))) 3781 make_C_tag (FALSE); /* a variable */ 3782 break; 3783 case flistseen: 3784 if ((declarations && typdef == tnone && !instruct) 3785 || (members && typdef != tignore && instruct)) 3786 { 3787 make_C_tag (TRUE); /* a function */ 3788 fvdef = fvnameseen; 3789 } 3790 else if (!declarations) 3791 fvdef = fvnone; 3792 token.valid = FALSE; 3793 break; 3794 default: 3795 fvdef = fvnone; 3796 } 3797 if (structdef == stagseen) 3798 structdef = snone; 3799 break; 3800 case ']': 3801 if (definedef != dnone || inattribute) 3802 break; 3803 if (structdef == stagseen) 3804 structdef = snone; 3805 switch (typdef) 3806 { 3807 case ttypeseen: 3808 case tend: 3809 typdef = tignore; 3810 make_C_tag (FALSE); /* a typedef */ 3811 break; 3812 case tnone: 3813 case tinbody: 3814 switch (fvdef) 3815 { 3816 case foperator: 3817 case finlist: 3818 case fignore: 3819 case vignore: 3820 break; 3821 case fvnameseen: 3822 if ((members && bracelev == 1) 3823 || (globals && bracelev == 0 3824 && (!fvextern || declarations))) 3825 make_C_tag (FALSE); /* a variable */ 3826 /* FALLTHRU */ 3827 default: 3828 fvdef = fvnone; 3829 } 3830 break; 3831 } 3832 break; 3833 case '(': 3834 if (inattribute) 3835 { 3836 attrparlev++; 3837 break; 3838 } 3839 if (definedef != dnone) 3840 break; 3841 if (objdef == otagseen && parlev == 0) 3842 objdef = oparenseen; 3843 switch (fvdef) 3844 { 3845 case fvnameseen: 3846 if (typdef == ttypeseen 3847 && *lp != '*' 3848 && !instruct) 3849 { 3850 /* This handles constructs like: 3851 typedef void OperatorFun (int fun); */ 3852 make_C_tag (FALSE); 3853 typdef = tignore; 3854 fvdef = fignore; 3855 break; 3856 } 3857 /* FALLTHRU */ 3858 case foperator: 3859 fvdef = fstartlist; 3860 break; 3861 case flistseen: 3862 fvdef = finlist; 3863 break; 3864 } 3865 parlev++; 3866 break; 3867 case ')': 3868 if (inattribute) 3869 { 3870 if (--attrparlev == 0) 3871 inattribute = FALSE; 3872 break; 3873 } 3874 if (definedef != dnone) 3875 
break; 3876 if (objdef == ocatseen && parlev == 1) 3877 { 3878 make_C_tag (TRUE); /* an Objective C category */ 3879 objdef = oignore; 3880 } 3881 if (--parlev == 0) 3882 { 3883 switch (fvdef) 3884 { 3885 case fstartlist: 3886 case finlist: 3887 fvdef = flistseen; 3888 break; 3889 } 3890 if (!instruct 3891 && (typdef == tend 3892 || typdef == ttypeseen)) 3893 { 3894 typdef = tignore; 3895 make_C_tag (FALSE); /* a typedef */ 3896 } 3897 } 3898 else if (parlev < 0) /* can happen due to ill-conceived #if's. */ 3899 parlev = 0; 3900 break; 3901 case '{': 3902 if (definedef != dnone) 3903 break; 3904 if (typdef == ttypeseen) 3905 { 3906 /* Whenever typdef is set to tinbody (currently only 3907 here), typdefbracelev should be set to bracelev. */ 3908 typdef = tinbody; 3909 typdefbracelev = bracelev; 3910 } 3911 switch (fvdef) 3912 { 3913 case flistseen: 3914 make_C_tag (TRUE); /* a function */ 3915 /* FALLTHRU */ 3916 case fignore: 3917 fvdef = fvnone; 3918 break; 3919 case fvnone: 3920 switch (objdef) 3921 { 3922 case otagseen: 3923 make_C_tag (TRUE); /* an Objective C class */ 3924 objdef = oignore; 3925 break; 3926 case omethodtag: 3927 case omethodparm: 3928 make_C_tag (TRUE); /* an Objective C method */ 3929 objdef = oinbody; 3930 break; 3931 default: 3932 /* Neutralize `extern "C" {' grot. 
*/ 3933 if (bracelev == 0 && structdef == snone && nestlev == 0 3934 && typdef == tnone) 3935 bracelev = -1; 3936 } 3937 break; 3938 } 3939 switch (structdef) 3940 { 3941 case skeyseen: /* unnamed struct */ 3942 pushclass_above (bracelev, NULL, 0); 3943 structdef = snone; 3944 break; 3945 case stagseen: /* named struct or enum */ 3946 case scolonseen: /* a class */ 3947 pushclass_above (bracelev,token.line+token.offset, token.length); 3948 structdef = snone; 3949 make_C_tag (FALSE); /* a struct or enum */ 3950 break; 3951 } 3952 bracelev++; 3953 break; 3954 case '*': 3955 if (definedef != dnone) 3956 break; 3957 if (fvdef == fstartlist) 3958 { 3959 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */ 3960 token.valid = FALSE; 3961 } 3962 break; 3963 case '}': 3964 if (definedef != dnone) 3965 break; 3966 if (!ignoreindent && lp == newlb.buffer + 1) 3967 { 3968 if (bracelev != 0) 3969 token.valid = FALSE; 3970 bracelev = 0; /* reset brace level if first column */ 3971 parlev = 0; /* also reset paren level, just in case... */ 3972 } 3973 else if (bracelev > 0) 3974 bracelev--; 3975 else 3976 token.valid = FALSE; /* something gone amiss, token unreliable */ 3977 popclass_above (bracelev); 3978 structdef = snone; 3979 /* Only if typdef == tinbody is typdefbracelev significant. 
*/ 3980 if (typdef == tinbody && bracelev <= typdefbracelev) 3981 { 3982 assert (bracelev == typdefbracelev); 3983 typdef = tend; 3984 } 3985 break; 3986 case '=': 3987 if (definedef != dnone) 3988 break; 3989 switch (fvdef) 3990 { 3991 case foperator: 3992 case finlist: 3993 case fignore: 3994 case vignore: 3995 break; 3996 case fvnameseen: 3997 if ((members && bracelev == 1) 3998 || (globals && bracelev == 0 && (!fvextern || declarations))) 3999 make_C_tag (FALSE); /* a variable */ 4000 /* FALLTHRU */ 4001 default: 4002 fvdef = vignore; 4003 } 4004 break; 4005 case '<': 4006 if (cplpl 4007 && (structdef == stagseen || fvdef == fvnameseen)) 4008 { 4009 templatelev++; 4010 break; 4011 } 4012 goto resetfvdef; 4013 case '>': 4014 if (templatelev > 0) 4015 { 4016 templatelev--; 4017 break; 4018 } 4019 goto resetfvdef; 4020 case '+': 4021 case '-': 4022 if (objdef == oinbody && bracelev == 0) 4023 { 4024 objdef = omethodsign; 4025 break; 4026 } 4027 /* FALLTHRU */ 4028 resetfvdef: 4029 case '#': case '~': case '&': case '%': case '/': 4030 case '|': case '^': case '!': case '.': case '?': 4031 if (definedef != dnone) 4032 break; 4033 /* These surely cannot follow a function tag in C. */ 4034 switch (fvdef) 4035 { 4036 case foperator: 4037 case finlist: 4038 case fignore: 4039 case vignore: 4040 break; 4041 default: 4042 fvdef = fvnone; 4043 } 4044 break; 4045 case '\0': 4046 if (objdef == otagseen) 4047 { 4048 make_C_tag (TRUE); /* an Objective C class */ 4049 objdef = oignore; 4050 } 4051 /* If a macro spans multiple lines don't reset its state. */ 4052 if (quotednl) 4053 CNL_SAVE_DEFINEDEF (); 4054 else 4055 CNL (); 4056 break; 4057 } /* switch (c) */ 4058 4059 } /* while not eof */ 4060 4061 free (lbs[0].lb.buffer); 4062 free (lbs[1].lb.buffer); 4063} 4064 4065/* 4066 * Process either a C++ file or a C file depending on the setting 4067 * of a global flag. 4068 */ 4069static void 4070default_C_entries (inf) 4071 FILE *inf; 4072{ 4073 C_entries (cplusplus ? 
C_PLPL : C_AUTO, inf); 4074} 4075 4076/* Always do plain C. */ 4077static void 4078plain_C_entries (inf) 4079 FILE *inf; 4080{ 4081 C_entries (0, inf); 4082} 4083 4084/* Always do C++. */ 4085static void 4086Cplusplus_entries (inf) 4087 FILE *inf; 4088{ 4089 C_entries (C_PLPL, inf); 4090} 4091 4092/* Always do Java. */ 4093static void 4094Cjava_entries (inf) 4095 FILE *inf; 4096{ 4097 C_entries (C_JAVA, inf); 4098} 4099 4100/* Always do C*. */ 4101static void 4102Cstar_entries (inf) 4103 FILE *inf; 4104{ 4105 C_entries (C_STAR, inf); 4106} 4107 4108/* Always do Yacc. */ 4109static void 4110Yacc_entries (inf) 4111 FILE *inf; 4112{ 4113 C_entries (YACC, inf); 4114} 4115 4116 4117/* Useful macros. */ 4118#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \ 4119 for (; /* loop initialization */ \ 4120 !feof (file_pointer) /* loop test */ \ 4121 && /* instructions at start of loop */ \ 4122 (readline (&line_buffer, file_pointer), \ 4123 char_pointer = line_buffer.buffer, \ 4124 TRUE); \ 4125 ) 4126 4127#define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \ 4128 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \ 4129 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \ 4130 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \ 4131 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */ 4132 4133/* Similar to LOOKING_AT but does not use notinname, does not skip */ 4134#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \ 4135 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \ 4136 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \ 4137 && ((cp) += sizeof(kw)-1)) /* skip spaces */ 4138 4139/* 4140 * Read a file, but do no processing. This is used to do regexp 4141 * matching on files that have no language defined. 
4142 */ 4143static void 4144just_read_file (inf) 4145 FILE *inf; 4146{ 4147 register char *dummy; 4148 4149 LOOP_ON_INPUT_LINES (inf, lb, dummy) 4150 continue; 4151} 4152 4153 4154/* Fortran parsing */ 4155 4156static void F_takeprec __P((void)); 4157static void F_getit __P((FILE *)); 4158 4159static void 4160F_takeprec () 4161{ 4162 dbp = skip_spaces (dbp); 4163 if (*dbp != '*') 4164 return; 4165 dbp++; 4166 dbp = skip_spaces (dbp); 4167 if (strneq (dbp, "(*)", 3)) 4168 { 4169 dbp += 3; 4170 return; 4171 } 4172 if (!ISDIGIT (*dbp)) 4173 { 4174 --dbp; /* force failure */ 4175 return; 4176 } 4177 do 4178 dbp++; 4179 while (ISDIGIT (*dbp)); 4180} 4181 4182static void 4183F_getit (inf) 4184 FILE *inf; 4185{ 4186 register char *cp; 4187 4188 dbp = skip_spaces (dbp); 4189 if (*dbp == '\0') 4190 { 4191 readline (&lb, inf); 4192 dbp = lb.buffer; 4193 if (dbp[5] != '&') 4194 return; 4195 dbp += 6; 4196 dbp = skip_spaces (dbp); 4197 } 4198 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$') 4199 return; 4200 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++) 4201 continue; 4202 make_tag (dbp, cp-dbp, TRUE, 4203 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4204} 4205 4206 4207static void 4208Fortran_functions (inf) 4209 FILE *inf; 4210{ 4211 LOOP_ON_INPUT_LINES (inf, lb, dbp) 4212 { 4213 if (*dbp == '%') 4214 dbp++; /* Ratfor escape to fortran */ 4215 dbp = skip_spaces (dbp); 4216 if (*dbp == '\0') 4217 continue; 4218 switch (lowcase (*dbp)) 4219 { 4220 case 'i': 4221 if (nocase_tail ("integer")) 4222 F_takeprec (); 4223 break; 4224 case 'r': 4225 if (nocase_tail ("real")) 4226 F_takeprec (); 4227 break; 4228 case 'l': 4229 if (nocase_tail ("logical")) 4230 F_takeprec (); 4231 break; 4232 case 'c': 4233 if (nocase_tail ("complex") || nocase_tail ("character")) 4234 F_takeprec (); 4235 break; 4236 case 'd': 4237 if (nocase_tail ("double")) 4238 { 4239 dbp = skip_spaces (dbp); 4240 if (*dbp == '\0') 4241 continue; 4242 if (nocase_tail ("precision")) 4243 break; 4244 
continue; 4245 } 4246 break; 4247 } 4248 dbp = skip_spaces (dbp); 4249 if (*dbp == '\0') 4250 continue; 4251 switch (lowcase (*dbp)) 4252 { 4253 case 'f': 4254 if (nocase_tail ("function")) 4255 F_getit (inf); 4256 continue; 4257 case 's': 4258 if (nocase_tail ("subroutine")) 4259 F_getit (inf); 4260 continue; 4261 case 'e': 4262 if (nocase_tail ("entry")) 4263 F_getit (inf); 4264 continue; 4265 case 'b': 4266 if (nocase_tail ("blockdata") || nocase_tail ("block data")) 4267 { 4268 dbp = skip_spaces (dbp); 4269 if (*dbp == '\0') /* assume un-named */ 4270 make_tag ("blockdata", 9, TRUE, 4271 lb.buffer, dbp - lb.buffer, lineno, linecharno); 4272 else 4273 F_getit (inf); /* look for name */ 4274 } 4275 continue; 4276 } 4277 } 4278} 4279 4280 4281/* 4282 * Ada parsing 4283 * Original code by 4284 * Philippe Waroquiers (1998) 4285 */ 4286 4287static void Ada_getit __P((FILE *, char *)); 4288 4289/* Once we are positioned after an "interesting" keyword, let's get 4290 the real tag value necessary. */ 4291static void 4292Ada_getit (inf, name_qualifier) 4293 FILE *inf; 4294 char *name_qualifier; 4295{ 4296 register char *cp; 4297 char *name; 4298 char c; 4299 4300 while (!feof (inf)) 4301 { 4302 dbp = skip_spaces (dbp); 4303 if (*dbp == '\0' 4304 || (dbp[0] == '-' && dbp[1] == '-')) 4305 { 4306 readline (&lb, inf); 4307 dbp = lb.buffer; 4308 } 4309 switch (lowcase(*dbp)) 4310 { 4311 case 'b': 4312 if (nocase_tail ("body")) 4313 { 4314 /* Skipping body of procedure body or package body or .... 4315 resetting qualifier to body instead of spec. */ 4316 name_qualifier = "/b"; 4317 continue; 4318 } 4319 break; 4320 case 't': 4321 /* Skipping type of task type or protected type ... 
*/ 4322 if (nocase_tail ("type")) 4323 continue; 4324 break; 4325 } 4326 if (*dbp == '"') 4327 { 4328 dbp += 1; 4329 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++) 4330 continue; 4331 } 4332 else 4333 { 4334 dbp = skip_spaces (dbp); 4335 for (cp = dbp; 4336 (*cp != '\0' 4337 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.')); 4338 cp++) 4339 continue; 4340 if (cp == dbp) 4341 return; 4342 } 4343 c = *cp; 4344 *cp = '\0'; 4345 name = concat (dbp, name_qualifier, ""); 4346 *cp = c; 4347 make_tag (name, strlen (name), TRUE, 4348 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4349 free (name); 4350 if (c == '"') 4351 dbp = cp + 1; 4352 return; 4353 } 4354} 4355 4356static void 4357Ada_funcs (inf) 4358 FILE *inf; 4359{ 4360 bool inquote = FALSE; 4361 bool skip_till_semicolumn = FALSE; 4362 4363 LOOP_ON_INPUT_LINES (inf, lb, dbp) 4364 { 4365 while (*dbp != '\0') 4366 { 4367 /* Skip a string i.e. "abcd". */ 4368 if (inquote || (*dbp == '"')) 4369 { 4370 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"'); 4371 if (dbp != NULL) 4372 { 4373 inquote = FALSE; 4374 dbp += 1; 4375 continue; /* advance char */ 4376 } 4377 else 4378 { 4379 inquote = TRUE; 4380 break; /* advance line */ 4381 } 4382 } 4383 4384 /* Skip comments. */ 4385 if (dbp[0] == '-' && dbp[1] == '-') 4386 break; /* advance line */ 4387 4388 /* Skip character enclosed in single quote i.e. 'a' 4389 and skip single quote starting an attribute i.e. 'Image. */ 4390 if (*dbp == '\'') 4391 { 4392 dbp++ ; 4393 if (*dbp != '\0') 4394 dbp++; 4395 continue; 4396 } 4397 4398 if (skip_till_semicolumn) 4399 { 4400 if (*dbp == ';') 4401 skip_till_semicolumn = FALSE; 4402 dbp++; 4403 continue; /* advance char */ 4404 } 4405 4406 /* Search for beginning of a token. */ 4407 if (!begtoken (*dbp)) 4408 { 4409 dbp++; 4410 continue; /* advance char */ 4411 } 4412 4413 /* We are at the beginning of a token. 
*/ 4414 switch (lowcase(*dbp)) 4415 { 4416 case 'f': 4417 if (!packages_only && nocase_tail ("function")) 4418 Ada_getit (inf, "/f"); 4419 else 4420 break; /* from switch */ 4421 continue; /* advance char */ 4422 case 'p': 4423 if (!packages_only && nocase_tail ("procedure")) 4424 Ada_getit (inf, "/p"); 4425 else if (nocase_tail ("package")) 4426 Ada_getit (inf, "/s"); 4427 else if (nocase_tail ("protected")) /* protected type */ 4428 Ada_getit (inf, "/t"); 4429 else 4430 break; /* from switch */ 4431 continue; /* advance char */ 4432 4433 case 'u': 4434 if (typedefs && !packages_only && nocase_tail ("use")) 4435 { 4436 /* when tagging types, avoid tagging use type Pack.Typename; 4437 for this, we will skip everything till a ; */ 4438 skip_till_semicolumn = TRUE; 4439 continue; /* advance char */ 4440 } 4441 4442 case 't': 4443 if (!packages_only && nocase_tail ("task")) 4444 Ada_getit (inf, "/k"); 4445 else if (typedefs && !packages_only && nocase_tail ("type")) 4446 { 4447 Ada_getit (inf, "/t"); 4448 while (*dbp != '\0') 4449 dbp += 1; 4450 } 4451 else 4452 break; /* from switch */ 4453 continue; /* advance char */ 4454 } 4455 4456 /* Look for the end of the token. */ 4457 while (!endtoken (*dbp)) 4458 dbp++; 4459 4460 } /* advance char */ 4461 } /* advance line */ 4462} 4463 4464 4465/* 4466 * Unix and microcontroller assembly tag handling 4467 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/ 4468 * Idea by Bob Weiner, Motorola Inc. (1994) 4469 */ 4470static void 4471Asm_labels (inf) 4472 FILE *inf; 4473{ 4474 register char *cp; 4475 4476 LOOP_ON_INPUT_LINES (inf, lb, cp) 4477 { 4478 /* If first char is alphabetic or one of [_.$], test for colon 4479 following identifier. */ 4480 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$') 4481 { 4482 /* Read past label. */ 4483 cp++; 4484 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$') 4485 cp++; 4486 if (*cp == ':' || iswhite (*cp)) 4487 /* Found end of label, so copy it and add it to the table. 
*/ 4488 make_tag (lb.buffer, cp - lb.buffer, TRUE, 4489 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4490 } 4491 } 4492} 4493 4494 4495/* 4496 * Perl support 4497 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/ 4498 * Perl variable names: /^(my|local).../ 4499 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995) 4500 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997) 4501 * Ideas by Kai Gro�johann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001) 4502 */ 4503static void 4504Perl_functions (inf) 4505 FILE *inf; 4506{ 4507 char *package = savestr ("main"); /* current package name */ 4508 register char *cp; 4509 4510 LOOP_ON_INPUT_LINES (inf, lb, cp) 4511 { 4512 skip_spaces(cp); 4513 4514 if (LOOKING_AT (cp, "package")) 4515 { 4516 free (package); 4517 get_tag (cp, &package); 4518 } 4519 else if (LOOKING_AT (cp, "sub")) 4520 { 4521 char *pos; 4522 char *sp = cp; 4523 4524 while (!notinname (*cp)) 4525 cp++; 4526 if (cp == sp) 4527 continue; /* nothing found */ 4528 if ((pos = etags_strchr (sp, ':')) != NULL 4529 && pos < cp && pos[1] == ':') 4530 /* The name is already qualified. */ 4531 make_tag (sp, cp - sp, TRUE, 4532 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4533 else 4534 /* Qualify it. */ 4535 { 4536 char savechar, *name; 4537 4538 savechar = *cp; 4539 *cp = '\0'; 4540 name = concat (package, "::", sp); 4541 *cp = savechar; 4542 make_tag (name, strlen(name), TRUE, 4543 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4544 free (name); 4545 } 4546 } 4547 else if (globals) /* only if we are tagging global vars */ 4548 { 4549 /* Skip a qualifier, if any. */ 4550 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local"); 4551 /* After "my" or "local", but before any following paren or space. */ 4552 char *varstart = cp; 4553 4554 if (qual /* should this be removed? If yes, how? 
*/ 4555 && (*cp == '$' || *cp == '@' || *cp == '%')) 4556 { 4557 varstart += 1; 4558 do 4559 cp++; 4560 while (ISALNUM (*cp) || *cp == '_'); 4561 } 4562 else if (qual) 4563 { 4564 /* Should be examining a variable list at this point; 4565 could insist on seeing an open parenthesis. */ 4566 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')') 4567 cp++; 4568 } 4569 else 4570 continue; 4571 4572 make_tag (varstart, cp - varstart, FALSE, 4573 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4574 } 4575 } 4576 free (package); 4577} 4578 4579 4580/* 4581 * Python support 4582 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/ 4583 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997) 4584 * More ideas by seb bacon <seb@jamkit.com> (2002) 4585 */ 4586static void 4587Python_functions (inf) 4588 FILE *inf; 4589{ 4590 register char *cp; 4591 4592 LOOP_ON_INPUT_LINES (inf, lb, cp) 4593 { 4594 cp = skip_spaces (cp); 4595 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class")) 4596 { 4597 char *name = cp; 4598 while (!notinname (*cp) && *cp != ':') 4599 cp++; 4600 make_tag (name, cp - name, TRUE, 4601 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4602 } 4603 } 4604} 4605 4606 4607/* 4608 * PHP support 4609 * Look for: 4610 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/ 4611 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/ 4612 * - /^[ \t]*define\(\"[^\"]+/ 4613 * Only with --members: 4614 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/ 4615 * Idea by Diez B. 
Roggisch (2001) 4616 */ 4617static void 4618PHP_functions (inf) 4619 FILE *inf; 4620{ 4621 register char *cp, *name; 4622 bool search_identifier = FALSE; 4623 4624 LOOP_ON_INPUT_LINES (inf, lb, cp) 4625 { 4626 cp = skip_spaces (cp); 4627 name = cp; 4628 if (search_identifier 4629 && *cp != '\0') 4630 { 4631 while (!notinname (*cp)) 4632 cp++; 4633 make_tag (name, cp - name, TRUE, 4634 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4635 search_identifier = FALSE; 4636 } 4637 else if (LOOKING_AT (cp, "function")) 4638 { 4639 if(*cp == '&') 4640 cp = skip_spaces (cp+1); 4641 if(*cp != '\0') 4642 { 4643 name = cp; 4644 while (!notinname (*cp)) 4645 cp++; 4646 make_tag (name, cp - name, TRUE, 4647 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4648 } 4649 else 4650 search_identifier = TRUE; 4651 } 4652 else if (LOOKING_AT (cp, "class")) 4653 { 4654 if (*cp != '\0') 4655 { 4656 name = cp; 4657 while (*cp != '\0' && !iswhite (*cp)) 4658 cp++; 4659 make_tag (name, cp - name, FALSE, 4660 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4661 } 4662 else 4663 search_identifier = TRUE; 4664 } 4665 else if (strneq (cp, "define", 6) 4666 && (cp = skip_spaces (cp+6)) 4667 && *cp++ == '(' 4668 && (*cp == '"' || *cp == '\'')) 4669 { 4670 char quote = *cp++; 4671 name = cp; 4672 while (*cp != quote && *cp != '\0') 4673 cp++; 4674 make_tag (name, cp - name, FALSE, 4675 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4676 } 4677 else if (members 4678 && LOOKING_AT (cp, "var") 4679 && *cp == '$') 4680 { 4681 name = cp; 4682 while (!notinname(*cp)) 4683 cp++; 4684 make_tag (name, cp - name, FALSE, 4685 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 4686 } 4687 } 4688} 4689 4690 4691/* 4692 * Cobol tag functions 4693 * We could look for anything that could be a paragraph name. 4694 * i.e. anything that starts in column 8 is one word and ends in a full stop. 
4695 * Idea by Corny de Souza (1993) 4696 */ 4697static void 4698Cobol_paragraphs (inf) 4699 FILE *inf; 4700{ 4701 register char *bp, *ep; 4702 4703 LOOP_ON_INPUT_LINES (inf, lb, bp) 4704 { 4705 if (lb.len < 9) 4706 continue; 4707 bp += 8; 4708 4709 /* If eoln, compiler option or comment ignore whole line. */ 4710 if (bp[-1] != ' ' || !ISALNUM (bp[0])) 4711 continue; 4712 4713 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++) 4714 continue; 4715 if (*ep++ == '.') 4716 make_tag (bp, ep - bp, TRUE, 4717 lb.buffer, ep - lb.buffer + 1, lineno, linecharno); 4718 } 4719} 4720 4721 4722/* 4723 * Makefile support 4724 * Ideas by Assar Westerlund <assar@sics.se> (2001) 4725 */ 4726static void 4727Makefile_targets (inf) 4728 FILE *inf; 4729{ 4730 register char *bp; 4731 4732 LOOP_ON_INPUT_LINES (inf, lb, bp) 4733 { 4734 if (*bp == '\t' || *bp == '#') 4735 continue; 4736 while (*bp != '\0' && *bp != '=' && *bp != ':') 4737 bp++; 4738 if (*bp == ':' || (globals && *bp == '=')) 4739 { 4740 /* We should detect if there is more than one tag, but we do not. 4741 We just skip initial and final spaces. */ 4742 char * namestart = skip_spaces (lb.buffer); 4743 while (--bp > namestart) 4744 if (!notinname (*bp)) 4745 break; 4746 make_tag (namestart, bp - namestart + 1, TRUE, 4747 lb.buffer, bp - lb.buffer + 2, lineno, linecharno); 4748 } 4749 } 4750} 4751 4752 4753/* 4754 * Pascal parsing 4755 * Original code by Mosur K. Mohan (1989) 4756 * 4757 * Locates tags for procedures & functions. Doesn't do any type- or 4758 * var-definitions. It does look for the keyword "extern" or 4759 * "forward" immediately following the procedure statement; if found, 4760 * the tag is skipped. 
4761 */ 4762static void 4763Pascal_functions (inf) 4764 FILE *inf; 4765{ 4766 linebuffer tline; /* mostly copied from C_entries */ 4767 long save_lcno; 4768 int save_lineno, namelen, taglen; 4769 char c, *name; 4770 4771 bool /* each of these flags is TRUE iff: */ 4772 incomment, /* point is inside a comment */ 4773 inquote, /* point is inside '..' string */ 4774 get_tagname, /* point is after PROCEDURE/FUNCTION 4775 keyword, so next item = potential tag */ 4776 found_tag, /* point is after a potential tag */ 4777 inparms, /* point is within parameter-list */ 4778 verify_tag; /* point has passed the parm-list, so the 4779 next token will determine whether this 4780 is a FORWARD/EXTERN to be ignored, or 4781 whether it is a real tag */ 4782 4783 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */ 4784 name = NULL; /* keep compiler quiet */ 4785 dbp = lb.buffer; 4786 *dbp = '\0'; 4787 linebuffer_init (&tline); 4788 4789 incomment = inquote = FALSE; 4790 found_tag = FALSE; /* have a proc name; check if extern */ 4791 get_tagname = FALSE; /* found "procedure" keyword */ 4792 inparms = FALSE; /* found '(' after "proc" */ 4793 verify_tag = FALSE; /* check if "extern" is ahead */ 4794 4795 4796 while (!feof (inf)) /* long main loop to get next char */ 4797 { 4798 c = *dbp++; 4799 if (c == '\0') /* if end of line */ 4800 { 4801 readline (&lb, inf); 4802 dbp = lb.buffer; 4803 if (*dbp == '\0') 4804 continue; 4805 if (!((found_tag && verify_tag) 4806 || get_tagname)) 4807 c = *dbp++; /* only if don't need *dbp pointing 4808 to the beginning of the name of 4809 the procedure or function */ 4810 } 4811 if (incomment) 4812 { 4813 if (c == '}') /* within { } comments */ 4814 incomment = FALSE; 4815 else if (c == '*' && *dbp == ')') /* within (* *) comments */ 4816 { 4817 dbp++; 4818 incomment = FALSE; 4819 } 4820 continue; 4821 } 4822 else if (inquote) 4823 { 4824 if (c == '\'') 4825 inquote = FALSE; 4826 continue; 4827 } 4828 else 4829 switch (c) 4830 { 
4831 case '\'': 4832 inquote = TRUE; /* found first quote */ 4833 continue; 4834 case '{': /* found open { comment */ 4835 incomment = TRUE; 4836 continue; 4837 case '(': 4838 if (*dbp == '*') /* found open (* comment */ 4839 { 4840 incomment = TRUE; 4841 dbp++; 4842 } 4843 else if (found_tag) /* found '(' after tag, i.e., parm-list */ 4844 inparms = TRUE; 4845 continue; 4846 case ')': /* end of parms list */ 4847 if (inparms) 4848 inparms = FALSE; 4849 continue; 4850 case ';': 4851 if (found_tag && !inparms) /* end of proc or fn stmt */ 4852 { 4853 verify_tag = TRUE; 4854 break; 4855 } 4856 continue; 4857 } 4858 if (found_tag && verify_tag && (*dbp != ' ')) 4859 { 4860 /* Check if this is an "extern" declaration. */ 4861 if (*dbp == '\0') 4862 continue; 4863 if (lowcase (*dbp == 'e')) 4864 { 4865 if (nocase_tail ("extern")) /* superfluous, really! */ 4866 { 4867 found_tag = FALSE; 4868 verify_tag = FALSE; 4869 } 4870 } 4871 else if (lowcase (*dbp) == 'f') 4872 { 4873 if (nocase_tail ("forward")) /* check for forward reference */ 4874 { 4875 found_tag = FALSE; 4876 verify_tag = FALSE; 4877 } 4878 } 4879 if (found_tag && verify_tag) /* not external proc, so make tag */ 4880 { 4881 found_tag = FALSE; 4882 verify_tag = FALSE; 4883 make_tag (name, namelen, TRUE, 4884 tline.buffer, taglen, save_lineno, save_lcno); 4885 continue; 4886 } 4887 } 4888 if (get_tagname) /* grab name of proc or fn */ 4889 { 4890 char *cp; 4891 4892 if (*dbp == '\0') 4893 continue; 4894 4895 /* Find block name. */ 4896 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++) 4897 continue; 4898 4899 /* Save all values for later tagging. 
*/ 4900 linebuffer_setlen (&tline, lb.len); 4901 strcpy (tline.buffer, lb.buffer); 4902 save_lineno = lineno; 4903 save_lcno = linecharno; 4904 name = tline.buffer + (dbp - lb.buffer); 4905 namelen = cp - dbp; 4906 taglen = cp - lb.buffer + 1; 4907 4908 dbp = cp; /* set dbp to e-o-token */ 4909 get_tagname = FALSE; 4910 found_tag = TRUE; 4911 continue; 4912 4913 /* And proceed to check for "extern". */ 4914 } 4915 else if (!incomment && !inquote && !found_tag) 4916 { 4917 /* Check for proc/fn keywords. */ 4918 switch (lowcase (c)) 4919 { 4920 case 'p': 4921 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */ 4922 get_tagname = TRUE; 4923 continue; 4924 case 'f': 4925 if (nocase_tail ("unction")) 4926 get_tagname = TRUE; 4927 continue; 4928 } 4929 } 4930 } /* while not eof */ 4931 4932 free (tline.buffer); 4933} 4934 4935 4936/* 4937 * Lisp tag functions 4938 * look for (def or (DEF, quote or QUOTE 4939 */ 4940 4941static void L_getit __P((void)); 4942 4943static void 4944L_getit () 4945{ 4946 if (*dbp == '\'') /* Skip prefix quote */ 4947 dbp++; 4948 else if (*dbp == '(') 4949 { 4950 dbp++; 4951 /* Try to skip "(quote " */ 4952 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE")) 4953 /* Ok, then skip "(" before name in (defstruct (foo)) */ 4954 dbp = skip_spaces (dbp); 4955 } 4956 get_tag (dbp, NULL); 4957} 4958 4959static void 4960Lisp_functions (inf) 4961 FILE *inf; 4962{ 4963 LOOP_ON_INPUT_LINES (inf, lb, dbp) 4964 { 4965 if (dbp[0] != '(') 4966 continue; 4967 4968 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3)) 4969 { 4970 dbp = skip_non_spaces (dbp); 4971 dbp = skip_spaces (dbp); 4972 L_getit (); 4973 } 4974 else 4975 { 4976 /* Check for (foo::defmumble name-defined ... 
*/ 4977 do 4978 dbp++; 4979 while (!notinname (*dbp) && *dbp != ':'); 4980 if (*dbp == ':') 4981 { 4982 do 4983 dbp++; 4984 while (*dbp == ':'); 4985 4986 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3)) 4987 { 4988 dbp = skip_non_spaces (dbp); 4989 dbp = skip_spaces (dbp); 4990 L_getit (); 4991 } 4992 } 4993 } 4994 } 4995} 4996 4997 4998/* 4999 * Lua script language parsing 5000 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004) 5001 * 5002 * "function" and "local function" are tags if they start at column 1. 5003 */ 5004static void 5005Lua_functions (inf) 5006 FILE *inf; 5007{ 5008 register char *bp; 5009 5010 LOOP_ON_INPUT_LINES (inf, lb, bp) 5011 { 5012 if (bp[0] != 'f' && bp[0] != 'l') 5013 continue; 5014 5015 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */ 5016 5017 if (LOOKING_AT (bp, "function")) 5018 get_tag (bp, NULL); 5019 } 5020} 5021 5022 5023/* 5024 * Postscript tags 5025 * Just look for lines where the first character is '/' 5026 * Also look at "defineps" for PSWrap 5027 * Ideas by: 5028 * Richard Mlynarik <mly@adoc.xerox.com> (1997) 5029 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999) 5030 */ 5031static void 5032PS_functions (inf) 5033 FILE *inf; 5034{ 5035 register char *bp, *ep; 5036 5037 LOOP_ON_INPUT_LINES (inf, lb, bp) 5038 { 5039 if (bp[0] == '/') 5040 { 5041 for (ep = bp+1; 5042 *ep != '\0' && *ep != ' ' && *ep != '{'; 5043 ep++) 5044 continue; 5045 make_tag (bp, ep - bp, TRUE, 5046 lb.buffer, ep - lb.buffer + 1, lineno, linecharno); 5047 } 5048 else if (LOOKING_AT (bp, "defineps")) 5049 get_tag (bp, NULL); 5050 } 5051} 5052 5053 5054/* 5055 * Forth tags 5056 * Ignore anything after \ followed by space or in ( ) 5057 * Look for words defined by : 5058 * Look for constant, code, create, defer, value, and variable 5059 * OBP extensions: Look for buffer:, field, 5060 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004) 5061 */ 5062static void 5063Forth_words (inf) 5064 FILE *inf; 5065{ 5066 register 
char *bp; 5067 5068 LOOP_ON_INPUT_LINES (inf, lb, bp) 5069 while ((bp = skip_spaces (bp))[0] != '\0') 5070 if (bp[0] == '\\' && iswhite(bp[1])) 5071 break; /* read next line */ 5072 else if (bp[0] == '(' && iswhite(bp[1])) 5073 do /* skip to ) or eol */ 5074 bp++; 5075 while (*bp != ')' && *bp != '\0'); 5076 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++) 5077 || LOOKING_AT_NOCASE (bp, "constant") 5078 || LOOKING_AT_NOCASE (bp, "code") 5079 || LOOKING_AT_NOCASE (bp, "create") 5080 || LOOKING_AT_NOCASE (bp, "defer") 5081 || LOOKING_AT_NOCASE (bp, "value") 5082 || LOOKING_AT_NOCASE (bp, "variable") 5083 || LOOKING_AT_NOCASE (bp, "buffer:") 5084 || LOOKING_AT_NOCASE (bp, "field")) 5085 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */ 5086 else 5087 bp = skip_non_spaces (bp); 5088} 5089 5090 5091/* 5092 * Scheme tag functions 5093 * look for (def... xyzzy 5094 * (def... (xyzzy 5095 * (def ... ((...(xyzzy .... 5096 * (set! xyzzy 5097 * Original code by Ken Haase (1985?) 5098 */ 5099static void 5100Scheme_functions (inf) 5101 FILE *inf; 5102{ 5103 register char *bp; 5104 5105 LOOP_ON_INPUT_LINES (inf, lb, bp) 5106 { 5107 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4)) 5108 { 5109 bp = skip_non_spaces (bp+4); 5110 /* Skip over open parens and white space */ 5111 while (notinname (*bp)) 5112 bp++; 5113 get_tag (bp, NULL); 5114 } 5115 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!")) 5116 get_tag (bp, NULL); 5117 } 5118} 5119 5120 5121/* Find tags in TeX and LaTeX input files. */ 5122 5123/* TEX_toktab is a table of TeX control sequences that define tags. 5124 * Each entry records one such control sequence. 5125 * 5126 * Original code from who knows whom. 5127 * Ideas by: 5128 * Stefan Monnier (2002) 5129 */ 5130 5131static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */ 5132 5133/* Default set of control sequences to put into TEX_toktab. 5134 The value of environment var TEXTAGS is prepended to this. 
*/ 5135static char *TEX_defenv = "\ 5136:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\ 5137:part:appendix:entry:index:def\ 5138:newcommand:renewcommand:newenvironment:renewenvironment"; 5139 5140static void TEX_mode __P((FILE *)); 5141static void TEX_decode_env __P((char *, char *)); 5142 5143static char TEX_esc = '\\'; 5144static char TEX_opgrp = '{'; 5145static char TEX_clgrp = '}'; 5146 5147/* 5148 * TeX/LaTeX scanning loop. 5149 */ 5150static void 5151TeX_commands (inf) 5152 FILE *inf; 5153{ 5154 char *cp; 5155 linebuffer *key; 5156 5157 /* Select either \ or ! as escape character. */ 5158 TEX_mode (inf); 5159 5160 /* Initialize token table once from environment. */ 5161 if (TEX_toktab == NULL) 5162 TEX_decode_env ("TEXTAGS", TEX_defenv); 5163 5164 LOOP_ON_INPUT_LINES (inf, lb, cp) 5165 { 5166 /* Look at each TEX keyword in line. */ 5167 for (;;) 5168 { 5169 /* Look for a TEX escape. */ 5170 while (*cp++ != TEX_esc) 5171 if (cp[-1] == '\0' || cp[-1] == '%') 5172 goto tex_next_line; 5173 5174 for (key = TEX_toktab; key->buffer != NULL; key++) 5175 if (strneq (cp, key->buffer, key->len)) 5176 { 5177 register char *p; 5178 int namelen, linelen; 5179 bool opgrp = FALSE; 5180 5181 cp = skip_spaces (cp + key->len); 5182 if (*cp == TEX_opgrp) 5183 { 5184 opgrp = TRUE; 5185 cp++; 5186 } 5187 for (p = cp; 5188 (!iswhite (*p) && *p != '#' && 5189 *p != TEX_opgrp && *p != TEX_clgrp); 5190 p++) 5191 continue; 5192 namelen = p - cp; 5193 linelen = lb.len; 5194 if (!opgrp || *p == TEX_clgrp) 5195 { 5196 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp) 5197 p++; 5198 linelen = p - lb.buffer + 1; 5199 } 5200 make_tag (cp, namelen, TRUE, 5201 lb.buffer, linelen, lineno, linecharno); 5202 goto tex_next_line; /* We only tag a line once */ 5203 } 5204 } 5205 tex_next_line: 5206 ; 5207 } 5208} 5209 5210#define TEX_LESC '\\' 5211#define TEX_SESC '!' 5212 5213/* Figure out whether TeX's escapechar is '\\' or '!' and set grouping 5214 chars accordingly. 
*/ 5215static void 5216TEX_mode (inf) 5217 FILE *inf; 5218{ 5219 int c; 5220 5221 while ((c = getc (inf)) != EOF) 5222 { 5223 /* Skip to next line if we hit the TeX comment char. */ 5224 if (c == '%') 5225 while (c != '\n' && c != EOF) 5226 c = getc (inf); 5227 else if (c == TEX_LESC || c == TEX_SESC ) 5228 break; 5229 } 5230 5231 if (c == TEX_LESC) 5232 { 5233 TEX_esc = TEX_LESC; 5234 TEX_opgrp = '{'; 5235 TEX_clgrp = '}'; 5236 } 5237 else 5238 { 5239 TEX_esc = TEX_SESC; 5240 TEX_opgrp = '<'; 5241 TEX_clgrp = '>'; 5242 } 5243 /* If the input file is compressed, inf is a pipe, and rewind may fail. 5244 No attempt is made to correct the situation. */ 5245 rewind (inf); 5246} 5247 5248/* Read environment and prepend it to the default string. 5249 Build token table. */ 5250static void 5251TEX_decode_env (evarname, defenv) 5252 char *evarname; 5253 char *defenv; 5254{ 5255 register char *env, *p; 5256 int i, len; 5257 5258 /* Append default string to environment. */ 5259 env = getenv (evarname); 5260 if (!env) 5261 env = defenv; 5262 else 5263 { 5264 char *oldenv = env; 5265 env = concat (oldenv, defenv, ""); 5266 } 5267 5268 /* Allocate a token table */ 5269 for (len = 1, p = env; p;) 5270 if ((p = etags_strchr (p, ':')) && *++p != '\0') 5271 len++; 5272 TEX_toktab = xnew (len, linebuffer); 5273 5274 /* Unpack environment string into token table. Be careful about */ 5275 /* zero-length strings (leading ':', "::" and trailing ':') */ 5276 for (i = 0; *env != '\0';) 5277 { 5278 p = etags_strchr (env, ':'); 5279 if (!p) /* End of environment string. */ 5280 p = env + strlen (env); 5281 if (p - env > 0) 5282 { /* Only non-zero strings. */ 5283 TEX_toktab[i].buffer = savenstr (env, p - env); 5284 TEX_toktab[i].len = p - env; 5285 i++; 5286 } 5287 if (*p) 5288 env = p + 1; 5289 else 5290 { 5291 TEX_toktab[i].buffer = NULL; /* Mark end of table. */ 5292 TEX_toktab[i].len = 0; 5293 break; 5294 } 5295 } 5296} 5297 5298 5299/* Texinfo support. Dave Love, Mar. 2000. 
*/ 5300static void 5301Texinfo_nodes (inf) 5302 FILE * inf; 5303{ 5304 char *cp, *start; 5305 LOOP_ON_INPUT_LINES (inf, lb, cp) 5306 if (LOOKING_AT (cp, "@node")) 5307 { 5308 start = cp; 5309 while (*cp != '\0' && *cp != ',') 5310 cp++; 5311 make_tag (start, cp - start, TRUE, 5312 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 5313 } 5314} 5315 5316 5317/* 5318 * HTML support. 5319 * Contents of <title>, <h1>, <h2>, <h3> are tags. 5320 * Contents of <a name=xxx> are tags with name xxx. 5321 * 5322 * Francesco Potort�, 2002. 5323 */ 5324static void 5325HTML_labels (inf) 5326 FILE * inf; 5327{ 5328 bool getnext = FALSE; /* next text outside of HTML tags is a tag */ 5329 bool skiptag = FALSE; /* skip to the end of the current HTML tag */ 5330 bool intag = FALSE; /* inside an html tag, looking for ID= */ 5331 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */ 5332 char *end; 5333 5334 5335 linebuffer_setlen (&token_name, 0); /* no name in buffer */ 5336 5337 LOOP_ON_INPUT_LINES (inf, lb, dbp) 5338 for (;;) /* loop on the same line */ 5339 { 5340 if (skiptag) /* skip HTML tag */ 5341 { 5342 while (*dbp != '\0' && *dbp != '>') 5343 dbp++; 5344 if (*dbp == '>') 5345 { 5346 dbp += 1; 5347 skiptag = FALSE; 5348 continue; /* look on the same line */ 5349 } 5350 break; /* go to next line */ 5351 } 5352 5353 else if (intag) /* look for "name=" or "id=" */ 5354 { 5355 while (*dbp != '\0' && *dbp != '>' 5356 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i') 5357 dbp++; 5358 if (*dbp == '\0') 5359 break; /* go to next line */ 5360 if (*dbp == '>') 5361 { 5362 dbp += 1; 5363 intag = FALSE; 5364 continue; /* look on the same line */ 5365 } 5366 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name=")) 5367 || LOOKING_AT_NOCASE (dbp, "id=")) 5368 { 5369 bool quoted = (dbp[0] == '"'); 5370 5371 if (quoted) 5372 for (end = ++dbp; *end != '\0' && *end != '"'; end++) 5373 continue; 5374 else 5375 for (end = dbp; *end != '\0' && intoken (*end); end++) 5376 continue; 
5377 linebuffer_setlen (&token_name, end - dbp); 5378 strncpy (token_name.buffer, dbp, end - dbp); 5379 token_name.buffer[end - dbp] = '\0'; 5380 5381 dbp = end; 5382 intag = FALSE; /* we found what we looked for */ 5383 skiptag = TRUE; /* skip to the end of the tag */ 5384 getnext = TRUE; /* then grab the text */ 5385 continue; /* look on the same line */ 5386 } 5387 dbp += 1; 5388 } 5389 5390 else if (getnext) /* grab next tokens and tag them */ 5391 { 5392 dbp = skip_spaces (dbp); 5393 if (*dbp == '\0') 5394 break; /* go to next line */ 5395 if (*dbp == '<') 5396 { 5397 intag = TRUE; 5398 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2])); 5399 continue; /* look on the same line */ 5400 } 5401 5402 for (end = dbp + 1; *end != '\0' && *end != '<'; end++) 5403 continue; 5404 make_tag (token_name.buffer, token_name.len, TRUE, 5405 dbp, end - dbp, lineno, linecharno); 5406 linebuffer_setlen (&token_name, 0); /* no name in buffer */ 5407 getnext = FALSE; 5408 break; /* go to next line */ 5409 } 5410 5411 else /* look for an interesting HTML tag */ 5412 { 5413 while (*dbp != '\0' && *dbp != '<') 5414 dbp++; 5415 if (*dbp == '\0') 5416 break; /* go to next line */ 5417 intag = TRUE; 5418 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2])) 5419 { 5420 inanchor = TRUE; 5421 continue; /* look on the same line */ 5422 } 5423 else if (LOOKING_AT_NOCASE (dbp, "<title>") 5424 || LOOKING_AT_NOCASE (dbp, "<h1>") 5425 || LOOKING_AT_NOCASE (dbp, "<h2>") 5426 || LOOKING_AT_NOCASE (dbp, "<h3>")) 5427 { 5428 intag = FALSE; 5429 getnext = TRUE; 5430 continue; /* look on the same line */ 5431 } 5432 dbp += 1; 5433 } 5434 } 5435} 5436 5437 5438/* 5439 * Prolog support 5440 * 5441 * Assumes that the predicate or rule starts at column 0. 5442 * Only the first clause of a predicate or rule is added. 
5443 * Original code by Sunichirou Sugou (1989) 5444 * Rewritten by Anders Lindgren (1996) 5445 */ 5446static int prolog_pr __P((char *, char *)); 5447static void prolog_skip_comment __P((linebuffer *, FILE *)); 5448static int prolog_atom __P((char *, int)); 5449 5450static void 5451Prolog_functions (inf) 5452 FILE *inf; 5453{ 5454 char *cp, *last; 5455 int len; 5456 int allocated; 5457 5458 allocated = 0; 5459 len = 0; 5460 last = NULL; 5461 5462 LOOP_ON_INPUT_LINES (inf, lb, cp) 5463 { 5464 if (cp[0] == '\0') /* Empty line */ 5465 continue; 5466 else if (iswhite (cp[0])) /* Not a predicate */ 5467 continue; 5468 else if (cp[0] == '/' && cp[1] == '*') /* comment. */ 5469 prolog_skip_comment (&lb, inf); 5470 else if ((len = prolog_pr (cp, last)) > 0) 5471 { 5472 /* Predicate or rule. Store the function name so that we 5473 only generate a tag for the first clause. */ 5474 if (last == NULL) 5475 last = xnew(len + 1, char); 5476 else if (len + 1 > allocated) 5477 xrnew (last, len + 1, char); 5478 allocated = len + 1; 5479 strncpy (last, cp, len); 5480 last[len] = '\0'; 5481 } 5482 } 5483 if (last != NULL) 5484 free (last); 5485} 5486 5487 5488static void 5489prolog_skip_comment (plb, inf) 5490 linebuffer *plb; 5491 FILE *inf; 5492{ 5493 char *cp; 5494 5495 do 5496 { 5497 for (cp = plb->buffer; *cp != '\0'; cp++) 5498 if (cp[0] == '*' && cp[1] == '/') 5499 return; 5500 readline (plb, inf); 5501 } 5502 while (!feof(inf)); 5503} 5504 5505/* 5506 * A predicate or rule definition is added if it matches: 5507 * <beginning of line><Prolog Atom><whitespace>( 5508 * or <beginning of line><Prolog Atom><whitespace>:- 5509 * 5510 * It is added to the tags database if it doesn't match the 5511 * name of the previous clause header. 5512 * 5513 * Return the size of the name of the predicate or rule, or 0 if no 5514 * header was found. 5515 */ 5516static int 5517prolog_pr (s, last) 5518 char *s; 5519 char *last; /* Name of last clause. 
*/ 5520{ 5521 int pos; 5522 int len; 5523 5524 pos = prolog_atom (s, 0); 5525 if (pos < 1) 5526 return 0; 5527 5528 len = pos; 5529 pos = skip_spaces (s + pos) - s; 5530 5531 if ((s[pos] == '.' 5532 || (s[pos] == '(' && (pos += 1)) 5533 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2))) 5534 && (last == NULL /* save only the first clause */ 5535 || len != (int)strlen (last) 5536 || !strneq (s, last, len))) 5537 { 5538 make_tag (s, len, TRUE, s, pos, lineno, linecharno); 5539 return len; 5540 } 5541 else 5542 return 0; 5543} 5544 5545/* 5546 * Consume a Prolog atom. 5547 * Return the number of bytes consumed, or -1 if there was an error. 5548 * 5549 * A prolog atom, in this context, could be one of: 5550 * - An alphanumeric sequence, starting with a lower case letter. 5551 * - A quoted arbitrary string. Single quotes can escape themselves. 5552 * Backslash quotes everything. 5553 */ 5554static int 5555prolog_atom (s, pos) 5556 char *s; 5557 int pos; 5558{ 5559 int origpos; 5560 5561 origpos = pos; 5562 5563 if (ISLOWER(s[pos]) || (s[pos] == '_')) 5564 { 5565 /* The atom is unquoted. */ 5566 pos++; 5567 while (ISALNUM(s[pos]) || (s[pos] == '_')) 5568 { 5569 pos++; 5570 } 5571 return pos - origpos; 5572 } 5573 else if (s[pos] == '\'') 5574 { 5575 pos++; 5576 5577 for (;;) 5578 { 5579 if (s[pos] == '\'') 5580 { 5581 pos++; 5582 if (s[pos] != '\'') 5583 break; 5584 pos++; /* A double quote */ 5585 } 5586 else if (s[pos] == '\0') 5587 /* Multiline quoted atoms are ignored. */ 5588 return -1; 5589 else if (s[pos] == '\\') 5590 { 5591 if (s[pos+1] == '\0') 5592 return -1; 5593 pos += 2; 5594 } 5595 else 5596 pos++; 5597 } 5598 return pos - origpos; 5599 } 5600 else 5601 return -1; 5602} 5603 5604 5605/* 5606 * Support for Erlang 5607 * 5608 * Generates tags for functions, defines, and records. 5609 * Assumes that Erlang functions start at column 0. 
5610 * Original code by Anders Lindgren (1996) 5611 */ 5612static int erlang_func __P((char *, char *)); 5613static void erlang_attribute __P((char *)); 5614static int erlang_atom __P((char *)); 5615 5616static void 5617Erlang_functions (inf) 5618 FILE *inf; 5619{ 5620 char *cp, *last; 5621 int len; 5622 int allocated; 5623 5624 allocated = 0; 5625 len = 0; 5626 last = NULL; 5627 5628 LOOP_ON_INPUT_LINES (inf, lb, cp) 5629 { 5630 if (cp[0] == '\0') /* Empty line */ 5631 continue; 5632 else if (iswhite (cp[0])) /* Not function nor attribute */ 5633 continue; 5634 else if (cp[0] == '%') /* comment */ 5635 continue; 5636 else if (cp[0] == '"') /* Sometimes, strings start in column one */ 5637 continue; 5638 else if (cp[0] == '-') /* attribute, e.g. "-define" */ 5639 { 5640 erlang_attribute (cp); 5641 if (last != NULL) 5642 { 5643 free (last); 5644 last = NULL; 5645 } 5646 } 5647 else if ((len = erlang_func (cp, last)) > 0) 5648 { 5649 /* 5650 * Function. Store the function name so that we only 5651 * generates a tag for the first clause. 5652 */ 5653 if (last == NULL) 5654 last = xnew (len + 1, char); 5655 else if (len + 1 > allocated) 5656 xrnew (last, len + 1, char); 5657 allocated = len + 1; 5658 strncpy (last, cp, len); 5659 last[len] = '\0'; 5660 } 5661 } 5662 if (last != NULL) 5663 free (last); 5664} 5665 5666 5667/* 5668 * A function definition is added if it matches: 5669 * <beginning of line><Erlang Atom><whitespace>( 5670 * 5671 * It is added to the tags database if it doesn't match the 5672 * name of the previous clause header. 5673 * 5674 * Return the size of the name of the function, or 0 if no function 5675 * was found. 5676 */ 5677static int 5678erlang_func (s, last) 5679 char *s; 5680 char *last; /* Name of last clause. */ 5681{ 5682 int pos; 5683 int len; 5684 5685 pos = erlang_atom (s); 5686 if (pos < 1) 5687 return 0; 5688 5689 len = pos; 5690 pos = skip_spaces (s + pos) - s; 5691 5692 /* Save only the first clause. 
*/ 5693 if (s[pos++] == '(' 5694 && (last == NULL 5695 || len != (int)strlen (last) 5696 || !strneq (s, last, len))) 5697 { 5698 make_tag (s, len, TRUE, s, pos, lineno, linecharno); 5699 return len; 5700 } 5701 5702 return 0; 5703} 5704 5705 5706/* 5707 * Handle attributes. Currently, tags are generated for defines 5708 * and records. 5709 * 5710 * They are on the form: 5711 * -define(foo, bar). 5712 * -define(Foo(M, N), M+N). 5713 * -record(graph, {vtab = notable, cyclic = true}). 5714 */ 5715static void 5716erlang_attribute (s) 5717 char *s; 5718{ 5719 char *cp = s; 5720 5721 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record")) 5722 && *cp++ == '(') 5723 { 5724 int len = erlang_atom (skip_spaces (cp)); 5725 if (len > 0) 5726 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno); 5727 } 5728 return; 5729} 5730 5731 5732/* 5733 * Consume an Erlang atom (or variable). 5734 * Return the number of bytes consumed, or -1 if there was an error. 5735 */ 5736static int 5737erlang_atom (s) 5738 char *s; 5739{ 5740 int pos = 0; 5741 5742 if (ISALPHA (s[pos]) || s[pos] == '_') 5743 { 5744 /* The atom is unquoted. */ 5745 do 5746 pos++; 5747 while (ISALNUM (s[pos]) || s[pos] == '_'); 5748 } 5749 else if (s[pos] == '\'') 5750 { 5751 for (pos++; s[pos] != '\''; pos++) 5752 if (s[pos] == '\0' /* multiline quoted atoms are ignored */ 5753 || (s[pos] == '\\' && s[++pos] == '\0')) 5754 return 0; 5755 pos++; 5756 } 5757 5758 return pos; 5759} 5760 5761 5762static char *scan_separators __P((char *)); 5763static void add_regex __P((char *, language *)); 5764static char *substitute __P((char *, char *, struct re_registers *)); 5765 5766/* 5767 * Take a string like "/blah/" and turn it into "blah", verifying 5768 * that the first and last characters are the same, and handling 5769 * quoted separator characters. Actually, stops on the occurrence of 5770 * an unquoted separator. Also process \t, \n, etc. and turn into 5771 * appropriate characters. Works in place. 
Null terminates name string. 5772 * Returns pointer to terminating separator, or NULL for 5773 * unterminated regexps. 5774 */ 5775static char * 5776scan_separators (name) 5777 char *name; 5778{ 5779 char sep = name[0]; 5780 char *copyto = name; 5781 bool quoted = FALSE; 5782 5783 for (++name; *name != '\0'; ++name) 5784 { 5785 if (quoted) 5786 { 5787 switch (*name) 5788 { 5789 case 'a': *copyto++ = '\007'; break; /* BEL (bell) */ 5790 case 'b': *copyto++ = '\b'; break; /* BS (back space) */ 5791 case 'd': *copyto++ = 0177; break; /* DEL (delete) */ 5792 case 'e': *copyto++ = 033; break; /* ESC (delete) */ 5793 case 'f': *copyto++ = '\f'; break; /* FF (form feed) */ 5794 case 'n': *copyto++ = '\n'; break; /* NL (new line) */ 5795 case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */ 5796 case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */ 5797 case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */ 5798 default: 5799 if (*name == sep) 5800 *copyto++ = sep; 5801 else 5802 { 5803 /* Something else is quoted, so preserve the quote. */ 5804 *copyto++ = '\\'; 5805 *copyto++ = *name; 5806 } 5807 break; 5808 } 5809 quoted = FALSE; 5810 } 5811 else if (*name == '\\') 5812 quoted = TRUE; 5813 else if (*name == sep) 5814 break; 5815 else 5816 *copyto++ = *name; 5817 } 5818 if (*name != sep) 5819 name = NULL; /* signal unterminated regexp */ 5820 5821 /* Terminate copied string. */ 5822 *copyto = '\0'; 5823 return name; 5824} 5825 5826/* Look at the argument of --regex or --no-regex and do the right 5827 thing. Same for each line of a regexp file. */ 5828static void 5829analyse_regex (regex_arg) 5830 char *regex_arg; 5831{ 5832 if (regex_arg == NULL) 5833 { 5834 free_regexps (); /* --no-regex: remove existing regexps */ 5835 return; 5836 } 5837 5838 /* A real --regexp option or a line in a regexp file. */ 5839 switch (regex_arg[0]) 5840 { 5841 /* Comments in regexp file or null arg to --regex. 
*/ 5842 case '\0': 5843 case ' ': 5844 case '\t': 5845 break; 5846 5847 /* Read a regex file. This is recursive and may result in a 5848 loop, which will stop when the file descriptors are exhausted. */ 5849 case '@': 5850 { 5851 FILE *regexfp; 5852 linebuffer regexbuf; 5853 char *regexfile = regex_arg + 1; 5854 5855 /* regexfile is a file containing regexps, one per line. */ 5856 regexfp = fopen (regexfile, "r"); 5857 if (regexfp == NULL) 5858 { 5859 pfatal (regexfile); 5860 return; 5861 } 5862 linebuffer_init (®exbuf); 5863 while (readline_internal (®exbuf, regexfp) > 0) 5864 analyse_regex (regexbuf.buffer); 5865 free (regexbuf.buffer); 5866 fclose (regexfp); 5867 } 5868 break; 5869 5870 /* Regexp to be used for a specific language only. */ 5871 case '{': 5872 { 5873 language *lang; 5874 char *lang_name = regex_arg + 1; 5875 char *cp; 5876 5877 for (cp = lang_name; *cp != '}'; cp++) 5878 if (*cp == '\0') 5879 { 5880 error ("unterminated language name in regex: %s", regex_arg); 5881 return; 5882 } 5883 *cp++ = '\0'; 5884 lang = get_language_from_langname (lang_name); 5885 if (lang == NULL) 5886 return; 5887 add_regex (cp, lang); 5888 } 5889 break; 5890 5891 /* Regexp to be used for any language. */ 5892 default: 5893 add_regex (regex_arg, NULL); 5894 break; 5895 } 5896} 5897 5898/* Separate the regexp pattern, compile it, 5899 and care for optional name and modifiers. 
*/ 5900static void 5901add_regex (regexp_pattern, lang) 5902 char *regexp_pattern; 5903 language *lang; 5904{ 5905 static struct re_pattern_buffer zeropattern; 5906 char sep, *pat, *name, *modifiers; 5907 const char *err; 5908 struct re_pattern_buffer *patbuf; 5909 regexp *rp; 5910 bool 5911 force_explicit_name = TRUE, /* do not use implicit tag names */ 5912 ignore_case = FALSE, /* case is significant */ 5913 multi_line = FALSE, /* matches are done one line at a time */ 5914 single_line = FALSE; /* dot does not match newline */ 5915 5916 5917 if (strlen(regexp_pattern) < 3) 5918 { 5919 error ("null regexp", (char *)NULL); 5920 return; 5921 } 5922 sep = regexp_pattern[0]; 5923 name = scan_separators (regexp_pattern); 5924 if (name == NULL) 5925 { 5926 error ("%s: unterminated regexp", regexp_pattern); 5927 return; 5928 } 5929 if (name[1] == sep) 5930 { 5931 error ("null name for regexp \"%s\"", regexp_pattern); 5932 return; 5933 } 5934 modifiers = scan_separators (name); 5935 if (modifiers == NULL) /* no terminating separator --> no name */ 5936 { 5937 modifiers = name; 5938 name = ""; 5939 } 5940 else 5941 modifiers += 1; /* skip separator */ 5942 5943 /* Parse regex modifiers. 
*/ 5944 for (; modifiers[0] != '\0'; modifiers++) 5945 switch (modifiers[0]) 5946 { 5947 case 'N': 5948 if (modifiers == name) 5949 error ("forcing explicit tag name but no name, ignoring", NULL); 5950 force_explicit_name = TRUE; 5951 break; 5952 case 'i': 5953 ignore_case = TRUE; 5954 break; 5955 case 's': 5956 single_line = TRUE; 5957 /* FALLTHRU */ 5958 case 'm': 5959 multi_line = TRUE; 5960 need_filebuf = TRUE; 5961 break; 5962 default: 5963 { 5964 char wrongmod [2]; 5965 wrongmod[0] = modifiers[0]; 5966 wrongmod[1] = '\0'; 5967 error ("invalid regexp modifier `%s', ignoring", wrongmod); 5968 } 5969 break; 5970 } 5971 5972 patbuf = xnew (1, struct re_pattern_buffer); 5973 *patbuf = zeropattern; 5974 if (ignore_case) 5975 { 5976 static char lc_trans[CHARS]; 5977 int i; 5978 for (i = 0; i < CHARS; i++) 5979 lc_trans[i] = lowcase (i); 5980 patbuf->translate = lc_trans; /* translation table to fold case */ 5981 } 5982 5983 if (multi_line) 5984 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */ 5985 else 5986 pat = regexp_pattern; 5987 5988 if (single_line) 5989 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE); 5990 else 5991 re_set_syntax (RE_SYNTAX_EMACS); 5992 5993 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf); 5994 if (multi_line) 5995 free (pat); 5996 if (err != NULL) 5997 { 5998 error ("%s while compiling pattern", err); 5999 return; 6000 } 6001 6002 rp = p_head; 6003 p_head = xnew (1, regexp); 6004 p_head->pattern = savestr (regexp_pattern); 6005 p_head->p_next = rp; 6006 p_head->lang = lang; 6007 p_head->pat = patbuf; 6008 p_head->name = savestr (name); 6009 p_head->error_signaled = FALSE; 6010 p_head->force_explicit_name = force_explicit_name; 6011 p_head->ignore_case = ignore_case; 6012 p_head->multi_line = multi_line; 6013} 6014 6015/* 6016 * Do the substitutions indicated by the regular expression and 6017 * arguments. 
6018 */ 6019static char * 6020substitute (in, out, regs) 6021 char *in, *out; 6022 struct re_registers *regs; 6023{ 6024 char *result, *t; 6025 int size, dig, diglen; 6026 6027 result = NULL; 6028 size = strlen (out); 6029 6030 /* Pass 1: figure out how much to allocate by finding all \N strings. */ 6031 if (out[size - 1] == '\\') 6032 fatal ("pattern error in \"%s\"", out); 6033 for (t = etags_strchr (out, '\\'); 6034 t != NULL; 6035 t = etags_strchr (t + 2, '\\')) 6036 if (ISDIGIT (t[1])) 6037 { 6038 dig = t[1] - '0'; 6039 diglen = regs->end[dig] - regs->start[dig]; 6040 size += diglen - 2; 6041 } 6042 else 6043 size -= 1; 6044 6045 /* Allocate space and do the substitutions. */ 6046 assert (size >= 0); 6047 result = xnew (size + 1, char); 6048 6049 for (t = result; *out != '\0'; out++) 6050 if (*out == '\\' && ISDIGIT (*++out)) 6051 { 6052 dig = *out - '0'; 6053 diglen = regs->end[dig] - regs->start[dig]; 6054 strncpy (t, in + regs->start[dig], diglen); 6055 t += diglen; 6056 } 6057 else 6058 *t++ = *out; 6059 *t = '\0'; 6060 6061 assert (t <= result + size); 6062 assert (t - result == (int)strlen (result)); 6063 6064 return result; 6065} 6066 6067/* Deallocate all regexps. */ 6068static void 6069free_regexps () 6070{ 6071 regexp *rp; 6072 while (p_head != NULL) 6073 { 6074 rp = p_head->p_next; 6075 free (p_head->pattern); 6076 free (p_head->name); 6077 free (p_head); 6078 p_head = rp; 6079 } 6080 return; 6081} 6082 6083/* 6084 * Reads the whole file as a single string from `filebuf' and looks for 6085 * multi-line regular expressions, creating tags on matches. 6086 * readline already dealt with normal regexps. 6087 * 6088 * Idea by Ben Wing <ben@666.com> (2002). 
6089 */ 6090static void 6091regex_tag_multiline () 6092{ 6093 char *buffer = filebuf.buffer; 6094 regexp *rp; 6095 char *name; 6096 6097 for (rp = p_head; rp != NULL; rp = rp->p_next) 6098 { 6099 int match = 0; 6100 6101 if (!rp->multi_line) 6102 continue; /* skip normal regexps */ 6103 6104 /* Generic initialisations before parsing file from memory. */ 6105 lineno = 1; /* reset global line number */ 6106 charno = 0; /* reset global char number */ 6107 linecharno = 0; /* reset global char number of line start */ 6108 6109 /* Only use generic regexps or those for the current language. */ 6110 if (rp->lang != NULL && rp->lang != curfdp->lang) 6111 continue; 6112 6113 while (match >= 0 && match < filebuf.len) 6114 { 6115 match = re_search (rp->pat, buffer, filebuf.len, charno, 6116 filebuf.len - match, &rp->regs); 6117 switch (match) 6118 { 6119 case -2: 6120 /* Some error. */ 6121 if (!rp->error_signaled) 6122 { 6123 error ("regexp stack overflow while matching \"%s\"", 6124 rp->pattern); 6125 rp->error_signaled = TRUE; 6126 } 6127 break; 6128 case -1: 6129 /* No match. */ 6130 break; 6131 default: 6132 if (match == rp->regs.end[0]) 6133 { 6134 if (!rp->error_signaled) 6135 { 6136 error ("regexp matches the empty string: \"%s\"", 6137 rp->pattern); 6138 rp->error_signaled = TRUE; 6139 } 6140 match = -3; /* exit from while loop */ 6141 break; 6142 } 6143 6144 /* Match occurred. Construct a tag. */ 6145 while (charno < rp->regs.end[0]) 6146 if (buffer[charno++] == '\n') 6147 lineno++, linecharno = charno; 6148 name = rp->name; 6149 if (name[0] == '\0') 6150 name = NULL; 6151 else /* make a named tag */ 6152 name = substitute (buffer, rp->name, &rp->regs); 6153 if (rp->force_explicit_name) 6154 /* Force explicit tag name, if a name is there. 
*/ 6155 pfnote (name, TRUE, buffer + linecharno, 6156 charno - linecharno + 1, lineno, linecharno); 6157 else 6158 make_tag (name, strlen (name), TRUE, buffer + linecharno, 6159 charno - linecharno + 1, lineno, linecharno); 6160 break; 6161 } 6162 } 6163 } 6164} 6165 6166 6167static bool 6168nocase_tail (cp) 6169 char *cp; 6170{ 6171 register int len = 0; 6172 6173 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len])) 6174 cp++, len++; 6175 if (*cp == '\0' && !intoken (dbp[len])) 6176 { 6177 dbp += len; 6178 return TRUE; 6179 } 6180 return FALSE; 6181} 6182 6183static void 6184get_tag (bp, namepp) 6185 register char *bp; 6186 char **namepp; 6187{ 6188 register char *cp = bp; 6189 6190 if (*bp != '\0') 6191 { 6192 /* Go till you get to white space or a syntactic break */ 6193 for (cp = bp + 1; !notinname (*cp); cp++) 6194 continue; 6195 make_tag (bp, cp - bp, TRUE, 6196 lb.buffer, cp - lb.buffer + 1, lineno, linecharno); 6197 } 6198 6199 if (namepp != NULL) 6200 *namepp = savenstr (bp, cp - bp); 6201} 6202 6203/* 6204 * Read a line of text from `stream' into `lbp', excluding the 6205 * newline or CR-NL, if any. Return the number of characters read from 6206 * `stream', which is the length of the line including the newline. 6207 * 6208 * On DOS or Windows we do not count the CR character, if any before the 6209 * NL, in the returned length; this mirrors the behavior of Emacs on those 6210 * platforms (for text files, it translates CR-NL to NL as it reads in the 6211 * file). 6212 * 6213 * If multi-line regular expressions are requested, each line read is 6214 * appended to `filebuf'. 6215 */ 6216static long 6217readline_internal (lbp, stream) 6218 linebuffer *lbp; 6219 register FILE *stream; 6220{ 6221 char *buffer = lbp->buffer; 6222 register char *p = lbp->buffer; 6223 register char *pend; 6224 int chars_deleted; 6225 6226 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. 
*/ 6227 6228 for (;;) 6229 { 6230 register int c = getc (stream); 6231 if (p == pend) 6232 { 6233 /* We're at the end of linebuffer: expand it. */ 6234 lbp->size *= 2; 6235 xrnew (buffer, lbp->size, char); 6236 p += buffer - lbp->buffer; 6237 pend = buffer + lbp->size; 6238 lbp->buffer = buffer; 6239 } 6240 if (c == EOF) 6241 { 6242 *p = '\0'; 6243 chars_deleted = 0; 6244 break; 6245 } 6246 if (c == '\n') 6247 { 6248 if (p > buffer && p[-1] == '\r') 6249 { 6250 p -= 1; 6251#ifdef DOS_NT 6252 /* Assume CRLF->LF translation will be performed by Emacs 6253 when loading this file, so CRs won't appear in the buffer. 6254 It would be cleaner to compensate within Emacs; 6255 however, Emacs does not know how many CRs were deleted 6256 before any given point in the file. */ 6257 chars_deleted = 1; 6258#else 6259 chars_deleted = 2; 6260#endif 6261 } 6262 else 6263 { 6264 chars_deleted = 1; 6265 } 6266 *p = '\0'; 6267 break; 6268 } 6269 *p++ = c; 6270 } 6271 lbp->len = p - buffer; 6272 6273 if (need_filebuf /* we need filebuf for multi-line regexps */ 6274 && chars_deleted > 0) /* not at EOF */ 6275 { 6276 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */ 6277 { 6278 /* Expand filebuf. */ 6279 filebuf.size *= 2; 6280 xrnew (filebuf.buffer, filebuf.size, char); 6281 } 6282 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len); 6283 filebuf.len += lbp->len; 6284 filebuf.buffer[filebuf.len++] = '\n'; 6285 filebuf.buffer[filebuf.len] = '\0'; 6286 } 6287 6288 return lbp->len + chars_deleted; 6289} 6290 6291/* 6292 * Like readline_internal, above, but in addition try to match the 6293 * input line against relevant regular expressions and manage #line 6294 * directives. 
6295 */ 6296static void 6297readline (lbp, stream) 6298 linebuffer *lbp; 6299 FILE *stream; 6300{ 6301 long result; 6302 6303 linecharno = charno; /* update global char number of line start */ 6304 result = readline_internal (lbp, stream); /* read line */ 6305 lineno += 1; /* increment global line number */ 6306 charno += result; /* increment global char number */ 6307 6308 /* Honour #line directives. */ 6309 if (!no_line_directive) 6310 { 6311 static bool discard_until_line_directive; 6312 6313 /* Check whether this is a #line directive. */ 6314 if (result > 12 && strneq (lbp->buffer, "#line ", 6)) 6315 { 6316 unsigned int lno; 6317 int start = 0; 6318 6319 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1 6320 && start > 0) /* double quote character found */ 6321 { 6322 char *endp = lbp->buffer + start; 6323 6324 while ((endp = etags_strchr (endp, '"')) != NULL 6325 && endp[-1] == '\\') 6326 endp++; 6327 if (endp != NULL) 6328 /* Ok, this is a real #line directive. Let's deal with it. */ 6329 { 6330 char *taggedabsname; /* absolute name of original file */ 6331 char *taggedfname; /* name of original file as given */ 6332 char *name; /* temp var */ 6333 6334 discard_until_line_directive = FALSE; /* found it */ 6335 name = lbp->buffer + start; 6336 *endp = '\0'; 6337 canonicalize_filename (name); /* for DOS */ 6338 taggedabsname = absolute_filename (name, tagfiledir); 6339 if (filename_is_absolute (name) 6340 || filename_is_absolute (curfdp->infname)) 6341 taggedfname = savestr (taggedabsname); 6342 else 6343 taggedfname = relative_filename (taggedabsname,tagfiledir); 6344 6345 if (streq (curfdp->taggedfname, taggedfname)) 6346 /* The #line directive is only a line number change. We 6347 deal with this afterwards. */ 6348 free (taggedfname); 6349 else 6350 /* The tags following this #line directive should be 6351 attributed to taggedfname. In order to do this, set 6352 curfdp accordingly. 
*/ 6353 { 6354 fdesc *fdp; /* file description pointer */ 6355 6356 /* Go look for a file description already set up for the 6357 file indicated in the #line directive. If there is 6358 one, use it from now until the next #line 6359 directive. */ 6360 for (fdp = fdhead; fdp != NULL; fdp = fdp->next) 6361 if (streq (fdp->infname, curfdp->infname) 6362 && streq (fdp->taggedfname, taggedfname)) 6363 /* If we remove the second test above (after the &&) 6364 then all entries pertaining to the same file are 6365 coalesced in the tags file. If we use it, then 6366 entries pertaining to the same file but generated 6367 from different files (via #line directives) will 6368 go into separate sections in the tags file. These 6369 alternatives look equivalent. The first one 6370 destroys some apparently useless information. */ 6371 { 6372 curfdp = fdp; 6373 free (taggedfname); 6374 break; 6375 } 6376 /* Else, if we already tagged the real file, skip all 6377 input lines until the next #line directive. */ 6378 if (fdp == NULL) /* not found */ 6379 for (fdp = fdhead; fdp != NULL; fdp = fdp->next) 6380 if (streq (fdp->infabsname, taggedabsname)) 6381 { 6382 discard_until_line_directive = TRUE; 6383 free (taggedfname); 6384 break; 6385 } 6386 /* Else create a new file description and use that from 6387 now on, until the next #line directive. */ 6388 if (fdp == NULL) /* not found */ 6389 { 6390 fdp = fdhead; 6391 fdhead = xnew (1, fdesc); 6392 *fdhead = *curfdp; /* copy curr. 
file description */ 6393 fdhead->next = fdp; 6394 fdhead->infname = savestr (curfdp->infname); 6395 fdhead->infabsname = savestr (curfdp->infabsname); 6396 fdhead->infabsdir = savestr (curfdp->infabsdir); 6397 fdhead->taggedfname = taggedfname; 6398 fdhead->usecharno = FALSE; 6399 fdhead->prop = NULL; 6400 fdhead->written = FALSE; 6401 curfdp = fdhead; 6402 } 6403 } 6404 free (taggedabsname); 6405 lineno = lno - 1; 6406 readline (lbp, stream); 6407 return; 6408 } /* if a real #line directive */ 6409 } /* if #line is followed by a a number */ 6410 } /* if line begins with "#line " */ 6411 6412 /* If we are here, no #line directive was found. */ 6413 if (discard_until_line_directive) 6414 { 6415 if (result > 0) 6416 { 6417 /* Do a tail recursion on ourselves, thus discarding the contents 6418 of the line buffer. */ 6419 readline (lbp, stream); 6420 return; 6421 } 6422 /* End of file. */ 6423 discard_until_line_directive = FALSE; 6424 return; 6425 } 6426 } /* if #line directives should be considered */ 6427 6428 { 6429 int match; 6430 regexp *rp; 6431 char *name; 6432 6433 /* Match against relevant regexps. */ 6434 if (lbp->len > 0) 6435 for (rp = p_head; rp != NULL; rp = rp->p_next) 6436 { 6437 /* Only use generic regexps or those for the current language. 6438 Also do not use multiline regexps, which is the job of 6439 regex_tag_multiline. */ 6440 if ((rp->lang != NULL && rp->lang != fdhead->lang) 6441 || rp->multi_line) 6442 continue; 6443 6444 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs); 6445 switch (match) 6446 { 6447 case -2: 6448 /* Some error. */ 6449 if (!rp->error_signaled) 6450 { 6451 error ("regexp stack overflow while matching \"%s\"", 6452 rp->pattern); 6453 rp->error_signaled = TRUE; 6454 } 6455 break; 6456 case -1: 6457 /* No match. */ 6458 break; 6459 case 0: 6460 /* Empty string matched. 
*/ 6461 if (!rp->error_signaled) 6462 { 6463 error ("regexp matches the empty string: \"%s\"", rp->pattern); 6464 rp->error_signaled = TRUE; 6465 } 6466 break; 6467 default: 6468 /* Match occurred. Construct a tag. */ 6469 name = rp->name; 6470 if (name[0] == '\0') 6471 name = NULL; 6472 else /* make a named tag */ 6473 name = substitute (lbp->buffer, rp->name, &rp->regs); 6474 if (rp->force_explicit_name) 6475 /* Force explicit tag name, if a name is there. */ 6476 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno); 6477 else 6478 make_tag (name, strlen (name), TRUE, 6479 lbp->buffer, match, lineno, linecharno); 6480 break; 6481 } 6482 } 6483 } 6484} 6485 6486 6487/* 6488 * Return a pointer to a space of size strlen(cp)+1 allocated 6489 * with xnew where the string CP has been copied. 6490 */ 6491static char * 6492savestr (cp) 6493 char *cp; 6494{ 6495 return savenstr (cp, strlen (cp)); 6496} 6497 6498/* 6499 * Return a pointer to a space of size LEN+1 allocated with xnew where 6500 * the string CP has been copied for at most the first LEN characters. 6501 */ 6502static char * 6503savenstr (cp, len) 6504 char *cp; 6505 int len; 6506{ 6507 register char *dp; 6508 6509 dp = xnew (len + 1, char); 6510 strncpy (dp, cp, len); 6511 dp[len] = '\0'; 6512 return dp; 6513} 6514 6515/* 6516 * Return the ptr in sp at which the character c last 6517 * appears; NULL if not found 6518 * 6519 * Identical to POSIX strrchr, included for portability. 6520 */ 6521static char * 6522etags_strrchr (sp, c) 6523 register const char *sp; 6524 register int c; 6525{ 6526 register const char *r; 6527 6528 r = NULL; 6529 do 6530 { 6531 if (*sp == c) 6532 r = sp; 6533 } while (*sp++); 6534 return (char *)r; 6535} 6536 6537/* 6538 * Return the ptr in sp at which the character c first 6539 * appears; NULL if not found 6540 * 6541 * Identical to POSIX strchr, included for portability. 
6542 */ 6543static char * 6544etags_strchr (sp, c) 6545 register const char *sp; 6546 register int c; 6547{ 6548 do 6549 { 6550 if (*sp == c) 6551 return (char *)sp; 6552 } while (*sp++); 6553 return NULL; 6554} 6555 6556/* 6557 * Compare two strings, ignoring case for alphabetic characters. 6558 * 6559 * Same as BSD's strcasecmp, included for portability. 6560 */ 6561static int 6562etags_strcasecmp (s1, s2) 6563 register const char *s1; 6564 register const char *s2; 6565{ 6566 while (*s1 != '\0' 6567 && (ISALPHA (*s1) && ISALPHA (*s2) 6568 ? lowcase (*s1) == lowcase (*s2) 6569 : *s1 == *s2)) 6570 s1++, s2++; 6571 6572 return (ISALPHA (*s1) && ISALPHA (*s2) 6573 ? lowcase (*s1) - lowcase (*s2) 6574 : *s1 - *s2); 6575} 6576 6577/* 6578 * Compare two strings, ignoring case for alphabetic characters. 6579 * Stop after a given number of characters 6580 * 6581 * Same as BSD's strncasecmp, included for portability. 6582 */ 6583static int 6584etags_strncasecmp (s1, s2, n) 6585 register const char *s1; 6586 register const char *s2; 6587 register int n; 6588{ 6589 while (*s1 != '\0' && n-- > 0 6590 && (ISALPHA (*s1) && ISALPHA (*s2) 6591 ? lowcase (*s1) == lowcase (*s2) 6592 : *s1 == *s2)) 6593 s1++, s2++; 6594 6595 if (n < 0) 6596 return 0; 6597 else 6598 return (ISALPHA (*s1) && ISALPHA (*s2) 6599 ? lowcase (*s1) - lowcase (*s2) 6600 : *s1 - *s2); 6601} 6602 6603/* Skip spaces (end of string is not space), return new pointer. */ 6604static char * 6605skip_spaces (cp) 6606 char *cp; 6607{ 6608 while (iswhite (*cp)) 6609 cp++; 6610 return cp; 6611} 6612 6613/* Skip non spaces, except end of string, return new pointer. */ 6614static char * 6615skip_non_spaces (cp) 6616 char *cp; 6617{ 6618 while (*cp != '\0' && !iswhite (*cp)) 6619 cp++; 6620 return cp; 6621} 6622 6623/* Print error message and exit. 
*/ 6624void 6625fatal (s1, s2) 6626 char *s1, *s2; 6627{ 6628 error (s1, s2); 6629 exit (EXIT_FAILURE); 6630} 6631 6632static void 6633pfatal (s1) 6634 char *s1; 6635{ 6636 perror (s1); 6637 exit (EXIT_FAILURE); 6638} 6639 6640static void 6641suggest_asking_for_help () 6642{ 6643 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n", 6644 progname, NO_LONG_OPTIONS ? "-h" : "--help"); 6645 exit (EXIT_FAILURE); 6646} 6647 6648/* Print error message. `s1' is printf control string, `s2' is arg for it. */ 6649static void 6650error (s1, s2) 6651 const char *s1, *s2; 6652{ 6653 fprintf (stderr, "%s: ", progname); 6654 fprintf (stderr, s1, s2); 6655 fprintf (stderr, "\n"); 6656} 6657 6658/* Return a newly-allocated string whose contents 6659 concatenate those of s1, s2, s3. */ 6660static char * 6661concat (s1, s2, s3) 6662 char *s1, *s2, *s3; 6663{ 6664 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3); 6665 char *result = xnew (len1 + len2 + len3 + 1, char); 6666 6667 strcpy (result, s1); 6668 strcpy (result + len1, s2); 6669 strcpy (result + len1 + len2, s3); 6670 result[len1 + len2 + len3] = '\0'; 6671 6672 return result; 6673} 6674 6675 6676/* Does the same work as the system V getcwd, but does not need to 6677 guess the buffer size in advance. */ 6678static char * 6679etags_getcwd () 6680{ 6681#ifdef HAVE_GETCWD 6682 int bufsize = 200; 6683 char *path = xnew (bufsize, char); 6684 6685 while (getcwd (path, bufsize) == NULL) 6686 { 6687 if (errno != ERANGE) 6688 pfatal ("getcwd"); 6689 bufsize *= 2; 6690 free (path); 6691 path = xnew (bufsize, char); 6692 } 6693 6694 canonicalize_filename (path); 6695 return path; 6696 6697#else /* not HAVE_GETCWD */ 6698#if MSDOS 6699 6700 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. 
*/ 6701 6702 getwd (path); 6703 6704 for (p = path; *p != '\0'; p++) 6705 if (*p == '\\') 6706 *p = '/'; 6707 else 6708 *p = lowcase (*p); 6709 6710 return strdup (path); 6711#else /* not MSDOS */ 6712 linebuffer path; 6713 FILE *pipe; 6714 6715 linebuffer_init (&path); 6716 pipe = (FILE *) popen ("pwd 2>/dev/null", "r"); 6717 if (pipe == NULL || readline_internal (&path, pipe) == 0) 6718 pfatal ("pwd"); 6719 pclose (pipe); 6720 6721 return path.buffer; 6722#endif /* not MSDOS */ 6723#endif /* not HAVE_GETCWD */ 6724} 6725 6726/* Return a newly allocated string containing the file name of FILE 6727 relative to the absolute directory DIR (which should end with a slash). */ 6728static char * 6729relative_filename (file, dir) 6730 char *file, *dir; 6731{ 6732 char *fp, *dp, *afn, *res; 6733 int i; 6734 6735 /* Find the common root of file and dir (with a trailing slash). */ 6736 afn = absolute_filename (file, cwd); 6737 fp = afn; 6738 dp = dir; 6739 while (*fp++ == *dp++) 6740 continue; 6741 fp--, dp--; /* back to the first differing char */ 6742#ifdef DOS_NT 6743 if (fp == afn && afn[0] != '/') /* cannot build a relative name */ 6744 return afn; 6745#endif 6746 do /* look at the equal chars until '/' */ 6747 fp--, dp--; 6748 while (*fp != '/'); 6749 6750 /* Build a sequence of "../" strings for the resulting relative file name. */ 6751 i = 0; 6752 while ((dp = etags_strchr (dp + 1, '/')) != NULL) 6753 i += 1; 6754 res = xnew (3*i + strlen (fp + 1) + 1, char); 6755 res[0] = '\0'; 6756 while (i-- > 0) 6757 strcat (res, "../"); 6758 6759 /* Add the file name relative to the common root of file and dir. */ 6760 strcat (res, fp + 1); 6761 free (afn); 6762 6763 return res; 6764} 6765 6766/* Return a newly allocated string containing the absolute file name 6767 of FILE given DIR (which should end with a slash). 
*/ 6768static char * 6769absolute_filename (file, dir) 6770 char *file, *dir; 6771{ 6772 char *slashp, *cp, *res; 6773 6774 if (filename_is_absolute (file)) 6775 res = savestr (file); 6776#ifdef DOS_NT 6777 /* We don't support non-absolute file names with a drive 6778 letter, like `d:NAME' (it's too much hassle). */ 6779 else if (file[1] == ':') 6780 fatal ("%s: relative file names with drive letters not supported", file); 6781#endif 6782 else 6783 res = concat (dir, file, ""); 6784 6785 /* Delete the "/dirname/.." and "/." substrings. */ 6786 slashp = etags_strchr (res, '/'); 6787 while (slashp != NULL && slashp[0] != '\0') 6788 { 6789 if (slashp[1] == '.') 6790 { 6791 if (slashp[2] == '.' 6792 && (slashp[3] == '/' || slashp[3] == '\0')) 6793 { 6794 cp = slashp; 6795 do 6796 cp--; 6797 while (cp >= res && !filename_is_absolute (cp)); 6798 if (cp < res) 6799 cp = slashp; /* the absolute name begins with "/.." */ 6800#ifdef DOS_NT 6801 /* Under MSDOS and NT we get `d:/NAME' as absolute 6802 file name, so the luser could say `d:/../NAME'. 6803 We silently treat this as `d:/NAME'. */ 6804 else if (cp[0] != '/') 6805 cp = slashp; 6806#endif 6807 memmove (cp, slashp + 3, strlen(slashp + 3) + 1); 6808 slashp = cp; 6809 continue; 6810 } 6811 else if (slashp[2] == '/' || slashp[2] == '\0') 6812 { 6813 memmove (slashp, slashp + 2, strlen(slashp + 2) + 1); 6814 continue; 6815 } 6816 } 6817 6818 slashp = etags_strchr (slashp + 1, '/'); 6819 } 6820 6821 if (res[0] == '\0') /* just a safety net: should never happen */ 6822 { 6823 free (res); 6824 return savestr ("/"); 6825 } 6826 else 6827 return res; 6828} 6829 6830/* Return a newly allocated string containing the absolute 6831 file name of dir where FILE resides given DIR (which should 6832 end with a slash). 
*/ 6833static char * 6834absolute_dirname (file, dir) 6835 char *file, *dir; 6836{ 6837 char *slashp, *res; 6838 char save; 6839 6840 canonicalize_filename (file); 6841 slashp = etags_strrchr (file, '/'); 6842 if (slashp == NULL) 6843 return savestr (dir); 6844 save = slashp[1]; 6845 slashp[1] = '\0'; 6846 res = absolute_filename (file, dir); 6847 slashp[1] = save; 6848 6849 return res; 6850} 6851 6852/* Whether the argument string is an absolute file name. The argument 6853 string must have been canonicalized with canonicalize_filename. */ 6854static bool 6855filename_is_absolute (fn) 6856 char *fn; 6857{ 6858 return (fn[0] == '/' 6859#ifdef DOS_NT 6860 || (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/') 6861#endif 6862 ); 6863} 6864 6865/* Translate backslashes into slashes. Works in place. */ 6866static void 6867canonicalize_filename (fn) 6868 register char *fn; 6869{ 6870#ifdef DOS_NT 6871 /* Canonicalize drive letter case. */ 6872 if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0])) 6873 fn[0] = upcase (fn[0]); 6874 /* Convert backslashes to slashes. */ 6875 for (; *fn != '\0'; fn++) 6876 if (*fn == '\\') 6877 *fn = '/'; 6878#else 6879 /* No action. */ 6880 fn = NULL; /* shut up the compiler */ 6881#endif 6882} 6883 6884 6885/* Initialize a linebuffer for use */ 6886static void 6887linebuffer_init (lbp) 6888 linebuffer *lbp; 6889{ 6890 lbp->size = (DEBUG) ? 3 : 200; 6891 lbp->buffer = xnew (lbp->size, char); 6892 lbp->buffer[0] = '\0'; 6893 lbp->len = 0; 6894} 6895 6896/* Set the minimum size of a string contained in a linebuffer. */ 6897static void 6898linebuffer_setlen (lbp, toksize) 6899 linebuffer *lbp; 6900 int toksize; 6901{ 6902 while (lbp->size <= toksize) 6903 { 6904 lbp->size *= 2; 6905 xrnew (lbp->buffer, lbp->size, char); 6906 } 6907 lbp->len = toksize; 6908} 6909 6910/* Like malloc but get fatal error if memory is exhausted. 
*/ 6911static PTR 6912xmalloc (size) 6913 unsigned int size; 6914{ 6915 PTR result = (PTR) malloc (size); 6916 if (result == NULL) 6917 fatal ("virtual memory exhausted", (char *)NULL); 6918 return result; 6919} 6920 6921static PTR 6922xrealloc (ptr, size) 6923 char *ptr; 6924 unsigned int size; 6925{ 6926 PTR result = (PTR) realloc (ptr, size); 6927 if (result == NULL) 6928 fatal ("virtual memory exhausted", (char *)NULL); 6929 return result; 6930} 6931 6932/* 6933 * Local Variables: 6934 * indent-tabs-mode: t 6935 * tab-width: 8 6936 * fill-column: 79 6937 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp") 6938 * c-file-style: "gnu" 6939 * End: 6940 */ 6941 6942/* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051 6943 (do not change this comment) */ 6944 6945/* etags.c ends here */ 6946