1/* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
2
3Copyright (C) 1984 The Regents of the University of California
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are
7met:
81. Redistributions of source code must retain the above copyright
9   notice, this list of conditions and the following disclaimer.
102. Redistributions in binary form must reproduce the above copyright
11   notice, this list of conditions and the following disclaimer in the
12   documentation and/or other materials provided with the
13   distribution.
143. Neither the name of the University nor the names of its
15   contributors may be used to endorse or promote products derived
16   from this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32  2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
33  Free Software Foundation, Inc.
34
35This file is not considered part of GNU Emacs.
36
37This program is free software; you can redistribute it and/or modify
38it under the terms of the GNU General Public License as published by
39the Free Software Foundation; either version 2 of the License, or
40(at your option) any later version.
41
42This program is distributed in the hope that it will be useful,
43but WITHOUT ANY WARRANTY; without even the implied warranty of
44MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
45GNU General Public License for more details.
46
47You should have received a copy of the GNU General Public License
48along with this program; if not, write to the Free Software Foundation,
49Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
50
51
52/* NB To comply with the above BSD license, copyright information is
53reproduced in etc/ETAGS.README.  That file should be updated when the
54above notices are.
55
56To the best of our knowledge, this code was originally based on the
57ctags.c distributed with BSD4.2, which was copyrighted by the
58University of California, as described above. */
59
60
61/*
62 * Authors:
63 *	Ctags originally by Ken Arnold.
64 *	Fortran added by Jim Kleckner.
65 *	Ed Pelegri-Llopart added C typedefs.
66 *	Gnu Emacs TAGS format and modifications by RMS?
67 * 1989	Sam Kendall added C++.
68 * 1992 Joseph B. Wells improved C and C++ parsing.
69 * 1993	Francesco Potortì reorganised C and C++.
70 * 1994	Line-by-line regexp tags by Tom Tromey.
71 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
72 * 2002 #line directives by Francesco Potortì.
73 *
74 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
75 */
76
77/*
78 * If you want to add support for a new language, start by looking at the LUA
79 * language, which is the simplest.  Alternatively, consider shipping a
80 * configuration file containing regexp definitions for etags.
81 */
82
/* Revision identification string for this program.  It has external linkage
   (not `static').  NOTE(review): the "@(#)" prefix looks like the marker
   searched for by what(1)-style tools -- confirm. */
83char pot_etags_version[] = "@(#) pot revision number is 17.26";
84
85#define	TRUE	1
86#define	FALSE	0
87
88#ifdef DEBUG
89#  undef DEBUG
90#  define DEBUG TRUE
91#else
92#  define DEBUG  FALSE
93#  define NDEBUG		/* disable assert */
94#endif
95
96#ifdef HAVE_CONFIG_H
97# include <config.h>
98  /* On some systems, Emacs defines static as nothing for the sake
99     of unexec.  We don't want that here since we don't use unexec. */
100# undef static
101# ifndef PTR			/* for XEmacs */
102#   define PTR void *
103# endif
104# ifndef __P			/* for XEmacs */
105#   define __P(args) args
106# endif
107#else  /* no config.h */
108# if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
109#   define __P(args) args	/* use prototypes */
110#   define PTR void *		/* for generic pointers */
111# else /* not standard C */
112#   define __P(args) ()		/* no prototypes */
113#   define const		/* remove const for old compilers' sake */
114#   define PTR long *		/* don't use void* */
115# endif
116#endif /* !HAVE_CONFIG_H */
117
118#ifndef _GNU_SOURCE
119# define _GNU_SOURCE 1		/* enables some compiler checks on GNU */
120#endif
121
122/* WIN32_NATIVE is for XEmacs.
123   MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
124#ifdef WIN32_NATIVE
125# undef MSDOS
126# undef  WINDOWSNT
127# define WINDOWSNT
128#endif /* WIN32_NATIVE */
129
130#ifdef MSDOS
131# undef MSDOS
132# define MSDOS TRUE
133# include <fcntl.h>
134# include <sys/param.h>
135# include <io.h>
136# ifndef HAVE_CONFIG_H
137#   define DOS_NT
138#   include <sys/config.h>
139# endif
140#else
141# define MSDOS FALSE
142#endif /* MSDOS */
143
144#ifdef WINDOWSNT
145# include <stdlib.h>
146# include <fcntl.h>
147# include <string.h>
148# include <direct.h>
149# include <io.h>
150# define MAXPATHLEN _MAX_PATH
151# undef HAVE_NTGUI
152# undef  DOS_NT
153# define DOS_NT
154# ifndef HAVE_GETCWD
155#   define HAVE_GETCWD
156# endif /* undef HAVE_GETCWD */
157#else /* not WINDOWSNT */
158# ifdef STDC_HEADERS
159#  include <stdlib.h>
160#  include <string.h>
161# else /* no standard C headers */
162    extern char *getenv ();
163#  ifdef VMS
164#   define EXIT_SUCCESS	1
165#   define EXIT_FAILURE	0
166#  else /* no VMS */
167#   define EXIT_SUCCESS	0
168#   define EXIT_FAILURE	1
169#  endif
170# endif
171#endif /* !WINDOWSNT */
172
173#ifdef HAVE_UNISTD_H
174# include <unistd.h>
175#else
176# if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
177    extern char *getcwd (char *buf, size_t size);
178# endif
179#endif /* HAVE_UNISTD_H */
180
181#include <stdio.h>
182#include <ctype.h>
183#include <errno.h>
184#ifndef errno
185  extern int errno;
186#endif
187#include <sys/types.h>
188#include <sys/stat.h>
189
190#include <assert.h>
191#ifdef NDEBUG
192# undef  assert			/* some systems have a buggy assert.h */
193# define assert(x) ((void) 0)
194#endif
195
196#if !defined (S_ISREG) && defined (S_IFREG)
197# define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
198#endif
199
200#ifdef NO_LONG_OPTIONS		/* define this if you don't have GNU getopt */
201# define NO_LONG_OPTIONS TRUE
202# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
203  extern char *optarg;
204  extern int optind, opterr;
205#else
206# define NO_LONG_OPTIONS FALSE
207# include <getopt.h>
208#endif /* NO_LONG_OPTIONS */
209
210#ifndef HAVE_CONFIG_H		/* this is a standalone compilation */
211# ifdef __CYGWIN__         	/* compiling on Cygwin */
212			     !!! NOTICE !!!
213 the regex.h distributed with Cygwin is not compatible with etags, alas!
214If you want regular expression support, you should delete this notice and
215	      arrange to use the GNU regex.h and regex.c.
216# endif
217#endif
218#include <regex.h>
219
220/* Define CTAGS to make the program "ctags" compatible with the usual one.
221 Leave it undefined to make the program "etags", which makes emacs-style
222 tag tables and tags typedefs, #defines and struct/union/enum by default. */
223#ifdef CTAGS
224# undef  CTAGS
225# define CTAGS TRUE
226#else
227# define CTAGS FALSE
228#endif
229
/*
 * String equality helpers.  Each evaluates to non-zero when the two strings
 * compare equal and to zero otherwise:
 *   streq (s, t)          exact comparison (strcmp)
 *   strcaseeq (s, t)      case-insensitive comparison (etags_strcasecmp)
 *   strneq (s, t, n)      exact comparison of at most N chars (strncmp)
 *   strncaseeq (s, t, n)  case-insensitive, at most N chars
 *                         (etags_strncasecmp)
 *
 * Passing a null pointer to any of the underlying functions is undefined,
 * so every macro asserts that BOTH operands are non-null.  The assertion is
 * only active in DEBUG builds; otherwise assert expands to a no-op.
 * S and T are evaluated more than once: do not pass expressions with side
 * effects.
 */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
234
/* Character classification helpers.
   CHAR(x) masks X down to the range 0 .. CHARS-1, so that a (possibly
   negative) plain char can safely index the lookup tables _wht, _nin, _btk,
   _itk and _etk, and can be handed to the <ctype.h> functions as a
   non-negative value. */
235#define CHARS 256		/* number of char values handled: 2^8, i.e. 2^CHAR_BIT for 8-bit chars */
236#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))
237#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white (see white) */
238#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name (see nonam) */
239#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token (see begtk) */
240#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token (see midtk) */
241#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens (see endtk) */
242
/* <ctype.h> wrappers that first reduce the argument with CHAR. */
243#define ISALNUM(c)	isalnum (CHAR(c))
244#define ISALPHA(c)	isalpha (CHAR(c))
245#define ISDIGIT(c)	isdigit (CHAR(c))
246#define ISLOWER(c)	islower (CHAR(c))
247
/* Case conversion, likewise applied to the masked value. */
248#define lowcase(c)	tolower (CHAR(c))
249#define upcase(c)	toupper (CHAR(c))
250
251
252/*
253 *	xnew, xrnew -- allocate, reallocate storage
254 *
255 * SYNOPSIS:	Type *xnew (int n, Type);
256 *		void xrnew (OldPointer, int n, Type);
257 */
/* xnew yields a pointer to room for N objects of type Type.  xrnew resizes
   the storage OP points to so that it holds N objects of type Type, and
   assigns the new address back to OP (OP is evaluated twice and must be an
   lvalue).  In DEBUG builds both go through the tracing allocators declared
   in chkmalloc.h, which also receive the calling file and line; otherwise
   they go through xmalloc / xrealloc.  The product N * sizeof (Type) is not
   checked for overflow here. */
258#if DEBUG
259# include "chkmalloc.h"
260# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
261						  (n) * sizeof (Type)))
262# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
263					(char *) (op), (n) * sizeof (Type)))
264#else
265# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
266# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
267					(char *) (op), (n) * sizeof (Type)))
268#endif
269
/* Booleans are plain ints in this file.  Several `bool' option flags have
   their addresses stored in the flag member of the longopts entries, so
   the type has to stay compatible with what getopt_long expects there. */
270#define bool int
271
/* Type of a per-language parse function: it receives the input stream and
   returns nothing.  Pointers to such functions are kept in the `function'
   member of `language'.  __P drops the parameter list on compilers without
   prototype support. */
272typedef void Lang_function __P((FILE *));
273
/* Describes one way of decompressing an input file: a file name suffix and
   the command associated with it.  See the `compressors' table. */
274typedef struct
275{
276  char *suffix;			/* file name suffix for this compressor */
277  char *command;		/* takes one arg and decompresses to stdout */
278} compressor;
279
/* Description of one language known to the program.  The entries of
   lang_names are initialized positionally, so the order of the members
   below must not change; members left out of an initializer are zero. */
280typedef struct
281{
282  char *name;			/* language name */
283  char *help;                   /* detailed help for the language */
284  Lang_function *function;	/* parse function */
285  char **suffixes;		/* name suffixes of this language's files */
286  char **filenames;		/* names of this language's files */
287  char **interpreters;		/* interpreters for this language */
288  bool metasource;		/* source used to generate other sources */
289} language;
290
/* Description of one input file.  Descriptions are chained through `next'
   into a singly linked list (see fdhead and curfdp). */
291typedef struct fdesc
292{
293  struct fdesc *next;		/* for the linked list */
294  char *infname;		/* uncompressed input file name */
295  char *infabsname;		/* absolute uncompressed input file name */
296  char *infabsdir;		/* absolute dir of input file */
297  char *taggedfname;		/* file name to write in tagfile */
298  language *lang;		/* language of file */
299  char *prop;			/* file properties to write in tagfile */
300  bool usecharno;		/* etags tags shall contain char number */
301  bool written;			/* entry written in the tags file */
302} fdesc;
303
/* One tag.  Nodes are linked through `left' and `right' into a binary tree
   (see nodehead); each node points back to the description of the file it
   was found in. */
304typedef struct node_st
305{				/* sorting structure */
306  struct node_st *left, *right;	/* left and right sons */
307  fdesc *fdp;			/* description of file to whom tag belongs */
308  char *name;			/* tag name */
309  char *regex;			/* search regexp */
310  bool valid;			/* write this tag on the tag file */
311  bool is_func;			/* function tag: use regexp in CTAGS mode */
312  bool been_warned;		/* warning already given for duplicated tag */
313  int lno;			/* line number tag is on */
314  long cno;			/* character number line starts on */
315} node;
316
317/*
318 * A `linebuffer' is a structure which holds a line of text.
319 * `readline_internal' reads a line from a stream into a linebuffer
320 * and works regardless of the length of the line.
321 * SIZE is the size of BUFFER, LEN is the length of the string in
322 * BUFFER after readline reads it.
323 */
324typedef struct
325{
326  long size;			/* size of BUFFER */
327  int len;			/* length of the string held in BUFFER */
328  char *buffer;			/* the text itself */
329} linebuffer;
330
331/* Used to support mixing of --lang and file names. */
/* Each element records one command-line item together with its kind, so
   that language and regexp specifications can be interleaved with file
   names; a list of them is ended by an element of type at_end. */
332typedef struct
333{
334  enum {
335    at_language,		/* a language specification */
336    at_regexp,			/* a regular expression */
337    at_filename,		/* a file name */
338    at_stdin,			/* read from stdin here */
339    at_end			/* stop parsing the list */
340  } arg_type;			/* argument type */
341  language *lang;		/* language associated with the argument */
342  char *what;			/* the argument itself */
343} argument;
344
345/* Structure defining a regular expression. */
/* Regexps are chained through `p_next' into a singly linked list (see
   p_head).  `pat' and `regs' use the types declared by <regex.h>. */
346typedef struct regexp
347{
348  struct regexp *p_next;	/* pointer to next in list */
349  language *lang;		/* if set, use only for this language */
350  char *pattern;		/* the regexp pattern */
351  char *name;			/* tag name */
352  struct re_pattern_buffer *pat; /* the compiled pattern */
353  struct re_registers regs;	/* re registers */
354  bool error_signaled;		/* already signaled for this regexp */
355  bool force_explicit_name;	/* do not allow implicit tag name */
356  bool ignore_case;		/* ignore case when matching */
357  bool multi_line;		/* do a multi-line match on the whole file */
358} regexp;
359
360
/* Forward declarations of the per-language parse functions.  All of them
   except C_entries have the Lang_function signature (one FILE * argument,
   no result); C_entries takes an additional leading int.  The ones listed
   in lang_names are stored in the `function' member of its entries.  __P
   removes the parameter lists on compilers without prototypes. */
361/* Many compilers barf on this:
362	Lang_function Ada_funcs;
363   so let's write it this way */
364static void Ada_funcs __P((FILE *));
365static void Asm_labels __P((FILE *));
366static void C_entries __P((int c_ext, FILE *));
367static void default_C_entries __P((FILE *));
368static void plain_C_entries __P((FILE *));
369static void Cjava_entries __P((FILE *));
370static void Cobol_paragraphs __P((FILE *));
371static void Cplusplus_entries __P((FILE *));
372static void Cstar_entries __P((FILE *));
373static void Erlang_functions __P((FILE *));
374static void Forth_words __P((FILE *));
375static void Fortran_functions __P((FILE *));
376static void HTML_labels __P((FILE *));
377static void Lisp_functions __P((FILE *));
378static void Lua_functions __P((FILE *));
379static void Makefile_targets __P((FILE *));
380static void Pascal_functions __P((FILE *));
381static void Perl_functions __P((FILE *));
382static void PHP_functions __P((FILE *));
383static void PS_functions __P((FILE *));
384static void Prolog_functions __P((FILE *));
385static void Python_functions __P((FILE *));
386static void Scheme_functions __P((FILE *));
387static void TeX_commands __P((FILE *));
388static void Texinfo_nodes __P((FILE *));
389static void Yacc_entries __P((FILE *));
390static void just_read_file __P((FILE *));
391
/* Forward declarations: informational output routines and the program
   entry point.  `main' is the only one with external linkage. */
392static void print_language_names __P((void));
393static void print_version __P((void));
394static void print_help __P((argument *));
395int main __P((int, char **));
396
/* Forward declarations: lookup of compressors and languages, and line
   reading into a linebuffer.  The definitions are not part of this section
   of the file; the names are the only description available here. */
397static compressor *get_compressor_from_suffix __P((char *, char **));
398static language *get_language_from_langname __P((const char *));
399static language *get_language_from_interpreter __P((char *));
400static language *get_language_from_filename __P((char *, bool));
401static void readline __P((linebuffer *, FILE *));
402static long readline_internal __P((linebuffer *, FILE *));
403static bool nocase_tail __P((char *));
404static void get_tag __P((char *, char **));
405
/* Forward declarations: regexp handling, diagnostics and tag tree
   insertion.  `fatal' is declared without `static' and therefore has
   external linkage, unlike its neighbours. */
406static void analyse_regex __P((char *));
407static void free_regexps __P((void));
408static void regex_tag_multiline __P((void));
409static void error __P((const char *, const char *));
410static void suggest_asking_for_help __P((void));
411void fatal __P((char *, char *));
412static void pfatal __P((char *));
413static void add_node __P((node *, node **));
414
/* Forward declarations: initialization, per-file processing, and creation,
   invalidation, output and release of tags and file descriptions. */
415static void init __P((void));
416static void process_file_name __P((char *, language *));
417static void process_file __P((FILE *, char *, language *));
418static void find_entries __P((FILE *));
419static void free_tree __P((node *));
420static void free_fdesc __P((fdesc *));
421static void pfnote __P((char *, bool, char *, int, int, long));
422static void make_tag __P((char *, int, bool, char *, int, int, long));
423static void invalidate_nodes __P((fdesc *, node **));
424static void put_entries __P((node *));
425
/* Forward declarations: string, file name, linebuffer and memory helpers.
   The etags_str* functions are the ones the strcaseeq / strncaseeq macros
   expand to.  xmalloc and xrealloc are what the non-DEBUG xnew / xrnew
   macros call; they take the size as an unsigned int and return PTR, the
   generic pointer type selected in the configuration section. */
426static char *concat __P((char *, char *, char *));
427static char *skip_spaces __P((char *));
428static char *skip_non_spaces __P((char *));
429static char *savenstr __P((char *, int));
430static char *savestr __P((char *));
431static char *etags_strchr __P((const char *, int));
432static char *etags_strrchr __P((const char *, int));
433static int etags_strcasecmp __P((const char *, const char *));
434static int etags_strncasecmp __P((const char *, const char *, int));
435static char *etags_getcwd __P((void));
436static char *relative_filename __P((char *, char *));
437static char *absolute_filename __P((char *, char *));
438static char *absolute_dirname __P((char *, char *));
439static bool filename_is_absolute __P((char *f));
440static void canonicalize_filename __P((char *));
441static void linebuffer_init __P((linebuffer *));
442static void linebuffer_setlen __P((linebuffer *, int));
443static PTR xmalloc __P((unsigned int));
444static PTR xrealloc __P((char *, unsigned int));
445
446
/* File-scope program state.  Everything here is `static' and, apart from
   searchar and invalidcharno, starts out zero-initialized. */
447static char searchar = '/';	/* use /.../ searches */
448
449static char *tagfile;		/* output file */
450static char *progname;		/* name this program was invoked with */
451static char *cwd;		/* current working directory */
452static char *tagfiledir;	/* directory of tagfile */
453static FILE *tagf;		/* ioptr for tags file */
454
455static fdesc *fdhead;		/* head of file description list */
456static fdesc *curfdp;		/* current file description */
457static int lineno;		/* line number of current line */
458static long charno;		/* current character number */
459static long linecharno;		/* charno of start of current line */
460static char *dbp;		/* pointer to start of current tag */
461
/* NOTE(review): presumably the marker for "no character number known";
   it is an int while charno and linecharno are long -- confirm usage. */
462static const int invalidcharno = -1;
463
464static node *nodehead;		/* the head of the binary tree of tags */
465static node *last_node;		/* the last node created */
466
467static linebuffer lb;		/* the current line */
468static linebuffer filebuf;	/* a buffer containing the whole file */
469static linebuffer token_name;	/* a buffer containing a tag name */
470
/* Lookup tables behind the iswhite, notinname, intoken, begtoken and
   endtoken macros, one slot per possible char value, followed by the
   character sets they describe (white -> _wht, nonam -> _nin,
   endtk -> _etk, begtk -> _btk, midtk -> _itk, going by the macro
   comments).  The tables are not filled in here; the existing comment
   points at `init' for that. */
471/* boolean "functions" (see init)	*/
472static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
473static char
474  /* white chars */
475  *white = " \f\t\n\r\v",
476  /* not in a name */
477  *nonam = " \f\t\n\r()=,;",	/* look at make_tag before modifying! */
478  /* token ending chars */
479  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
480  /* token starting chars */
481  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
482  /* valid in-token chars */
483  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
484
/* Flags controlled by command-line options.  packages_only, declarations,
   no_line_directive, no_duplicates, members and globals are set directly
   by getopt_long through the addresses stored in longopts; the handling of
   the other flags is not part of this section of the file. */
485static bool append_to_tagfile;	/* -a: append to tags */
486/* The next four default to TRUE for etags, but to FALSE for ctags.  */
487static bool typedefs;		/* -t: create tags for C and Ada typedefs */
488static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
489				/* 0 struct/enum/union decls, and C++ */
490				/* member functions. */
491static bool constantypedefs;	/* -d: create tags for C #define, enum */
492				/* constants and variables. */
493				/* -D: opposite of -d.  Default under ctags. */
494static bool globals;		/* create tags for global variables */
495static bool members;		/* create tags for C member variables */
496static bool declarations;	/* --declarations: tag them and extern in C&Co*/
497static bool no_line_directive;	/* ignore #line directives (undocumented) */
498static bool no_duplicates;	/* no duplicate tags for ctags (undocumented) */
499static bool update;		/* -u: update tags */
500static bool vgrind_style;	/* -v: create vgrind style index output */
501static bool no_warnings;	/* -w: suppress warnings (undocumented) */
502static bool cxref_style;	/* -x: create cxref style output */
503static bool cplusplus;		/* .[hc] means C++, not C */
504static bool ignoreindent;	/* -I: ignore indentation in C */
505static bool packages_only;	/* --packages-only: in Ada, only tag packages*/
506
507/* STDIN is defined in LynxOS system headers */
508#ifdef STDIN
509# undef STDIN
510#endif
511
/* 0x1001 lies outside the range of a single char, so it cannot collide
   with any of the option letters used as return values in longopts. */
512#define STDIN 0x1001		/* returned by getopt_long on --parse-stdin */
513static bool parsing_stdin;	/* --parse-stdin used */
514
515static regexp *p_head;		/* list of all regexps */
516static bool need_filebuf;	/* some regexes are multi-line */
517
/* Long options handed to getopt_long.  Each entry is
   { name, argument requirement, flag pointer, value }: with a NULL flag
   pointer getopt_long returns the value (here an option letter, or STDIN);
   with a non-NULL one it stores the value in the pointed-to flag instead.
   The common options come first, then either the ctags-only or the
   etags-only ones depending on CTAGS, then the terminating { NULL } entry.
   NOTE(review): when NO_LONG_OPTIONS is defined this file does not include
   <getopt.h>; confirm that `struct option' is still declared in that
   configuration. */
518static struct option longopts[] =
519{
520  { "append",             no_argument,       NULL,               'a'   },
521  { "packages-only",      no_argument,       &packages_only,     TRUE  },
522  { "c++",                no_argument,       NULL,               'C'   },
523  { "declarations",       no_argument,       &declarations,      TRUE  },
524  { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
525  { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
526  { "help",               no_argument,       NULL,               'h'   },
  /* NOTE(review): same name as the previous entry; a lookup by name looks
     like it can only ever reach the first one -- confirm the intent. */
527  { "help",               no_argument,       NULL,               'H'   },
528  { "ignore-indentation", no_argument,       NULL,               'I'   },
529  { "language",           required_argument, NULL,               'l'   },
530  { "members",            no_argument,       &members,           TRUE  },
531  { "no-members",         no_argument,       &members,           FALSE },
532  { "output",             required_argument, NULL,               'o'   },
533  { "regex",              required_argument, NULL,               'r'   },
534  { "no-regex",           no_argument,       NULL,               'R'   },
535  { "ignore-case-regex",  required_argument, NULL,               'c'   },
536  { "parse-stdin",        required_argument, NULL,               STDIN },
537  { "version",            no_argument,       NULL,               'V'   },
538
539#if CTAGS /* Ctags options */
540  { "backward-search",    no_argument,       NULL,               'B'   },
541  { "cxref",              no_argument,       NULL,               'x'   },
542  { "defines",            no_argument,       NULL,               'd'   },
543  { "globals",            no_argument,       &globals,           TRUE  },
544  { "typedefs",           no_argument,       NULL,               't'   },
545  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
546  { "update",             no_argument,       NULL,               'u'   },
547  { "vgrind",             no_argument,       NULL,               'v'   },
548  { "no-warn",            no_argument,       NULL,               'w'   },
549
550#else /* Etags options */
551  { "no-defines",         no_argument,       NULL,               'D'   },
552  { "no-globals",         no_argument,       &globals,           FALSE },
553  { "include",            required_argument, NULL,               'i'   },
554#endif
555  { NULL }
556};
557
/* Known compressed-file suffixes and the command used to decompress each
   of them to standard output.  The four gzip-style suffixes share one
   command.  The table ends with an entry whose suffix is NULL. */
558static compressor compressors[] =
559{
560  { "z", "gzip -d -c"},
561  { "Z", "gzip -d -c"},
562  { "gz", "gzip -d -c"},
563  { "GZ", "gzip -d -c"},
564  { "bz2", "bzip2 -d -c" },
565  { NULL }
566};
567
568/*
569 * Language stuff.
570 */
571
572/* Ada code */
/* NULL-terminated list of file name suffixes, and the help text, used by
   the "ada" entry of lang_names. */
573static char *Ada_suffixes [] =
574  { "ads", "adb", "ada", NULL };
575static char Ada_help [] =
576"In Ada code, functions, procedures, packages, tasks and types are\n\
577tags.  Use the `--packages-only' option to create tags for\n\
578packages only.\n\
579Ada tag names have suffixes indicating the type of entity:\n\
580	Entity type:	Qualifier:\n\
581	------------	----------\n\
582	function	/f\n\
583	procedure	/p\n\
584	package spec	/s\n\
585	package body	/b\n\
586	type		/t\n\
587	task		/k\n\
588Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
589body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
590will just search for any tag `bidule'.";
591
592/* Assembly code */
/* NULL-terminated suffix list and help text used by the "asm" entry of
   lang_names. */
593static char *Asm_suffixes [] =
594  { "a",	/* Unix assembler */
595    "asm", /* Microcontroller assembly */
596    "def", /* BSO/Tasking definition includes  */
597    "inc", /* Microcontroller include files */
598    "ins", /* Microcontroller include files */
599    "s", "sa", /* Unix assembler */
600    "S",   /* cpp-processed Unix assembler */
601    "src", /* BSO/Tasking C compiler output */
602    NULL
603  };
604static char Asm_help [] =
605"In assembler code, labels appearing at the beginning of a line,\n\
606followed by a colon, are tags.";
607
608
609/* Note that .c and .h can be considered C++, if the --c++ flag was
610   given, or if the `class' or `template' keywords are met inside the file.
611   That is why default_C_entries is called for these. */
/* NULL-terminated suffix list and help text used by the "c" entry of
   lang_names. */
612static char *default_C_suffixes [] =
613  { "c", "h", NULL };
614static char default_C_help [] =
615"In C code, any C function or typedef is a tag, and so are\n\
616definitions of `struct', `union' and `enum'.  `#define' macro\n\
617definitions and `enum' constants are tags unless you specify\n\
618`--no-defines'.  Global variables are tags unless you specify\n\
619`--no-globals' and so are struct members unless you specify\n\
620`--no-members'.  Use of `--no-globals', `--no-defines' and\n\
621`--no-members' can make the tags table file much smaller.\n\
622You can tag function declarations and external variables by\n\
623using `--declarations'.";
624
/* NULL-terminated suffix list and help text used by the "c++" entry of
   lang_names. */
625static char *Cplusplus_suffixes [] =
626  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
627    "M",			/* Objective C++ */
628    "pdb",			/* Postscript with C syntax */
629    NULL };
630static char Cplusplus_help [] =
631"In C++ code, all the tag constructs of C code are tagged.  (Use\n\
632--help --lang=c --lang=c++ for full help.)\n\
633In addition to C tags, member functions are also recognized.  Member\n\
634variables are recognized unless you use the `--no-members' option.\n\
635Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
636and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
637`operator+'.";
638
/* NULL-terminated suffix list and help text used by the "java" entry of
   lang_names. */
639static char *Cjava_suffixes [] =
640  { "java", NULL };
641static char Cjava_help [] =
642"In Java code, all the tags constructs of C and C++ code are\n\
643tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
644
645
/* NULL-terminated suffix list and help text used by the "cobol" entry of
   lang_names. */
646static char *Cobol_suffixes [] =
647  { "COB", "cob", NULL };
648static char Cobol_help [] =
649"In Cobol code, tags are paragraph names; that is, any word\n\
650starting in column 8 and followed by a period.";
651
/* NULL-terminated suffix list used by the "c*" entry of lang_names.  There
   is no dedicated help text; that entry uses no_lang_help. */
652static char *Cstar_suffixes [] =
653  { "cs", "hs", NULL };
654
/* NULL-terminated suffix list and help text used by the "erlang" entry of
   lang_names. */
655static char *Erlang_suffixes [] =
656  { "erl", "hrl", NULL };
657static char Erlang_help [] =
658"In Erlang code, the tags are the functions, records and macros\n\
659defined in the file.";
660
/* NULL-terminated list of file name suffixes used by the "forth" entry of
   lang_names.  Declared `static' like every other suffix table in this
   file, so that the name does not leak into the global namespace. */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
/* Help text used by the "forth" entry of lang_names. */
663static char Forth_help [] =
664"In Forth code, tags are words defined by `:',\n\
665constant, code, create, defer, value, variable, buffer:, field.";
666
/* NULL-terminated suffix list and help text used by the "fortran" entry of
   lang_names. */
667static char *Fortran_suffixes [] =
668  { "F", "f", "f90", "for", NULL };
669static char Fortran_help [] =
670"In Fortran code, functions, subroutines and block data are tags.";
671
/* NULL-terminated suffix list and help text used by the "html" entry of
   lang_names. */
672static char *HTML_suffixes [] =
673  { "htm", "html", "shtml", NULL };
674static char HTML_help [] =
675"In HTML input files, the tags are the `title' and the `h1', `h2',\n\
676`h3' headers.  Also, tags are `name=' in anchors and all\n\
677occurrences of `id='.";
678
/* NULL-terminated suffix list and help text used by the "lisp" entry of
   lang_names. */
679static char *Lisp_suffixes [] =
680  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
681static char Lisp_help [] =
682"In Lisp code, any function defined with `defun', any variable\n\
683defined with `defvar' or `defconst', and in general the first\n\
684argument of any expression that starts with `(def' in column zero\n\
685is a tag.";
686
/* NULL-terminated suffix list and help text used by the "lua" entry of
   lang_names. */
687static char *Lua_suffixes [] =
688  { "lua", "LUA", NULL };
689static char Lua_help [] =
690"In Lua scripts, all functions are tags.";
691
/* Makefiles are recognized by whole file name rather than by suffix: the
   "makefile" entry of lang_names has a NULL suffix list and uses this
   NULL-terminated list for its `filenames' member.  The help text follows. */
692static char *Makefile_filenames [] =
693  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
694static char Makefile_help [] =
695"In makefiles, targets are tags; additionally, variables are tags\n\
696unless you specify `--no-globals'.";
697
/* NULL-terminated suffix list and help text used by the "objc" entry of
   lang_names, which parses with plain_C_entries. */
698static char *Objc_suffixes [] =
699  { "lm",			/* Objective lex file */
700    "m",			/* Objective C file */
701     NULL };
702static char Objc_help [] =
703"In Objective C code, tags include Objective C definitions for classes,\n\
704class categories, methods and protocols.  Tags for variables and\n\
705functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
706(Use --help --lang=c --lang=objc --lang=java for full help.)";
707
/* NULL-terminated suffix list and help text used by the "pascal" entry of
   lang_names. */
708static char *Pascal_suffixes [] =
709  { "p", "pas", NULL };
710static char Pascal_help [] =
711"In Pascal code, the tags are the functions and procedures defined\n\
712in the file.";
713/* " // this is for working around an Emacs highlighting bug... */
714
/* Perl is the only language in the visible part of lang_names that also
   supplies an interpreter list.  Both lists are NULL-terminated.
   NOTE(review): "@PERL@" looks like a configure-style placeholder --
   confirm whether it is meant to be substituted or matched literally. */
715static char *Perl_suffixes [] =
716  { "pl", "pm", NULL };
717static char *Perl_interpreters [] =
718  { "perl", "@PERL@", NULL };
719static char Perl_help [] =
720"In Perl code, the tags are the packages, subroutines and variables\n\
721defined by the `package', `sub', `my' and `local' keywords.  Use\n\
722`--globals' if you want to tag global variables.  Tags for\n\
723subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
724defined in the default package is `main::SUB'.";
725
/* NULL-terminated suffix list and help text used by the "php" entry of
   lang_names. */
726static char *PHP_suffixes [] =
727  { "php", "php3", "php4", NULL };
728static char PHP_help [] =
729"In PHP code, tags are functions, classes and defines.  Unless you use\n\
730the `--no-members' option, vars are tags too.";
731
/* NULL-terminated suffix list used by the "proc" entry of lang_names,
   which parses with plain_C_entries and uses no_lang_help. */
732static char *plain_C_suffixes [] =
733  { "pc",			/* Pro*C file */
734     NULL };
735
/* NULL-terminated suffix list and help text used by the "postscript" entry
   of lang_names. */
736static char *PS_suffixes [] =
737  { "ps", "psw", NULL };	/* .psw is for PSWrap */
738static char PS_help [] =
739"In PostScript code, the tags are the functions.";
740
/* NULL-terminated suffix list and help text used by the "prolog" entry of
   lang_names. */
741static char *Prolog_suffixes [] =
742  { "prolog", NULL };
743static char Prolog_help [] =
744"In Prolog code, tags are predicates and rules at the beginning of\n\
745line.";
746
/* NULL-terminated suffix list and help text used by the "python" entry of
   lang_names. */
747static char *Python_suffixes [] =
748  { "py", NULL };
749static char Python_help [] =
750"In Python code, `def' or `class' at the beginning of a line\n\
751generate a tag.";
752
/* NULL-terminated suffix list and help text used by the "scheme" entry of
   lang_names. */
753/* Can't do the `SCM' or `scm' prefix with a version number. */
754static char *Scheme_suffixes [] =
755  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
756static char Scheme_help [] =
757"In Scheme code, tags include anything defined with `def' or with a\n\
758construct whose name starts with `def'.  They also include\n\
759variables set with `set!' at top level in the file.";
760
/* NULL-terminated suffix list and help text used by the "tex" entry of
   lang_names.  Backslashes in the help text are doubled because it is a C
   string literal. */
761static char *TeX_suffixes [] =
762  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
763static char TeX_help [] =
764"In LaTeX text, the argument of any of the commands `\\chapter',\n\
765`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
766`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
767`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
768`\\newenvironment' or `\\renewenvironment' is a tag.\n\
769\n\
770Other commands can be specified by setting the environment variable\n\
771`TEXTAGS' to a colon-separated list like, for example,\n\
772     TEXTAGS=\"mycommand:myothercommand\".";
773
774
/* NULL-terminated suffix list and help text used by the "texinfo" entry of
   lang_names. */
775static char *Texinfo_suffixes [] =
776  { "texi", "texinfo", "txi", NULL };
777static char Texinfo_help [] =
778"for texinfo files, lines starting with @node are tagged.";
779
/* NULL-terminated suffix list and help text for Yacc/Bison input.
   NOTE(review): presumably referenced from a "yacc" entry of lang_names,
   which lies outside this section of the file -- confirm. */
780static char *Yacc_suffixes [] =
781  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
782static char Yacc_help [] =
783"In Bison or Yacc input files, each rule defines as a tag the\n\
784nonterminal it constructs.  The portions of the file that contain\n\
785C code are parsed as C code (use --help --lang=c --lang=yacc\n\
786for full help).";
787
/* Help text shown for the pseudo-language `auto'.  It is user-visible
   output; the wording reads "based on" rather than the ungrammatical
   "base on". */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
791
/* Help text for the pseudo-language `none'. */
792static char none_help [] =
793"`none' is not a real language, it indicates to only do\n\
794regexp processing on files.";
795
/* Fallback help text used by the lang_names entries that have no help of
   their own ("c*" and "proc" among the visible ones). */
796static char no_lang_help [] =
797"No detailed help available for this language.";
798
799
800/*
801 * Table of languages.
802 *
803 * It is ok for a given function to be listed under more than one
804 * name.  I just didn't.
805 */
806
807static language lang_names [] =
808{
809  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
810  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
811  { "c",         default_C_help, default_C_entries, default_C_suffixes },
812  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
813  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
814  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
815  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
816  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
817  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
818  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
819  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
820  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
821  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
822  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
823  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
824  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
825  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
826  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
827  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
828  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
829  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
830  { "python",    Python_help,    Python_functions,  Python_suffixes    },
831  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
832  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
833  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
834  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
835  { "auto",      auto_help },                      /* default guessing scheme */
836  { "none",      none_help,      just_read_file }, /* regexp matching only */
837  { NULL }                /* end of list */
838};
839
840
841static void
842print_language_names ()
843{
844  language *lang;
845  char **name, **ext;
846
847  puts ("\nThese are the currently supported languages, along with the\n\
848default file names and dot suffixes:");
849  for (lang = lang_names; lang->name != NULL; lang++)
850    {
851      printf ("  %-*s", 10, lang->name);
852      if (lang->filenames != NULL)
853	for (name = lang->filenames; *name != NULL; name++)
854	  printf (" %s", *name);
855      if (lang->suffixes != NULL)
856	for (ext = lang->suffixes; *ext != NULL; ext++)
857	  printf (" .%s", *ext);
858      puts ("");
859    }
860  puts ("where `auto' means use default language for files based on file\n\
861name suffix, and `none' means only do regexp processing on files.\n\
862If no language is specified and no matching suffix is found,\n\
863the first line of the file is read for a sharp-bang (#!) sequence\n\
864followed by the name of an interpreter.  If no such sequence is found,\n\
865Fortran is tried first; if no tags are found, C is tried next.\n\
866When parsing any C file, a \"class\" or \"template\" keyword\n\
867switches to C++.");
868  puts ("Compressed files are supported using gzip and bzip2.\n\
869\n\
870For detailed help on a given language use, for example,\n\
871etags --help --lang=ada.");
872}
873
874#ifndef EMACS_NAME
875# define EMACS_NAME "standalone"
876#endif
877#ifndef VERSION
878# define VERSION "version"
879#endif
880static void
881print_version ()
882{
883  printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
884  puts ("Copyright (C) 2007 Free Software Foundation, Inc.");
885  puts ("This program is distributed under the terms in ETAGS.README");
886
887  exit (EXIT_SUCCESS);
888}
889
890static void
891print_help (argbuffer)
892     argument *argbuffer;
893{
894  bool help_for_lang = FALSE;
895
896  for (; argbuffer->arg_type != at_end; argbuffer++)
897    if (argbuffer->arg_type == at_language)
898      {
899	if (help_for_lang)
900	  puts ("");
901	puts (argbuffer->lang->help);
902	help_for_lang = TRUE;
903      }
904
905  if (help_for_lang)
906    exit (EXIT_SUCCESS);
907
908  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
909\n\
910These are the options accepted by %s.\n", progname, progname);
911  if (NO_LONG_OPTIONS)
912    puts ("WARNING: long option names do not work with this executable,\n\
913as it is not linked with GNU getopt.");
914  else
915    puts ("You may use unambiguous abbreviations for the long option names.");
916  puts ("  A - as file name means read names from stdin (one per line).\n\
917Absolute names are stored in the output file as they are.\n\
918Relative ones are stored relative to the output file's directory.\n");
919
920  puts ("-a, --append\n\
921        Append tag entries to existing tags file.");
922
923  puts ("--packages-only\n\
924        For Ada files, only generate tags for packages.");
925
926  if (CTAGS)
927    puts ("-B, --backward-search\n\
928        Write the search commands for the tag entries using '?', the\n\
929        backward-search command instead of '/', the forward-search command.");
930
931  /* This option is mostly obsolete, because etags can now automatically
932     detect C++.  Retained for backward compatibility and for debugging and
933     experimentation.  In principle, we could want to tag as C++ even
934     before any "class" or "template" keyword.
935  puts ("-C, --c++\n\
936        Treat files whose name suffix defaults to C language as C++ files.");
937  */
938
939  puts ("--declarations\n\
940	In C and derived languages, create tags for function declarations,");
941  if (CTAGS)
942    puts ("\tand create tags for extern variables if --globals is used.");
943  else
944    puts
945      ("\tand create tags for extern variables unless --no-globals is used.");
946
947  if (CTAGS)
948    puts ("-d, --defines\n\
949        Create tag entries for C #define constants and enum constants, too.");
950  else
951    puts ("-D, --no-defines\n\
952        Don't create tag entries for C #define constants and enum constants.\n\
953	This makes the tags file smaller.");
954
955  if (!CTAGS)
956    puts ("-i FILE, --include=FILE\n\
957        Include a note in tag file indicating that, when searching for\n\
958        a tag, one should also consult the tags file FILE after\n\
959        checking the current file.");
960
961  puts ("-l LANG, --language=LANG\n\
962        Force the following files to be considered as written in the\n\
963	named language up to the next --language=LANG option.");
964
965  if (CTAGS)
966    puts ("--globals\n\
967	Create tag entries for global variables in some languages.");
968  else
969    puts ("--no-globals\n\
970	Do not create tag entries for global variables in some\n\
971	languages.  This makes the tags file smaller.");
972  puts ("--no-members\n\
973	Do not create tag entries for members of structures\n\
974	in some languages.");
975
976  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
977        Make a tag for each line matching a regular expression pattern\n\
978	in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
979	files only.  REGEXFILE is a file containing one REGEXP per line.\n\
980	REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
981	optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
982  puts ("	If TAGNAME/ is present, the tags created are named.\n\
983	For example Tcl named tags can be created with:\n\
984	  --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
985	MODS are optional one-letter modifiers: `i' means to ignore case,\n\
986	`m' means to allow multi-line matches, `s' implies `m' and\n\
987	causes dot to match any character, including newline.");
988  puts ("-R, --no-regex\n\
989        Don't create tags from regexps for the following files.");
990  puts ("-I, --ignore-indentation\n\
991        In C and C++ do not assume that a closing brace in the first\n\
992        column is the final brace of a function or structure definition.");
993  puts ("-o FILE, --output=FILE\n\
994        Write the tags to FILE.");
995  puts ("--parse-stdin=NAME\n\
996        Read from standard input and record tags as belonging to file NAME.");
997
998  if (CTAGS)
999    {
1000      puts ("-t, --typedefs\n\
1001        Generate tag entries for C and Ada typedefs.");
1002      puts ("-T, --typedefs-and-c++\n\
1003        Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1004        and C++ member functions.");
1005    }
1006
1007  if (CTAGS)
1008    puts ("-u, --update\n\
1009        Update the tag entries for the given files, leaving tag\n\
1010        entries for other files in place.  Currently, this is\n\
1011        implemented by deleting the existing entries for the given\n\
1012        files and then rewriting the new entries at the end of the\n\
1013        tags file.  It is often faster to simply rebuild the entire\n\
1014        tag file than to use this.");
1015
1016  if (CTAGS)
1017    {
1018      puts ("-v, --vgrind\n\
1019        Print on the standard output an index of items intended for\n\
1020        human consumption, similar to the output of vgrind.  The index\n\
1021        is sorted, and gives the page number of each item.");
1022# if PRINT_UNDOCUMENTED_OPTIONS_HELP
1023      puts ("-w, --no-duplicates\n\
1024        Do not create duplicate tag entries, for compatibility with\n\
1025	traditional ctags.");
1026      puts ("-w, --no-warn\n\
1027        Suppress warning messages about duplicate tag entries.");
1028# endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
1029      puts ("-x, --cxref\n\
1030        Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1031        The output uses line numbers instead of page numbers, but\n\
1032        beyond that the differences are cosmetic; try both to see\n\
1033        which you like.");
1034    }
1035
1036  puts ("-V, --version\n\
1037        Print the version of the program.\n\
1038-h, --help\n\
1039        Print this help message.\n\
1040        Followed by one or more `--language' options prints detailed\n\
1041        help about tag generation for the specified languages.");
1042
1043  print_language_names ();
1044
1045  puts ("");
1046  puts ("Report bugs to bug-gnu-emacs@gnu.org");
1047
1048  exit (EXIT_SUCCESS);
1049}
1050
1051
1052#ifdef VMS			/* VMS specific functions */
1053
/* The string terminator.  */
#define	EOS	'\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define	MAX_FILE_SPEC_LEN	255

/* A file specification: a length word followed by the characters, with
   room for MAX_FILE_SPEC_LEN characters plus a terminating EOS.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1063
1064/*
1065 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1066 returning in each successive call the next file name matching the input
1067 spec. The function expects that each in_spec passed
1068 to it will be processed to completion; in particular, up to and
1069 including the call following that in which the last matching name
1070 is returned, the function ignores the value of in_spec, and will
1071 only start processing a new spec with the following call.
1072 If an error occurs, on return out_spec contains the value
1073 of in_spec when the error occurred.
1074
1075 With each successive file name returned in out_spec, the
1076 function's return value is one. When there are no more matching
1077 names the function returns zero. If on the first call no file
1078 matches in_spec, or there is any other error, -1 is returned.
1079*/
1080
1081#include	<rmsdef.h>
1082#include	<descrip.h>
1083#define		OUTSIZE	MAX_FILE_SPEC_LEN
1084static short
1085fn_exp (out, in)
1086     vspec *out;
1087     char *in;
1088{
1089  static long context = 0;
1090  static struct dsc$descriptor_s o;
1091  static struct dsc$descriptor_s i;
1092  static bool pass1 = TRUE;
1093  long status;
1094  short retval;
1095
1096  if (pass1)
1097    {
1098      pass1 = FALSE;
1099      o.dsc$a_pointer = (char *) out;
1100      o.dsc$w_length = (short)OUTSIZE;
1101      i.dsc$a_pointer = in;
1102      i.dsc$w_length = (short)strlen(in);
1103      i.dsc$b_dtype = DSC$K_DTYPE_T;
1104      i.dsc$b_class = DSC$K_CLASS_S;
1105      o.dsc$b_dtype = DSC$K_DTYPE_VT;
1106      o.dsc$b_class = DSC$K_CLASS_VS;
1107    }
1108  if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1109    {
1110      out->body[out->curlen] = EOS;
1111      return 1;
1112    }
1113  else if (status == RMS$_NMF)
1114    retval = 0;
1115  else
1116    {
1117      strcpy(out->body, in);
1118      retval = -1;
1119    }
1120  lib$find_file_end(&context);
1121  pass1 = TRUE;
1122  return retval;
1123}
1124
1125/*
1126  v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1127  name of each file specified by the provided arg expanding wildcards.
1128*/
1129static char *
1130gfnames (arg, p_error)
1131     char *arg;
1132     bool *p_error;
1133{
1134  static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1135
1136  switch (fn_exp (&filename, arg))
1137    {
1138    case 1:
1139      *p_error = FALSE;
1140      return filename.body;
1141    case 0:
1142      *p_error = FALSE;
1143      return NULL;
1144    default:
1145      *p_error = TRUE;
1146      return filename.body;
1147    }
1148}
1149
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/* Stand-in for `system': run nothing, report that the function is
   not implemented and return -1 so that callers checking the result
   see a failure.
   NOTE(review): the comment above says newer VMS versions provide
   `system', yet this stub is compiled when OLD is NOT defined --
   confirm that the condition is the intended one.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1157
1158#define	VERSION_DELIM	';'
1159char *massage_name (s)
1160     char *s;
1161{
1162  char *start = s;
1163
1164  for ( ; *s; s++)
1165    if (*s == VERSION_DELIM)
1166      {
1167	*s = EOS;
1168	break;
1169      }
1170    else
1171      *s = lowcase (*s);
1172  return start;
1173}
1174#endif /* VMS */
1175
1176
1177int
1178main (argc, argv)
1179     int argc;
1180     char *argv[];
1181{
1182  int i;
1183  unsigned int nincluded_files;
1184  char **included_files;
1185  argument *argbuffer;
1186  int current_arg, file_count;
1187  linebuffer filename_lb;
1188  bool help_asked = FALSE;
1189#ifdef VMS
1190  bool got_err;
1191#endif
1192 char *optstring;
1193 int opt;
1194
1195
1196#ifdef DOS_NT
1197  _fmode = O_BINARY;   /* all of files are treated as binary files */
1198#endif /* DOS_NT */
1199
1200  progname = argv[0];
1201  nincluded_files = 0;
1202  included_files = xnew (argc, char *);
1203  current_arg = 0;
1204  file_count = 0;
1205
1206  /* Allocate enough no matter what happens.  Overkill, but each one
1207     is small. */
1208  argbuffer = xnew (argc, argument);
1209
1210  /*
1211   * If etags, always find typedefs and structure tags.  Why not?
1212   * Also default to find macro constants, enum constants, struct
1213   * members and global variables.
1214   */
1215  if (!CTAGS)
1216    {
1217      typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1218      globals = TRUE;
1219    }
1220
1221  /* When the optstring begins with a '-' getopt_long does not rearrange the
1222     non-options arguments to be at the end, but leaves them alone. */
1223  optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1224		      "ac:Cf:Il:o:r:RSVhH",
1225		      (CTAGS) ? "BxdtTuvw" : "Di:");
1226
1227  while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1228    switch (opt)
1229      {
1230      case 0:
1231	/* If getopt returns 0, then it has already processed a
1232	   long-named option.  We should do nothing.  */
1233	break;
1234
1235      case 1:
1236	/* This means that a file name has been seen.  Record it. */
1237	argbuffer[current_arg].arg_type = at_filename;
1238	argbuffer[current_arg].what     = optarg;
1239	++current_arg;
1240	++file_count;
1241	break;
1242
1243      case STDIN:
1244	/* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1245	argbuffer[current_arg].arg_type = at_stdin;
1246	argbuffer[current_arg].what     = optarg;
1247	++current_arg;
1248	++file_count;
1249	if (parsing_stdin)
1250	  fatal ("cannot parse standard input more than once", (char *)NULL);
1251	parsing_stdin = TRUE;
1252	break;
1253
1254	/* Common options. */
1255      case 'a': append_to_tagfile = TRUE;	break;
1256      case 'C': cplusplus = TRUE;		break;
1257      case 'f':		/* for compatibility with old makefiles */
1258      case 'o':
1259	if (tagfile)
1260	  {
1261	    error ("-o option may only be given once.", (char *)NULL);
1262	    suggest_asking_for_help ();
1263	    /* NOTREACHED */
1264	  }
1265	tagfile = optarg;
1266	break;
1267      case 'I':
1268      case 'S':		/* for backward compatibility */
1269	ignoreindent = TRUE;
1270	break;
1271      case 'l':
1272	{
1273	  language *lang = get_language_from_langname (optarg);
1274	  if (lang != NULL)
1275	    {
1276	      argbuffer[current_arg].lang = lang;
1277	      argbuffer[current_arg].arg_type = at_language;
1278	      ++current_arg;
1279	    }
1280	}
1281	break;
1282      case 'c':
1283	/* Backward compatibility: support obsolete --ignore-case-regexp. */
1284	optarg = concat (optarg, "i", ""); /* memory leak here */
1285	/* FALLTHRU */
1286      case 'r':
1287	argbuffer[current_arg].arg_type = at_regexp;
1288	argbuffer[current_arg].what = optarg;
1289	++current_arg;
1290	break;
1291      case 'R':
1292	argbuffer[current_arg].arg_type = at_regexp;
1293	argbuffer[current_arg].what = NULL;
1294	++current_arg;
1295	break;
1296      case 'V':
1297	print_version ();
1298	break;
1299      case 'h':
1300      case 'H':
1301	help_asked = TRUE;
1302	break;
1303
1304	/* Etags options */
1305      case 'D': constantypedefs = FALSE;			break;
1306      case 'i': included_files[nincluded_files++] = optarg;	break;
1307
1308	/* Ctags options. */
1309      case 'B': searchar = '?';					break;
1310      case 'd': constantypedefs = TRUE;				break;
1311      case 't': typedefs = TRUE;				break;
1312      case 'T': typedefs = typedefs_or_cplusplus = TRUE;	break;
1313      case 'u': update = TRUE;					break;
1314      case 'v': vgrind_style = TRUE;			  /*FALLTHRU*/
1315      case 'x': cxref_style = TRUE;				break;
1316      case 'w': no_warnings = TRUE;				break;
1317      default:
1318	suggest_asking_for_help ();
1319	/* NOTREACHED */
1320      }
1321
1322  /* No more options.  Store the rest of arguments. */
1323  for (; optind < argc; optind++)
1324    {
1325      argbuffer[current_arg].arg_type = at_filename;
1326      argbuffer[current_arg].what = argv[optind];
1327      ++current_arg;
1328      ++file_count;
1329    }
1330
1331  argbuffer[current_arg].arg_type = at_end;
1332
1333  if (help_asked)
1334    print_help (argbuffer);
1335    /* NOTREACHED */
1336
1337  if (nincluded_files == 0 && file_count == 0)
1338    {
1339      error ("no input files specified.", (char *)NULL);
1340      suggest_asking_for_help ();
1341      /* NOTREACHED */
1342    }
1343
1344  if (tagfile == NULL)
1345    tagfile = CTAGS ? "tags" : "TAGS";
1346  cwd = etags_getcwd ();	/* the current working directory */
1347  if (cwd[strlen (cwd) - 1] != '/')
1348    {
1349      char *oldcwd = cwd;
1350      cwd = concat (oldcwd, "/", "");
1351      free (oldcwd);
1352    }
1353  /* Relative file names are made relative to the current directory. */
1354  if (streq (tagfile, "-")
1355      || strneq (tagfile, "/dev/", 5))
1356    tagfiledir = cwd;
1357  else
1358    tagfiledir = absolute_dirname (tagfile, cwd);
1359
1360  init ();			/* set up boolean "functions" */
1361
1362  linebuffer_init (&lb);
1363  linebuffer_init (&filename_lb);
1364  linebuffer_init (&filebuf);
1365  linebuffer_init (&token_name);
1366
1367  if (!CTAGS)
1368    {
1369      if (streq (tagfile, "-"))
1370	{
1371	  tagf = stdout;
1372#ifdef DOS_NT
1373	  /* Switch redirected `stdout' to binary mode (setting `_fmode'
1374	     doesn't take effect until after `stdout' is already open). */
1375	  if (!isatty (fileno (stdout)))
1376	    setmode (fileno (stdout), O_BINARY);
1377#endif /* DOS_NT */
1378	}
1379      else
1380	tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1381      if (tagf == NULL)
1382	pfatal (tagfile);
1383    }
1384
1385  /*
1386   * Loop through files finding functions.
1387   */
1388  for (i = 0; i < current_arg; i++)
1389    {
1390      static language *lang;	/* non-NULL if language is forced */
1391      char *this_file;
1392
1393      switch (argbuffer[i].arg_type)
1394	{
1395	case at_language:
1396	  lang = argbuffer[i].lang;
1397	  break;
1398	case at_regexp:
1399	  analyse_regex (argbuffer[i].what);
1400	  break;
1401	case at_filename:
1402#ifdef VMS
1403	  while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1404	    {
1405	      if (got_err)
1406		{
1407		  error ("can't find file %s\n", this_file);
1408		  argc--, argv++;
1409		}
1410	      else
1411		{
1412		  this_file = massage_name (this_file);
1413		}
1414#else
1415	      this_file = argbuffer[i].what;
1416#endif
1417	      /* Input file named "-" means read file names from stdin
1418		 (one per line) and use them. */
1419	      if (streq (this_file, "-"))
1420		{
1421		  if (parsing_stdin)
1422		    fatal ("cannot parse standard input AND read file names from it",
1423			   (char *)NULL);
1424		  while (readline_internal (&filename_lb, stdin) > 0)
1425		    process_file_name (filename_lb.buffer, lang);
1426		}
1427	      else
1428		process_file_name (this_file, lang);
1429#ifdef VMS
1430	    }
1431#endif
1432	  break;
1433        case at_stdin:
1434          this_file = argbuffer[i].what;
1435          process_file (stdin, this_file, lang);
1436          break;
1437	}
1438    }
1439
1440  free_regexps ();
1441  free (lb.buffer);
1442  free (filebuf.buffer);
1443  free (token_name.buffer);
1444
1445  if (!CTAGS || cxref_style)
1446    {
1447      /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1448      put_entries (nodehead);
1449      free_tree (nodehead);
1450      nodehead = NULL;
1451      if (!CTAGS)
1452	{
1453	  fdesc *fdp;
1454
1455	  /* Output file entries that have no tags. */
1456	  for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1457	    if (!fdp->written)
1458	      fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1459
1460	  while (nincluded_files-- > 0)
1461	    fprintf (tagf, "\f\n%s,include\n", *included_files++);
1462
1463	  if (fclose (tagf) == EOF)
1464	    pfatal (tagfile);
1465	}
1466
1467      exit (EXIT_SUCCESS);
1468    }
1469
1470  if (update)
1471    {
1472      char cmd[BUFSIZ];
1473      for (i = 0; i < current_arg; ++i)
1474	{
1475	  switch (argbuffer[i].arg_type)
1476	    {
1477	    case at_filename:
1478	    case at_stdin:
1479	      break;
1480	    default:
1481	      continue;		/* the for loop */
1482	    }
1483	  sprintf (cmd,
1484		   "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1485		   tagfile, argbuffer[i].what, tagfile);
1486	  if (system (cmd) != EXIT_SUCCESS)
1487	    fatal ("failed to execute shell command", (char *)NULL);
1488	}
1489      append_to_tagfile = TRUE;
1490    }
1491
1492  tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1493  if (tagf == NULL)
1494    pfatal (tagfile);
1495  put_entries (nodehead);	/* write all the tags (CTAGS) */
1496  free_tree (nodehead);
1497  nodehead = NULL;
1498  if (fclose (tagf) == EOF)
1499    pfatal (tagfile);
1500
1501  if (CTAGS)
1502    if (append_to_tagfile || update)
1503      {
1504	char cmd[2*BUFSIZ+20];
1505	/* Maybe these should be used:
1506	   setenv ("LC_COLLATE", "C", 1);
1507	   setenv ("LC_ALL", "C", 1); */
1508	sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1509	exit (system (cmd));
1510      }
1511  return EXIT_SUCCESS;
1512}
1513
1514
1515/*
1516 * Return a compressor given the file name.  If EXTPTR is non-zero,
1517 * return a pointer into FILE where the compressor-specific
1518 * extension begins.  If no compressor is found, NULL is returned
1519 * and EXTPTR is not significant.
1520 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1521 */
1522static compressor *
1523get_compressor_from_suffix (file, extptr)
1524     char *file;
1525     char **extptr;
1526{
1527  compressor *compr;
1528  char *slash, *suffix;
1529
1530  /* This relies on FN to be after canonicalize_filename,
1531     so we don't need to consider backslashes on DOS_NT.  */
1532  slash = etags_strrchr (file, '/');
1533  suffix = etags_strrchr (file, '.');
1534  if (suffix == NULL || suffix < slash)
1535    return NULL;
1536  if (extptr != NULL)
1537    *extptr = suffix;
1538  suffix += 1;
1539  /* Let those poor souls who live with DOS 8+3 file name limits get
1540     some solace by treating foo.cgz as if it were foo.c.gz, etc.
1541     Only the first do loop is run if not MSDOS */
1542  do
1543    {
1544      for (compr = compressors; compr->suffix != NULL; compr++)
1545	if (streq (compr->suffix, suffix))
1546	  return compr;
1547      if (!MSDOS)
1548	break;			/* do it only once: not really a loop */
1549      if (extptr != NULL)
1550	*extptr = ++suffix;
1551    } while (*suffix != '\0');
1552  return NULL;
1553}
1554
1555
1556
1557/*
1558 * Return a language given the name.
1559 */
1560static language *
1561get_language_from_langname (name)
1562     const char *name;
1563{
1564  language *lang;
1565
1566  if (name == NULL)
1567    error ("empty language name", (char *)NULL);
1568  else
1569    {
1570      for (lang = lang_names; lang->name != NULL; lang++)
1571	if (streq (name, lang->name))
1572	  return lang;
1573      error ("unknown language \"%s\"", name);
1574    }
1575
1576  return NULL;
1577}
1578
1579
1580/*
1581 * Return a language given the interpreter name.
1582 */
1583static language *
1584get_language_from_interpreter (interpreter)
1585     char *interpreter;
1586{
1587  language *lang;
1588  char **iname;
1589
1590  if (interpreter == NULL)
1591    return NULL;
1592  for (lang = lang_names; lang->name != NULL; lang++)
1593    if (lang->interpreters != NULL)
1594      for (iname = lang->interpreters; *iname != NULL; iname++)
1595	if (streq (*iname, interpreter))
1596	    return lang;
1597
1598  return NULL;
1599}
1600
1601
1602
1603/*
1604 * Return a language given the file name.
1605 */
1606static language *
1607get_language_from_filename (file, case_sensitive)
1608     char *file;
1609     bool case_sensitive;
1610{
1611  language *lang;
1612  char **name, **ext, *suffix;
1613
1614  /* Try whole file name first. */
1615  for (lang = lang_names; lang->name != NULL; lang++)
1616    if (lang->filenames != NULL)
1617      for (name = lang->filenames; *name != NULL; name++)
1618	if ((case_sensitive)
1619	    ? streq (*name, file)
1620	    : strcaseeq (*name, file))
1621	  return lang;
1622
1623  /* If not found, try suffix after last dot. */
1624  suffix = etags_strrchr (file, '.');
1625  if (suffix == NULL)
1626    return NULL;
1627  suffix += 1;
1628  for (lang = lang_names; lang->name != NULL; lang++)
1629    if (lang->suffixes != NULL)
1630      for (ext = lang->suffixes; *ext != NULL; ext++)
1631	if ((case_sensitive)
1632	    ? streq (*ext, suffix)
1633	    : strcaseeq (*ext, suffix))
1634	  return lang;
1635  return NULL;
1636}
1637
1638
1639/*
1640 * This routine is called on each file argument.
1641 */
1642static void
1643process_file_name (file, lang)
1644     char *file;
1645     language *lang;
1646{
1647  struct stat stat_buf;
1648  FILE *inf;
1649  fdesc *fdp;
1650  compressor *compr;
1651  char *compressed_name, *uncompressed_name;
1652  char *ext, *real_name;
1653  int retval;
1654
1655  canonicalize_filename (file);
1656  if (streq (file, tagfile) && !streq (tagfile, "-"))
1657    {
1658      error ("skipping inclusion of %s in self.", file);
1659      return;
1660    }
1661  if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1662    {
1663      compressed_name = NULL;
1664      real_name = uncompressed_name = savestr (file);
1665    }
1666  else
1667    {
1668      real_name = compressed_name = savestr (file);
1669      uncompressed_name = savenstr (file, ext - file);
1670    }
1671
1672  /* If the canonicalized uncompressed name
1673     has already been dealt with, skip it silently. */
1674  for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1675    {
1676      assert (fdp->infname != NULL);
1677      if (streq (uncompressed_name, fdp->infname))
1678	goto cleanup;
1679    }
1680
1681  if (stat (real_name, &stat_buf) != 0)
1682    {
1683      /* Reset real_name and try with a different name. */
1684      real_name = NULL;
1685      if (compressed_name != NULL) /* try with the given suffix */
1686	{
1687	  if (stat (uncompressed_name, &stat_buf) == 0)
1688	    real_name = uncompressed_name;
1689	}
1690      else			/* try all possible suffixes */
1691	{
1692	  for (compr = compressors; compr->suffix != NULL; compr++)
1693	    {
1694	      compressed_name = concat (file, ".", compr->suffix);
1695	      if (stat (compressed_name, &stat_buf) != 0)
1696		{
1697		  if (MSDOS)
1698		    {
1699		      char *suf = compressed_name + strlen (file);
1700		      size_t suflen = strlen (compr->suffix) + 1;
1701		      for ( ; suf[1]; suf++, suflen--)
1702			{
1703			  memmove (suf, suf + 1, suflen);
1704			  if (stat (compressed_name, &stat_buf) == 0)
1705			    {
1706			      real_name = compressed_name;
1707			      break;
1708			    }
1709			}
1710		      if (real_name != NULL)
1711			break;
1712		    } /* MSDOS */
1713		  free (compressed_name);
1714		  compressed_name = NULL;
1715		}
1716	      else
1717		{
1718		  real_name = compressed_name;
1719		  break;
1720		}
1721	    }
1722	}
1723      if (real_name == NULL)
1724	{
1725	  perror (file);
1726	  goto cleanup;
1727	}
1728    } /* try with a different name */
1729
1730  if (!S_ISREG (stat_buf.st_mode))
1731    {
1732      error ("skipping %s: it is not a regular file.", real_name);
1733      goto cleanup;
1734    }
1735  if (real_name == compressed_name)
1736    {
1737      char *cmd = concat (compr->command, " ", real_name);
1738      inf = (FILE *) popen (cmd, "r");
1739      free (cmd);
1740    }
1741  else
1742    inf = fopen (real_name, "r");
1743  if (inf == NULL)
1744    {
1745      perror (real_name);
1746      goto cleanup;
1747    }
1748
1749  process_file (inf, uncompressed_name, lang);
1750
1751  if (real_name == compressed_name)
1752    retval = pclose (inf);
1753  else
1754    retval = fclose (inf);
1755  if (retval < 0)
1756    pfatal (file);
1757
1758 cleanup:
1759  if (compressed_name) free (compressed_name);
1760  if (uncompressed_name) free (uncompressed_name);
1761  last_node = NULL;
1762  curfdp = NULL;
1763  return;
1764}
1765
1766static void
1767process_file (fh, fn, lang)
1768     FILE *fh;
1769     char *fn;
1770     language *lang;
1771{
1772  static const fdesc emptyfdesc;
1773  fdesc *fdp;
1774
1775  /* Create a new input file description entry. */
1776  fdp = xnew (1, fdesc);
1777  *fdp = emptyfdesc;
1778  fdp->next = fdhead;
1779  fdp->infname = savestr (fn);
1780  fdp->lang = lang;
1781  fdp->infabsname = absolute_filename (fn, cwd);
1782  fdp->infabsdir = absolute_dirname (fn, cwd);
1783  if (filename_is_absolute (fn))
1784    {
1785      /* An absolute file name.  Canonicalize it. */
1786      fdp->taggedfname = absolute_filename (fn, NULL);
1787    }
1788  else
1789    {
1790      /* A file name relative to cwd.  Make it relative
1791	 to the directory of the tags file. */
1792      fdp->taggedfname = relative_filename (fn, tagfiledir);
1793    }
1794  fdp->usecharno = TRUE;	/* use char position when making tags */
1795  fdp->prop = NULL;
1796  fdp->written = FALSE;		/* not written on tags file yet */
1797
1798  fdhead = fdp;
1799  curfdp = fdhead;		/* the current file description */
1800
1801  find_entries (fh);
1802
1803  /* If not Ctags, and if this is not metasource and if it contained no #line
1804     directives, we can write the tags and free all nodes pointing to
1805     curfdp. */
1806  if (!CTAGS
1807      && curfdp->usecharno	/* no #line directives in this file */
1808      && !curfdp->lang->metasource)
1809    {
1810      node *np, *prev;
1811
1812      /* Look for the head of the sublist relative to this file.  See add_node
1813	 for the structure of the node tree. */
1814      prev = NULL;
1815      for (np = nodehead; np != NULL; prev = np, np = np->left)
1816	if (np->fdp == curfdp)
1817	  break;
1818
1819      /* If we generated tags for this file, write and delete them. */
1820      if (np != NULL)
1821	{
1822	  /* This is the head of the last sublist, if any.  The following
1823	     instructions depend on this being true. */
1824	  assert (np->left == NULL);
1825
1826	  assert (fdhead == curfdp);
1827	  assert (last_node->fdp == curfdp);
1828	  put_entries (np);	/* write tags for file curfdp->taggedfname */
1829	  free_tree (np);	/* remove the written nodes */
1830	  if (prev == NULL)
1831	    nodehead = NULL;	/* no nodes left */
1832	  else
1833	    prev->left = NULL;	/* delete the pointer to the sublist */
1834	}
1835    }
1836}
1837
1838/*
1839 * This routine sets up the boolean pseudo-functions which work
1840 * by setting boolean flags dependent upon the corresponding character.
1841 * Every char which is NOT in that string is not a white char.  Therefore,
1842 * all of the array "_wht" is set to FALSE, and then the elements
1843 * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
 * of a char is TRUE if it is in the string "white", else FALSE.
1845 */
1846static void
1847init ()
1848{
1849  register char *sp;
1850  register int i;
1851
1852  for (i = 0; i < CHARS; i++)
1853    iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1854  for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1855  for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1856  notinname('\0') = notinname('\n');
1857  for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1858  begtoken('\0') = begtoken('\n');
1859  for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1860  intoken('\0') = intoken('\n');
1861  for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1862  endtoken('\0') = endtoken('\n');
1863}
1864
/*
 * This routine selects the parser to use for the already open stream
 * INF, which belongs to the file described by curfdp, and calls it to
 * find the function and type definitions.
 */
1869static void
1870find_entries (inf)
1871     FILE *inf;
1872{
1873  char *cp;
1874  language *lang = curfdp->lang;
1875  Lang_function *parser = NULL;
1876
1877  /* If user specified a language, use it. */
1878  if (lang != NULL && lang->function != NULL)
1879    {
1880      parser = lang->function;
1881    }
1882
1883  /* Else try to guess the language given the file name. */
1884  if (parser == NULL)
1885    {
1886      lang = get_language_from_filename (curfdp->infname, TRUE);
1887      if (lang != NULL && lang->function != NULL)
1888	{
1889	  curfdp->lang = lang;
1890	  parser = lang->function;
1891	}
1892    }
1893
1894  /* Else look for sharp-bang as the first two characters. */
1895  if (parser == NULL
1896      && readline_internal (&lb, inf) > 0
1897      && lb.len >= 2
1898      && lb.buffer[0] == '#'
1899      && lb.buffer[1] == '!')
1900    {
1901      char *lp;
1902
1903      /* Set lp to point at the first char after the last slash in the
1904         line or, if no slashes, at the first nonblank.  Then set cp to
1905	 the first successive blank and terminate the string. */
1906      lp = etags_strrchr (lb.buffer+2, '/');
1907      if (lp != NULL)
1908	lp += 1;
1909      else
1910	lp = skip_spaces (lb.buffer + 2);
1911      cp = skip_non_spaces (lp);
1912      *cp = '\0';
1913
1914      if (strlen (lp) > 0)
1915	{
1916	  lang = get_language_from_interpreter (lp);
1917	  if (lang != NULL && lang->function != NULL)
1918	    {
1919	      curfdp->lang = lang;
1920	      parser = lang->function;
1921	    }
1922	}
1923    }
1924
1925  /* We rewind here, even if inf may be a pipe.  We fail if the
1926     length of the first line is longer than the pipe block size,
1927     which is unlikely. */
1928  rewind (inf);
1929
1930  /* Else try to guess the language given the case insensitive file name. */
1931  if (parser == NULL)
1932    {
1933      lang = get_language_from_filename (curfdp->infname, FALSE);
1934      if (lang != NULL && lang->function != NULL)
1935	{
1936	  curfdp->lang = lang;
1937	  parser = lang->function;
1938	}
1939    }
1940
1941  /* Else try Fortran or C. */
1942  if (parser == NULL)
1943    {
1944      node *old_last_node = last_node;
1945
1946      curfdp->lang = get_language_from_langname ("fortran");
1947      find_entries (inf);
1948
1949      if (old_last_node == last_node)
1950	/* No Fortran entries found.  Try C. */
1951	{
1952	  /* We do not tag if rewind fails.
1953	     Only the file name will be recorded in the tags file. */
1954	  rewind (inf);
1955	  curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1956	  find_entries (inf);
1957	}
1958      return;
1959    }
1960
1961  if (!no_line_directive
1962      && curfdp->lang != NULL && curfdp->lang->metasource)
1963    /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1964       file, or anyway we parsed a file that is automatically generated from
1965       this one.  If this is the case, the bingo.c file contained #line
1966       directives that generated tags pointing to this file.  Let's delete
1967       them all before parsing this file, which is the real source. */
1968    {
1969      fdesc **fdpp = &fdhead;
1970      while (*fdpp != NULL)
1971	if (*fdpp != curfdp
1972	    && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1973	  /* We found one of those!  We must delete both the file description
1974	     and all tags referring to it. */
1975	  {
1976	    fdesc *badfdp = *fdpp;
1977
1978	    /* Delete the tags referring to badfdp->taggedfname
1979	       that were obtained from badfdp->infname. */
1980	    invalidate_nodes (badfdp, &nodehead);
1981
1982	    *fdpp = badfdp->next; /* remove the bad description from the list */
1983	    free_fdesc (badfdp);
1984	  }
1985	else
1986	  fdpp = &(*fdpp)->next; /* advance the list pointer */
1987    }
1988
1989  assert (parser != NULL);
1990
1991  /* Generic initialisations before reading from file. */
1992  linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1993
1994  /* Generic initialisations before parsing file with readline. */
1995  lineno = 0;		       /* reset global line number */
1996  charno = 0;		       /* reset global char number */
1997  linecharno = 0;	       /* reset global char number of line start */
1998
1999  parser (inf);
2000
2001  regex_tag_multiline ();
2002}
2003
2004
2005/*
2006 * Check whether an implicitly named tag should be created,
2007 * then call `pfnote'.
2008 * NAME is a string that is internally copied by this function.
2009 *
2010 * TAGS format specification
2011 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2012 * The following is explained in some more detail in etc/ETAGS.EBNF.
2013 *
2014 * make_tag creates tags with "implicit tag names" (unnamed tags)
2015 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2016 *  1. NAME does not contain any of the characters in NONAM;
2017 *  2. LINESTART contains name as either a rightmost, or rightmost but
2018 *     one character, substring;
2019 *  3. the character, if any, immediately before NAME in LINESTART must
2020 *     be a character in NONAM;
2021 *  4. the character, if any, immediately after NAME in LINESTART must
2022 *     also be a character in NONAM.
2023 *
2024 * The implementation uses the notinname() macro, which recognises the
2025 * characters stored in the string `nonam'.
2026 * etags.el needs to use the same characters that are in NONAM.
2027 */
2028static void
2029make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2030     char *name;		/* tag name, or NULL if unnamed */
2031     int namelen;		/* tag length */
2032     bool is_func;		/* tag is a function */
2033     char *linestart;		/* start of the line where tag is */
2034     int linelen;		/* length of the line where tag is */
2035     int lno;			/* line number */
2036     long cno;			/* character number */
2037{
2038  bool named = (name != NULL && namelen > 0);
2039
2040  if (!CTAGS && named)		/* maybe set named to false */
2041    /* Let's try to make an implicit tag name, that is, create an unnamed tag
2042       such that etags.el can guess a name from it. */
2043    {
2044      int i;
2045      register char *cp = name;
2046
2047      for (i = 0; i < namelen; i++)
2048	if (notinname (*cp++))
2049	  break;
2050      if (i == namelen)				/* rule #1 */
2051	{
2052	  cp = linestart + linelen - namelen;
2053	  if (notinname (linestart[linelen-1]))
2054	    cp -= 1;				/* rule #4 */
2055	  if (cp >= linestart			/* rule #2 */
2056	      && (cp == linestart
2057		  || notinname (cp[-1]))	/* rule #3 */
2058	      && strneq (name, cp, namelen))	/* rule #2 */
2059	    named = FALSE;	/* use implicit tag name */
2060	}
2061    }
2062
2063  if (named)
2064    name = savenstr (name, namelen);
2065  else
2066    name = NULL;
2067  pfnote (name, is_func, linestart, linelen, lno, cno);
2068}
2069
2070/* Record a tag. */
2071static void
2072pfnote (name, is_func, linestart, linelen, lno, cno)
2073     char *name;		/* tag name, or NULL if unnamed */
2074     bool is_func;		/* tag is a function */
2075     char *linestart;		/* start of the line where tag is */
2076     int linelen;		/* length of the line where tag is */
2077     int lno;			/* line number */
2078     long cno;			/* character number */
2079{
2080  register node *np;
2081
2082  assert (name == NULL || name[0] != '\0');
2083  if (CTAGS && name == NULL)
2084    return;
2085
2086  np = xnew (1, node);
2087
2088  /* If ctags mode, change name "main" to M<thisfilename>. */
2089  if (CTAGS && !cxref_style && streq (name, "main"))
2090    {
2091      register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2092      np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2093      fp = etags_strrchr (np->name, '.');
2094      if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2095	fp[0] = '\0';
2096    }
2097  else
2098    np->name = name;
2099  np->valid = TRUE;
2100  np->been_warned = FALSE;
2101  np->fdp = curfdp;
2102  np->is_func = is_func;
2103  np->lno = lno;
2104  if (np->fdp->usecharno)
2105    /* Our char numbers are 0-base, because of C language tradition?
2106       ctags compatibility?  old versions compatibility?   I don't know.
2107       Anyway, since emacs's are 1-base we expect etags.el to take care
2108       of the difference.  If we wanted to have 1-based numbers, we would
2109       uncomment the +1 below. */
2110    np->cno = cno /* + 1 */ ;
2111  else
2112    np->cno = invalidcharno;
2113  np->left = np->right = NULL;
2114  if (CTAGS && !cxref_style)
2115    {
2116      if (strlen (linestart) < 50)
2117	np->regex = concat (linestart, "$", "");
2118      else
2119	np->regex = savenstr (linestart, 50);
2120    }
2121  else
2122    np->regex = savenstr (linestart, linelen);
2123
2124  add_node (np, &nodehead);
2125}
2126
2127/*
2128 * free_tree ()
2129 *	recurse on left children, iterate on right children.
2130 */
2131static void
2132free_tree (np)
2133     register node *np;
2134{
2135  while (np)
2136    {
2137      register node *node_right = np->right;
2138      free_tree (np->left);
2139      if (np->name != NULL)
2140	free (np->name);
2141      free (np->regex);
2142      free (np);
2143      np = node_right;
2144    }
2145}
2146
2147/*
2148 * free_fdesc ()
2149 *	delete a file description
2150 */
2151static void
2152free_fdesc (fdp)
2153     register fdesc *fdp;
2154{
2155  if (fdp->infname != NULL) free (fdp->infname);
2156  if (fdp->infabsname != NULL) free (fdp->infabsname);
2157  if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2158  if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2159  if (fdp->prop != NULL) free (fdp->prop);
2160  free (fdp);
2161}
2162
2163/*
2164 * add_node ()
2165 *	Adds a node to the tree of nodes.  In etags mode, sort by file
2166 *  	name.  In ctags mode, sort by tag name.  Make no attempt at
2167 *    	balancing.
2168 *
2169 *	add_node is the only function allowed to add nodes, so it can
2170 *	maintain state.
2171 */
2172static void
2173add_node (np, cur_node_p)
2174     node *np, **cur_node_p;
2175{
2176  register int dif;
2177  register node *cur_node = *cur_node_p;
2178
2179  if (cur_node == NULL)
2180    {
2181      *cur_node_p = np;
2182      last_node = np;
2183      return;
2184    }
2185
2186  if (!CTAGS)
2187    /* Etags Mode */
2188    {
2189      /* For each file name, tags are in a linked sublist on the right
2190	 pointer.  The first tags of different files are a linked list
2191	 on the left pointer.  last_node points to the end of the last
2192	 used sublist. */
2193      if (last_node != NULL && last_node->fdp == np->fdp)
2194	{
2195	  /* Let's use the same sublist as the last added node. */
2196	  assert (last_node->right == NULL);
2197	  last_node->right = np;
2198	  last_node = np;
2199	}
2200      else if (cur_node->fdp == np->fdp)
2201	{
2202	  /* Scanning the list we found the head of a sublist which is
2203	     good for us.  Let's scan this sublist. */
2204	  add_node (np, &cur_node->right);
2205	}
2206      else
2207	/* The head of this sublist is not good for us.  Let's try the
2208	   next one. */
2209	add_node (np, &cur_node->left);
2210    } /* if ETAGS mode */
2211
2212  else
2213    {
2214      /* Ctags Mode */
2215      dif = strcmp (np->name, cur_node->name);
2216
2217      /*
2218       * If this tag name matches an existing one, then
2219       * do not add the node, but maybe print a warning.
2220       */
2221      if (no_duplicates && !dif)
2222	{
2223	  if (np->fdp == cur_node->fdp)
2224	    {
2225	      if (!no_warnings)
2226		{
2227		  fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2228			   np->fdp->infname, lineno, np->name);
2229		  fprintf (stderr, "Second entry ignored\n");
2230		}
2231	    }
2232	  else if (!cur_node->been_warned && !no_warnings)
2233	    {
2234	      fprintf
2235		(stderr,
2236		 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2237		 np->fdp->infname, cur_node->fdp->infname, np->name);
2238	      cur_node->been_warned = TRUE;
2239	    }
2240	  return;
2241	}
2242
2243      /* Actually add the node */
2244      add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2245    } /* if CTAGS mode */
2246}
2247
2248/*
2249 * invalidate_nodes ()
2250 *	Scan the node tree and invalidate all nodes pointing to the
2251 *	given file description (CTAGS case) or free them (ETAGS case).
2252 */
2253static void
2254invalidate_nodes (badfdp, npp)
2255     fdesc *badfdp;
2256     node **npp;
2257{
2258  node *np = *npp;
2259
2260  if (np == NULL)
2261    return;
2262
2263  if (CTAGS)
2264    {
2265      if (np->left != NULL)
2266	invalidate_nodes (badfdp, &np->left);
2267      if (np->fdp == badfdp)
2268	np->valid = FALSE;
2269      if (np->right != NULL)
2270	invalidate_nodes (badfdp, &np->right);
2271    }
2272  else
2273    {
2274      assert (np->fdp != NULL);
2275      if (np->fdp == badfdp)
2276	{
2277	  *npp = np->left;	/* detach the sublist from the list */
2278	  np->left = NULL;	/* isolate it */
2279	  free_tree (np);	/* free it */
2280	  invalidate_nodes (badfdp, npp);
2281	}
2282      else
2283	invalidate_nodes (badfdp, &np->left);
2284    }
2285}
2286
2287
/* Helpers for put_entries: total_size_of_entries gives the size that is
   written in the file header in etags mode, and relies on number_len.  */
static int total_size_of_entries __P((node *));
static int number_len __P((long));
2290
2291/* Length of a non-negative number's decimal representation. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to start with, and one more for every division by ten
     that the number can stand while remaining positive.  */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2301
2302/*
2303 * Return total number of characters that put_entries will output for
2304 * the nodes in the linked list at the right of the specified node.
2305 * This count is irrelevant with etags.el since emacs 19.34 at least,
2306 * but is still supplied for backward compatibility.
2307 */
2308static int
2309total_size_of_entries (np)
2310     register node *np;
2311{
2312  register int total = 0;
2313
2314  for (; np != NULL; np = np->right)
2315    if (np->valid)
2316      {
2317	total += strlen (np->regex) + 1;		/* pat\177 */
2318	if (np->name != NULL)
2319	  total += strlen (np->name) + 1;		/* name\001 */
2320	total += number_len ((long) np->lno) + 1;	/* lno, */
2321	if (np->cno != invalidcharno)			/* cno */
2322	  total += number_len (np->cno);
2323	total += 1;					/* newline */
2324      }
2325
2326  return total;
2327}
2328
2329static void
2330put_entries (np)
2331     register node *np;
2332{
2333  register char *sp;
2334  static fdesc *fdp = NULL;
2335
2336  if (np == NULL)
2337    return;
2338
2339  /* Output subentries that precede this one */
2340  if (CTAGS)
2341    put_entries (np->left);
2342
2343  /* Output this entry */
2344  if (np->valid)
2345    {
2346      if (!CTAGS)
2347	{
2348	  /* Etags mode */
2349	  if (fdp != np->fdp)
2350	    {
2351	      fdp = np->fdp;
2352	      fprintf (tagf, "\f\n%s,%d\n",
2353		       fdp->taggedfname, total_size_of_entries (np));
2354	      fdp->written = TRUE;
2355	    }
2356	  fputs (np->regex, tagf);
2357	  fputc ('\177', tagf);
2358	  if (np->name != NULL)
2359	    {
2360	      fputs (np->name, tagf);
2361	      fputc ('\001', tagf);
2362	    }
2363	  fprintf (tagf, "%d,", np->lno);
2364	  if (np->cno != invalidcharno)
2365	    fprintf (tagf, "%ld", np->cno);
2366	  fputs ("\n", tagf);
2367	}
2368      else
2369	{
2370	  /* Ctags mode */
2371	  if (np->name == NULL)
2372	    error ("internal error: NULL name in ctags mode.", (char *)NULL);
2373
2374	  if (cxref_style)
2375	    {
2376	      if (vgrind_style)
2377		fprintf (stdout, "%s %s %d\n",
2378			 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2379	      else
2380		fprintf (stdout, "%-16s %3d %-16s %s\n",
2381			 np->name, np->lno, np->fdp->taggedfname, np->regex);
2382	    }
2383	  else
2384	    {
2385	      fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2386
2387	      if (np->is_func)
2388		{		/* function or #define macro with args */
2389		  putc (searchar, tagf);
2390		  putc ('^', tagf);
2391
2392		  for (sp = np->regex; *sp; sp++)
2393		    {
2394		      if (*sp == '\\' || *sp == searchar)
2395			putc ('\\', tagf);
2396		      putc (*sp, tagf);
2397		    }
2398		  putc (searchar, tagf);
2399		}
2400	      else
2401		{		/* anything else; text pattern inadequate */
2402		  fprintf (tagf, "%d", np->lno);
2403		}
2404	      putc ('\n', tagf);
2405	    }
2406	}
2407    } /* if this node contains a valid tag */
2408
2409  /* Output subentries that follow this one */
2410  put_entries (np->right);
2411  if (!CTAGS)
2412    put_entries (np->left);
2413}
2414
2415
2416/* C extensions. */
/* Bits of the mask that tells which dialect of C is being parsed.
   C_STAR and C_JAVA both contain the C_PLPL bit; the keyword table
   below writes (C_JAVA & ~C_PLPL) to mean the Java bit alone.  */
#define C_EXT	0x00fff		/* C extensions */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
2424
2425/*
2426 * The C symbol tables.
2427 */
/* Kind of a keyword, as stored in the keyword table.  The keywords
   quoted for each kind are those listed in the gperf input below.  */
enum sym_type
{
  st_none,			/* not a keyword, or not one in this dialect */
  st_C_objprot,			/* @interface, @protocol */
  st_C_objimpl,			/* @implementation */
  st_C_objend,			/* @end */
  st_C_gnumacro,		/* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,			/* if, for, while, switch, return, import,
				   package, friend */
  st_C_attribute,		/* __attribute__ */
  st_C_javastruct,		/* extends, implements */
  st_C_operator,		/* operator */
  st_C_class,			/* class */
  st_C_template,		/* template */
  st_C_struct,			/* interface, namespace, domain, union,
				   struct */
  st_C_extern,			/* extern */
  st_C_enum,			/* enum */
  st_C_define,			/* define, undef */
  st_C_typedef			/* typedef */
};
2439
/* Keyword lookup.  hash and in_word_set are the functions produced by
   gperf from the table below; C_symtype calls in_word_set and checks
   the dialect.  */
static unsigned int hash __P((const char *, unsigned int));
static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
static enum sym_type C_symtype __P((char *, int, int));
2443
2444/* Feed stuff between (but not including) %[ and %] lines to:
2445     gperf -m 5
2446%[
2447%compare-strncmp
2448%enum
2449%struct-type
2450struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2451%%
2452if,		0,			st_C_ignore
2453for,		0,			st_C_ignore
2454while,		0,			st_C_ignore
2455switch,		0,			st_C_ignore
2456return,		0,			st_C_ignore
2457__attribute__,	0,			st_C_attribute
2458@interface,	0,			st_C_objprot
2459@protocol,	0,			st_C_objprot
2460@implementation,0,			st_C_objimpl
2461@end,		0,			st_C_objend
2462import,		(C_JAVA & ~C_PLPL),	st_C_ignore
2463package,	(C_JAVA & ~C_PLPL),	st_C_ignore
2464friend,		C_PLPL,			st_C_ignore
2465extends,	(C_JAVA & ~C_PLPL),	st_C_javastruct
2466implements,	(C_JAVA & ~C_PLPL),	st_C_javastruct
2467interface,	(C_JAVA & ~C_PLPL),	st_C_struct
2468class,		0,			st_C_class
2469namespace,	C_PLPL,			st_C_struct
2470domain,		C_STAR,			st_C_struct
2471union,		0,			st_C_struct
2472struct,		0,			st_C_struct
2473extern,		0,			st_C_extern
2474enum,		0,			st_C_enum
2475typedef,	0,			st_C_typedef
2476define,		0,			st_C_define
2477undef,		0,			st_C_define
2478operator,	C_PLPL,			st_C_operator
2479template,	0,			st_C_template
2480# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2481DEFUN,		0,			st_C_gnumacro
2482SYSCALL,	0,			st_C_gnumacro
2483ENTRY,		0,			st_C_gnumacro
2484PSEUDO,		0,			st_C_gnumacro
2485# These are defined inside C functions, so currently they are not met.
2486# EXFUN used in glibc, DEFVAR_* in emacs.
2487#EXFUN,		0,			st_C_gnumacro
2488#DEFVAR_,	0,			st_C_gnumacro
2489%]
2490and replace lines between %< and %> with its output, then:
2491 - remove the #if characterset check
2492 - make in_word_set static and not inline. */
2493/*%<*/
2494/* C code produced by gperf version 3.0.1 */
2495/* Command-line: gperf -m 5  */
2496/* Computed positions: -k'2-3' */
2497
/* One keyword: its spelling, the dialect bits it is restricted to
   (0 when it is valid in every dialect, see C_symtype) and its kind.
   This line comes from the gperf input above.  */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
/* maximum key range = 33, duplicates = 0 */
2500
/* Perfect hash function for the keyword table, as computed by gperf:
   the length of the word plus the values associated with its second
   character and, unless the word is exactly two characters long, with
   its third one.  STR must therefore hold at least two characters.
   The table must match the gperf output for the keyword list above.  */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static const unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int key = len;

  /* The third character counts for every length but two.  */
  if (key != 2)
    key += asso_values[(unsigned char)str[2]];
  /* The second character always counts.  */
  key += asso_values[(unsigned char)str[1]];
  return key;
}
2555
/* Look up the LEN characters at STR in the keyword table.  Return the
   entry of the keyword, or 0 if the word is not a keyword.  This is
   gperf output: the position of each entry of wordlist is the hash
   value of its name, so the table must not be edited by hand.  */
static struct C_stab_entry *
in_word_set (str, len)
     register const char *str;
     register unsigned int len;
{
  enum
    {
      TOTAL_KEYWORDS = 32,
      MIN_WORD_LENGTH = 2,
      MAX_WORD_LENGTH = 15,
      MIN_HASH_VALUE = 2,
      MAX_HASH_VALUE = 34
    };

  /* Indexed by hash value.  The entries with an empty name fill the
     hash values that no keyword produces.  */
  static struct C_stab_entry wordlist[] =
    {
      {""}, {""},
      {"if",		0,			st_C_ignore},
      {""},
      {"@end",		0,			st_C_objend},
      {"union",		0,			st_C_struct},
      {"define",		0,			st_C_define},
      {"import",		(C_JAVA & ~C_PLPL),	st_C_ignore},
      {"template",	0,			st_C_template},
      {"operator",	C_PLPL,			st_C_operator},
      {"@interface",	0,			st_C_objprot},
      {"implements",	(C_JAVA & ~C_PLPL),	st_C_javastruct},
      {"friend",		C_PLPL,			st_C_ignore},
      {"typedef",	0,			st_C_typedef},
      {"return",		0,			st_C_ignore},
      {"@implementation",0,			st_C_objimpl},
      {"@protocol",	0,			st_C_objprot},
      {"interface",	(C_JAVA & ~C_PLPL),	st_C_struct},
      {"extern",		0,			st_C_extern},
      {"extends",	(C_JAVA & ~C_PLPL),	st_C_javastruct},
      {"struct",		0,			st_C_struct},
      {"domain",		C_STAR,			st_C_struct},
      {"switch",		0,			st_C_ignore},
      {"enum",		0,			st_C_enum},
      {"for",		0,			st_C_ignore},
      {"namespace",	C_PLPL,			st_C_struct},
      {"class",		0,			st_C_class},
      {"while",		0,			st_C_ignore},
      {"undef",		0,			st_C_define},
      {"package",	(C_JAVA & ~C_PLPL),	st_C_ignore},
      {"__attribute__",	0,			st_C_attribute},
      {"SYSCALL",	0,			st_C_gnumacro},
      {"ENTRY",		0,			st_C_gnumacro},
      {"PSEUDO",		0,			st_C_gnumacro},
      {"DEFUN",		0,			st_C_gnumacro}
    };

  /* Only words whose length is in the range of the keyword lengths
     are hashed.  */
  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          register const char *s = wordlist[key].name;

          /* The hash only selects a candidate: the word matches if its
             LEN characters are equal to the candidate's and the
             candidate ends right there.  */
          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
            return &wordlist[key];
        }
    }
  return 0;
}
2622/*%>*/
2623
2624static enum sym_type
2625C_symtype (str, len, c_ext)
2626     char *str;
2627     int len;
2628     int c_ext;
2629{
2630  register struct C_stab_entry *se = in_word_set (str, len);
2631
2632  if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2633    return st_none;
2634  return se->type;
2635}
2636
2637
2638/*
2639 * Ignoring __attribute__ ((list))
2640 */
static bool inattribute;	/* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable; being static, it
 * starts as fvnone.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

static bool fvextern;		/* func or var: extern keyword seen */
2662
2663/*
2664 * typedefs are recognized using a simple finite automaton.
2665 * typdef is its state variable.
2666 */
/* State of the typedef automaton; being static, it starts as tnone.  */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;
2690
2691/*
2692 * When objdef is different from onone, objtag is the name of the class.
2693 */
/* Name of the current Objective C class.  The initial value is only a
   placeholder string.  */
static char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
2726
2727
2728/*
2729 * Use this structure to keep info about the token read, and how it
2730 * should be tagged.  Used by the make_C_tag function to build a tag.
2731 */
static struct tok
{
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;			/* when FALSE, do not create a tag
				   (NOTE(review): polarity inferred from
				   the member name -- confirm against
				   make_C_tag); the token should be
				   invalidated whenever a state machine is
				   reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2750
2751/*
2752 * Variables and functions for dealing with nested structures.
2753 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2754 */
static void pushclass_above __P((int, char *, int));
static void popclass_above __P((int));
static void write_classname __P((linebuffer *, char *qualifier));

/* The two arrays are parallel: element I holds the name of the I-th
   enclosing declaration (NULL if it has none) and the brace level at
   which it was pushed.  Both are reallocated together by
   pushclass_above.  */
static struct {
  char **cname;			/* nested class names */
  int *bracelev;		/* nested class brace level */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;			/* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   This macro reads a variable named bracelev that must be visible
   where it is used.  */
#define instruct	(structdef == snone && nestlev > 0			\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2770
2771static void
2772pushclass_above (bracelev, str, len)
2773     int bracelev;
2774     char *str;
2775     int len;
2776{
2777  int nl;
2778
2779  popclass_above (bracelev);
2780  nl = cstack.nl;
2781  if (nl >= cstack.size)
2782    {
2783      int size = cstack.size *= 2;
2784      xrnew (cstack.cname, size, char *);
2785      xrnew (cstack.bracelev, size, int);
2786    }
2787  assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2788  cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2789  cstack.bracelev[nl] = bracelev;
2790  cstack.nl = nl + 1;
2791}
2792
2793static void
2794popclass_above (bracelev)
2795     int bracelev;
2796{
2797  int nl;
2798
2799  for (nl = cstack.nl - 1;
2800       nl >= 0 && cstack.bracelev[nl] >= bracelev;
2801       nl--)
2802    {
2803      if (cstack.cname[nl] != NULL)
2804	free (cstack.cname[nl]);
2805      cstack.nl = nl;
2806    }
2807}
2808
2809static void
2810write_classname (cn, qualifier)
2811     linebuffer *cn;
2812     char *qualifier;
2813{
2814  int i, len;
2815  int qlen = strlen (qualifier);
2816
2817  if (cstack.nl == 0 || cstack.cname[0] == NULL)
2818    {
2819      len = 0;
2820      cn->len = 0;
2821      cn->buffer[0] = '\0';
2822    }
2823  else
2824    {
2825      len = strlen (cstack.cname[0]);
2826      linebuffer_setlen (cn, len);
2827      strcpy (cn->buffer, cstack.cname[0]);
2828    }
2829  for (i = 1; i < cstack.nl; i++)
2830    {
2831      char *s;
2832      int slen;
2833
2834      s = cstack.cname[i];
2835      if (s == NULL)
2836	continue;
2837      slen = strlen (s);
2838      len += slen + qlen;
2839      linebuffer_setlen (cn, len);
2840      strncat (cn->buffer, qualifier, qlen);
2841      strncat (cn->buffer, s, slen);
2842    }
2843}
2844
2845
2846static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2847static void make_C_tag __P((bool));
2848
2849/*
2850 * consider_token ()
2851 *	checks to see if the current token is at the start of a
2852 *	function or variable, or corresponds to a typedef, or
2853 * 	is a struct/union/enum tag, or #define, or an enum constant.
2854 *
2855 *	*IS_FUNC_OR_VAR is set to TRUE for functions, variables,
2856 *	operators, GNU macro names, Objective C class and category tags,
 *	and #define macros with args.  On most other paths it is not
 *	written at all, so the caller must initialise it.
 *
 *	*C_EXTP is the mask of C language extensions in effect.  When it
 *	has C_AUTO set and a C++ construct is recognised here, C_AUTO is
 *	cleared and C_PLPL is set.
 *
 *	Returns TRUE when the token should be recorded as a tag
 *	candidate; FALSE when it only advanced the state machines or is
 *	to be ignored.
2857 *
2858 * Globals
2859 *	fvdef			IN OUT
2860 *	structdef		IN OUT
2861 *	definedef		IN OUT
2862 *	typdef			IN OUT
2863 *	objdef			IN OUT
2864 */
2865
2866static bool
2867consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2868     register char *str;	/* IN: token pointer */
2869     register int len;		/* IN: token length */
2870     register int c;		/* IN: first char after the token */
2871     int *c_extp;		/* IN, OUT: C extensions mask */
2872     int bracelev;		/* IN: brace level */
2873     int parlev;		/* IN: parenthesis level */
2874     bool *is_func_or_var;	/* OUT: function or variable found */
2875{
2876  /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2877     structtype is the type of the preceding struct-like keyword, and
2878     structbracelev is the brace level where it has been seen. */
2879  static enum sym_type structtype;
2880  static int structbracelev;
2881  static enum sym_type toktype;	/* symbol type of the current token */
2882
2883
2884  toktype = C_symtype (str, len, *c_extp);
2885
2886  /*
2887   * Skip __attribute__
2888   */
2889  if (toktype == st_C_attribute)
2890    {
2891      inattribute = TRUE;
2892      return FALSE;
2893     }
2894
2895   /*
2896    * Advance the definedef state machine.
2897    */
2898   switch (definedef)
2899     {
2900     case dnone:
2901       /* We're not on a preprocessor line. */
2902       if (toktype == st_C_gnumacro)
2903	 {
2904	   fvdef = fdefunkey;
2905	   return FALSE;
2906	 }
2907       break;
2908     case dsharpseen:
       /* First token after `#': only `define' is of interest. */
2909       if (toktype == st_C_define)
2910	 {
2911	   definedef = ddefineseen;
2912	 }
2913       else
2914	 {
2915	   definedef = dignorerest;
2916	 }
2917       return FALSE;
2918     case ddefineseen:
2919       /*
2920	* Make a tag for any macro, unless it is a constant
2921	* and constantypedefs is FALSE.
2922	*/
2923       definedef = dignorerest;
2924       *is_func_or_var = (c == '(');	/* macro with args iff `(' follows */
2925       if (!*is_func_or_var && !constantypedefs)
2926	 return FALSE;
2927       else
2928	 return TRUE;
2929     case dignorerest:
2930       return FALSE;
2931     default:
2932       error ("internal error: definedef value.", (char *)NULL);
2933     }
2934
2935   /*
2936    * Now typedefs
2937    */
2938   switch (typdef)
2939     {
2940     case tnone:
2941       if (toktype == st_C_typedef)
2942	 {
2943	   if (typedefs)
2944	     typdef = tkeyseen;
2945	   fvextern = FALSE;
2946	   fvdef = fvnone;
2947	   return FALSE;
2948	 }
2949       break;
2950     case tkeyseen:
       /* After `typedef': a plain identifier or a class/struct/enum
	  keyword moves on to ttypeseen; any other token type leaves
	  the state unchanged. */
2951       switch (toktype)
2952	 {
2953	 case st_none:
2954	 case st_C_class:
2955	 case st_C_struct:
2956	 case st_C_enum:
2957	   typdef = ttypeseen;
2958	 }
2959       break;
2960     case ttypeseen:
2961       if (structdef == snone && fvdef == fvnone)
2962	 {
2963	   fvdef = fvnameseen;
2964	   return TRUE;
2965	 }
2966       break;
2967     case tend:
       /* Accept any token except a struct-like keyword. */
2968       switch (toktype)
2969	 {
2970	 case st_C_class:
2971	 case st_C_struct:
2972	 case st_C_enum:
2973	   return FALSE;
2974	 }
2975       return TRUE;
2976     }
2977
2978   /*
2979    * This structdef business is NOT invoked when we are ctags and the
2980    * file is plain C.  This is because a struct tag may have the same
2981    * name as another tag, and this loses with ctags.
2982    */
2983   switch (toktype)
2984     {
2985     case st_C_javastruct:
2986       if (structdef == stagseen)
2987	 structdef = scolonseen;
2988       return FALSE;
2989     case st_C_template:
2990     case st_C_class:
2991       if ((*c_extp & C_AUTO)	/* automatic detection of C++ language */
2992	   && bracelev == 0
2993	   && definedef == dnone && structdef == snone
2994	   && typdef == tnone && fvdef == fvnone)
2995	 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2996       if (toktype == st_C_template)
2997	 break;
2998       /* FALLTHRU */
2999     case st_C_struct:
3000     case st_C_enum:
3001       if (parlev == 0
3002	   && fvdef != vignore
3003	   && (typdef == tkeyseen
3004	       || (typedefs_or_cplusplus && structdef == snone)))
3005	 {
	   /* Remember which keyword was seen and at which brace level;
	      the enum constant test further down relies on both. */
3006	   structdef = skeyseen;
3007	   structtype = toktype;
3008	   structbracelev = bracelev;
3009	   if (fvdef == fvnameseen)
3010	     fvdef = fvnone;
3011	 }
3012       return FALSE;
3013     }
3014
   /* The token right after a struct-like keyword is its tag. */
3015   if (structdef == skeyseen)
3016     {
3017       structdef = stagseen;
3018       return TRUE;
3019     }
3020
3021   if (typdef != tnone)
3022     definedef = dnone;
3023
3024   /* Detect Objective C constructs. */
3025   switch (objdef)
3026     {
3027     case onone:
3028       switch (toktype)
3029	 {
3030	 case st_C_objprot:
3031	   objdef = oprotocol;
3032	   return FALSE;
3033	 case st_C_objimpl:
3034	   objdef = oimplementation;
3035	   return FALSE;
3036	 }
3037       break;
3038     case oimplementation:
3039       /* Save the class tag for functions or variables defined inside. */
3040       objtag = savenstr (str, len);
3041       objdef = oinbody;
3042       return FALSE;
3043     case oprotocol:
3044       /* Save the class tag for categories. */
3045       objtag = savenstr (str, len);
3046       objdef = otagseen;
3047       *is_func_or_var = TRUE;
3048       return TRUE;
3049     case oparenseen:
3050       objdef = ocatseen;
3051       *is_func_or_var = TRUE;
3052       return TRUE;
3053     case oinbody:
3054       break;
3055     case omethodsign:
3056       if (parlev == 0)
3057	 {
	   /* Start a fresh method name in token_name with this token. */
3058	   fvdef = fvnone;
3059	   objdef = omethodtag;
3060	   linebuffer_setlen (&token_name, len);
3061	   strncpy (token_name.buffer, str, len);
3062	   token_name.buffer[len] = '\0';
3063	   return TRUE;
3064	 }
3065       return FALSE;
3066     case omethodcolon:
3067       if (parlev == 0)
3068	 objdef = omethodparm;
3069       return FALSE;
3070     case omethodparm:
3071       if (parlev == 0)
3072	 {
	   /* Append this token to the method name built so far. */
3073	   fvdef = fvnone;
3074	   objdef = omethodtag;
3075	   linebuffer_setlen (&token_name, token_name.len + len);
3076	   strncat (token_name.buffer, str, len);
3077	   return TRUE;
3078	 }
3079       return FALSE;
3080     case oignore:
3081       if (toktype == st_C_objend)
3082	 {
3083	   /* Memory leakage here: the string pointed by objtag is
3084	      never released, because many tests would be needed to
3085	      avoid breaking on incorrect input code.  The amount of
3086	      memory leaked here is the sum of the lengths of the
3087	      class tags.
3088	   free (objtag); */
3089	   objdef = onone;
3090	 }
3091       return FALSE;
3092     }
3093
3094   /* A function, variable or enum constant? */
3095   switch (toktype)
3096     {
3097     case st_C_extern:
3098       fvextern = TRUE;
3099       switch  (fvdef)
3100	 {
3101	 case finlist:
3102	 case flistseen:
3103	 case fignore:
3104	 case vignore:
3105	   break;
3106	 default:
3107	   fvdef = fvnone;
3108	 }
3109       return FALSE;
3110     case st_C_ignore:
3111       fvextern = FALSE;
3112       fvdef = vignore;
3113       return FALSE;
3114     case st_C_operator:
3115       fvdef = foperator;
3116       *is_func_or_var = TRUE;
3117       return TRUE;
3118     case st_none:
3119       if (constantypedefs
3120	   && structdef == snone
3121	   && structtype == st_C_enum && bracelev > structbracelev)
3122	 return TRUE;		/* enum constant */
3123       switch (fvdef)
3124	 {
3125	 case fdefunkey:
3126	   if (bracelev > 0)
3127	     break;
3128	   fvdef = fdefunname;	/* GNU macro */
3129	   *is_func_or_var = TRUE;
3130	   return TRUE;
3131	 case fvnone:
3132	   switch (typdef)
3133	     {
3134	     case ttypeseen:
3135	       return FALSE;
3136	     case tnone:
	       /* `asm' and `__asm__' as whole tokens start something
		  to be ignored. */
3137	       if ((strneq (str, "asm", 3) && endtoken (str[3]))
3138		   || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3139		 {
3140		   fvdef = vignore;
3141		   return FALSE;
3142		 }
3143	       break;
3144	     }
3145	  /* FALLTHRU */
3146	  case fvnameseen:
	  /* A token ending in `::operator' is a C++ operator name. */
3147	  if (len >= 10 && strneq (str+len-10, "::operator", 10))
3148	    {
3149	      if (*c_extp & C_AUTO) /* automatic detection of C++ */
3150		*c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3151	      fvdef = foperator;
3152	      *is_func_or_var = TRUE;
3153	      return TRUE;
3154	    }
	  /* Inside braces, only names directly in a struct body count. */
3155	  if (bracelev > 0 && !instruct)
3156	    break;
3157	  fvdef = fvnameseen;	/* function or variable */
3158	  *is_func_or_var = TRUE;
3159	  return TRUE;
3160	}
3161      break;
3162    }
3163
3164  return FALSE;
3165}
3166
3167
3168/*
3169 * C_entries often keeps pointers to tokens or lines which are older than
3170 * the line currently read.  By keeping two line buffers, and switching
3171 * them at end of line, it is possible to use those pointers.
3172 */
3173static struct
3174{
3175  long linepos;			/* value of charno when the line was read */
3176  linebuffer lb;		/* the line itself */
3177} lbs[2];
3178
/* The macros below are meant to be expanded inside C_entries: they use
   its locals curndx, newndx, c_ext, lp, quotednl, inf and savetoken. */

/* True when the most recently read line lives in the current buffer. */
3179#define current_lb_is_new (newndx == curndx)
/* Make the other buffer current, so that the next line read does not
   overwrite the one still pointed to. */
3180#define switch_line_buffers() (curndx = 1 - curndx)
3181
3182#define curlb (lbs[curndx].lb)
3183#define newlb (lbs[newndx].lb)
3184#define curlinepos (lbs[curndx].linepos)
3185#define newlinepos (lbs[newndx].linepos)
3186
/* Tests on the C extensions mask c_ext. */
3187#define plainc ((c_ext & C_EXT) == C_PLAIN)
3188#define cplpl (c_ext & C_PLPL)
3189#define cjava ((c_ext & C_JAVA) == C_JAVA)
3190
/* Read the next input line into the current buffer and restart
   scanning at its beginning, leaving definedef as it is. */
3191#define CNL_SAVE_DEFINEDEF()						\
3192do {									\
3193  curlinepos = charno;							\
3194  readline (&curlb, inf);						\
3195  lp = curlb.buffer;							\
3196  quotednl = FALSE;							\
3197  newndx = curndx;							\
3198} while (0)
3199
/* Like CNL_SAVE_DEFINEDEF, but also restore the token put aside in
   savetoken (if valid) and reset definedef to dnone. */
3200#define CNL()								\
3201do {									\
3202  CNL_SAVE_DEFINEDEF();							\
3203  if (savetoken.valid)							\
3204    {									\
3205      token = savetoken;						\
3206      savetoken.valid = FALSE;						\
3207    }									\
3208  definedef = dnone;							\
3209} while (0)
3210
3211
3212static void
3213make_C_tag (isfun)
3214     bool isfun;
3215{
3216  /* This function is never called when token.valid is FALSE, but
3217     we must protect against invalid input or internal errors. */
3218  if (!DEBUG && !token.valid)
3219    return;
3220
3221  if (token.valid)
3222    make_tag (token_name.buffer, token_name.len, isfun, token.line,
3223	      token.offset+token.length+1, token.lineno, token.linepos);
3224  else				/* this case is optimised away if !DEBUG */
3225    make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3226	      token_name.len + 17, isfun, token.line,
3227	      token.offset+token.length+1, token.lineno, token.linepos);
3228
3229  token.valid = FALSE;
3230}
3231
3232
3233/*
3234 * C_entries ()
3235 *	This routine finds functions, variables, typedefs,
3236 * 	#define's, enum constants and struct/union/enum definitions in
3237 * 	C syntax and adds them to the list.
3238 */
3239static void
3240C_entries (c_ext, inf)
3241     int c_ext;			/* extension of C */
3242     FILE *inf;			/* input file */
3243{
3244  register char c;		/* latest char read; '\0' for end of line */
3245  register char *lp;		/* pointer one beyond the character `c' */
3246  int curndx, newndx;		/* indices for current and new lb */
3247  register int tokoff;		/* offset in line of start of current token */
3248  register int toklen;		/* length of current token */
3249  char *qualifier;		/* string used to qualify names */
3250  int qlen;			/* length of qualifier */
3251  int bracelev;			/* current brace level */
3252  int bracketlev;		/* current bracket level */
3253  int parlev;			/* current parenthesis level */
3254  int attrparlev;		/* __attribute__ parenthesis level */
3255  int templatelev;		/* current template level */
3256  int typdefbracelev;		/* bracelev where a typedef struct body begun */
3257  bool incomm, inquote, inchar, quotednl, midtoken;
3258  bool yacc_rules;		/* in the rules part of a yacc file */
3259  struct tok savetoken = {0};	/* token saved during preprocessor handling */
3260
3261
3262  linebuffer_init (&lbs[0].lb);
3263  linebuffer_init (&lbs[1].lb);
3264  if (cstack.size == 0)
3265    {
3266      cstack.size = (DEBUG) ? 1 : 4;
3267      cstack.nl = 0;
3268      cstack.cname = xnew (cstack.size, char *);
3269      cstack.bracelev = xnew (cstack.size, int);
3270    }
3271
3272  tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3273  curndx = newndx = 0;
3274  lp = curlb.buffer;
3275  *lp = 0;
3276
3277  fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3278  structdef = snone; definedef = dnone; objdef = onone;
3279  yacc_rules = FALSE;
3280  midtoken = inquote = inchar = incomm = quotednl = FALSE;
3281  token.valid = savetoken.valid = FALSE;
3282  bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3283  if (cjava)
3284    { qualifier = "."; qlen = 1; }
3285  else
3286    { qualifier = "::"; qlen = 2; }
3287
3288
3289  while (!feof (inf))
3290    {
3291      c = *lp++;
3292      if (c == '\\')
3293	{
3294	  /* If we are at the end of the line, the next character is a
3295	     '\0'; do not skip it, because it is what tells us
3296	     to read the next line.  */
3297	  if (*lp == '\0')
3298	    {
3299	      quotednl = TRUE;
3300	      continue;
3301	    }
3302	  lp++;
3303	  c = ' ';
3304	}
3305      else if (incomm)
3306	{
3307	  switch (c)
3308	    {
3309	    case '*':
3310	      if (*lp == '/')
3311		{
3312		  c = *lp++;
3313		  incomm = FALSE;
3314		}
3315	      break;
3316	    case '\0':
3317	      /* Newlines inside comments do not end macro definitions in
3318		 traditional cpp. */
3319	      CNL_SAVE_DEFINEDEF ();
3320	      break;
3321	    }
3322	  continue;
3323	}
3324      else if (inquote)
3325	{
3326	  switch (c)
3327	    {
3328	    case '"':
3329	      inquote = FALSE;
3330	      break;
3331	    case '\0':
3332	      /* Newlines inside strings do not end macro definitions
3333		 in traditional cpp, even though compilers don't
3334		 usually accept them. */
3335	      CNL_SAVE_DEFINEDEF ();
3336	      break;
3337	    }
3338	  continue;
3339	}
3340      else if (inchar)
3341	{
3342	  switch (c)
3343	    {
3344	    case '\0':
3345	      /* Hmmm, something went wrong. */
3346	      CNL ();
3347	      /* FALLTHRU */
3348	    case '\'':
3349	      inchar = FALSE;
3350	      break;
3351	    }
3352	  continue;
3353	}
3354      else if (bracketlev > 0)
3355	{
3356	  switch (c)
3357	    {
3358	    case ']':
3359	      if (--bracketlev > 0)
3360		continue;
3361	      break;
3362	    case '\0':
3363	      CNL_SAVE_DEFINEDEF ();
3364	      break;
3365	    }
3366	  continue;
3367	}
3368      else switch (c)
3369	{
3370	case '"':
3371	  inquote = TRUE;
3372	  if (inattribute)
3373	    break;
3374	  switch (fvdef)
3375	    {
3376	    case fdefunkey:
3377	    case fstartlist:
3378	    case finlist:
3379	    case fignore:
3380	    case vignore:
3381	      break;
3382	    default:
3383	      fvextern = FALSE;
3384	      fvdef = fvnone;
3385	    }
3386	  continue;
3387	case '\'':
3388	  inchar = TRUE;
3389	  if (inattribute)
3390	    break;
3391	  if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3392	    {
3393	      fvextern = FALSE;
3394	      fvdef = fvnone;
3395	    }
3396	  continue;
3397	case '/':
3398	  if (*lp == '*')
3399	    {
3400	      lp++;
3401	      incomm = TRUE;
3402	      continue;
3403	    }
3404	  else if (/* cplpl && */ *lp == '/')
3405	    {
3406	      c = '\0';
3407	      break;
3408	    }
3409	  else
3410	    break;
3411	case '%':
3412	  if ((c_ext & YACC) && *lp == '%')
3413	    {
3414	      /* Entering or exiting rules section in yacc file. */
3415	      lp++;
3416	      definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3417	      typdef = tnone; structdef = snone;
3418	      midtoken = inquote = inchar = incomm = quotednl = FALSE;
3419	      bracelev = 0;
3420	      yacc_rules = !yacc_rules;
3421	      continue;
3422	    }
3423	  else
3424	    break;
3425	case '#':
3426	  if (definedef == dnone)
3427	    {
3428	      char *cp;
3429	      bool cpptoken = TRUE;
3430
3431	      /* Look back on this line.  If all blanks, or nonblanks
3432		 followed by an end of comment, this is a preprocessor
3433		 token. */
3434	      for (cp = newlb.buffer; cp < lp-1; cp++)
3435		if (!iswhite (*cp))
3436		  {
3437		    if (*cp == '*' && *(cp+1) == '/')
3438		      {
3439			cp++;
3440			cpptoken = TRUE;
3441		      }
3442		    else
3443		      cpptoken = FALSE;
3444		  }
3445	      if (cpptoken)
3446		definedef = dsharpseen;
3447	    } /* if (definedef == dnone) */
3448	  continue;
3449	case '[':
3450	  bracketlev++;
3451	    continue;
3452	} /* switch (c) */
3453
3454
3455      /* Consider token only if some involved conditions are satisfied. */
3456      if (typdef != tignore
3457	  && definedef != dignorerest
3458	  && fvdef != finlist
3459	  && templatelev == 0
3460	  && (definedef != dnone
3461	      || structdef != scolonseen)
3462	  && !inattribute)
3463	{
3464	  if (midtoken)
3465	    {
3466	      if (endtoken (c))
3467		{
3468		  if (c == ':' && *lp == ':' && begtoken (lp[1]))
3469		    /* This handles :: in the middle,
3470		       but not at the beginning of an identifier.
3471		       Also, space-separated :: is not recognised. */
3472		    {
3473		      if (c_ext & C_AUTO) /* automatic detection of C++ */
3474			c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3475		      lp += 2;
3476		      toklen += 2;
3477		      c = lp[-1];
3478		      goto still_in_token;
3479		    }
3480		  else
3481		    {
3482		      bool funorvar = FALSE;
3483
3484		      if (yacc_rules
3485			  || consider_token (newlb.buffer + tokoff, toklen, c,
3486					     &c_ext, bracelev, parlev,
3487					     &funorvar))
3488			{
3489			  if (fvdef == foperator)
3490			    {
3491			      char *oldlp = lp;
3492			      lp = skip_spaces (lp-1);
3493			      if (*lp != '\0')
3494				lp += 1;
3495			      while (*lp != '\0'
3496				     && !iswhite (*lp) && *lp != '(')
3497				lp += 1;
3498			      c = *lp++;
3499			      toklen += lp - oldlp;
3500			    }
3501			  token.named = FALSE;
3502			  if (!plainc
3503			      && nestlev > 0 && definedef == dnone)
3504			    /* in struct body */
3505			    {
3506                              write_classname (&token_name, qualifier);
3507			      linebuffer_setlen (&token_name,
3508						 token_name.len+qlen+toklen);
3509			      strcat (token_name.buffer, qualifier);
3510			      strncat (token_name.buffer,
3511				       newlb.buffer + tokoff, toklen);
3512			      token.named = TRUE;
3513			    }
3514			  else if (objdef == ocatseen)
3515			    /* Objective C category */
3516			    {
3517			      int len = strlen (objtag) + 2 + toklen;
3518			      linebuffer_setlen (&token_name, len);
3519			      strcpy (token_name.buffer, objtag);
3520			      strcat (token_name.buffer, "(");
3521			      strncat (token_name.buffer,
3522				       newlb.buffer + tokoff, toklen);
3523			      strcat (token_name.buffer, ")");
3524			      token.named = TRUE;
3525			    }
3526			  else if (objdef == omethodtag
3527				   || objdef == omethodparm)
3528			    /* Objective C method */
3529			    {
3530			      token.named = TRUE;
3531			    }
3532			  else if (fvdef == fdefunname)
3533			    /* GNU DEFUN and similar macros */
3534			    {
3535			      bool defun = (newlb.buffer[tokoff] == 'F');
3536			      int off = tokoff;
3537			      int len = toklen;
3538
3539			      /* Rewrite the tag so that emacs lisp DEFUNs
3540				 can be found by their elisp name */
3541			      if (defun)
3542				{
3543				  off += 1;
3544				  len -= 1;
3545				}
3546			      linebuffer_setlen (&token_name, len);
3547			      strncpy (token_name.buffer,
3548				       newlb.buffer + off, len);
3549			      token_name.buffer[len] = '\0';
3550			      if (defun)
3551				while (--len >= 0)
3552				  if (token_name.buffer[len] == '_')
3553				    token_name.buffer[len] = '-';
3554			      token.named = defun;
3555			    }
3556			  else
3557			    {
3558			      linebuffer_setlen (&token_name, toklen);
3559			      strncpy (token_name.buffer,
3560				       newlb.buffer + tokoff, toklen);
3561			      token_name.buffer[toklen] = '\0';
3562			      /* Name macros and members. */
3563			      token.named = (structdef == stagseen
3564					     || typdef == ttypeseen
3565					     || typdef == tend
3566					     || (funorvar
3567						 && definedef == dignorerest)
3568					     || (funorvar
3569						 && definedef == dnone
3570						 && structdef == snone
3571						 && bracelev > 0));
3572			    }
3573			  token.lineno = lineno;
3574			  token.offset = tokoff;
3575			  token.length = toklen;
3576			  token.line = newlb.buffer;
3577			  token.linepos = newlinepos;
3578			  token.valid = TRUE;
3579
3580			  if (definedef == dnone
3581			      && (fvdef == fvnameseen
3582				  || fvdef == foperator
3583				  || structdef == stagseen
3584				  || typdef == tend
3585				  || typdef == ttypeseen
3586				  || objdef != onone))
3587			    {
3588			      if (current_lb_is_new)
3589				switch_line_buffers ();
3590			    }
3591			  else if (definedef != dnone
3592				   || fvdef == fdefunname
3593				   || instruct)
3594			    make_C_tag (funorvar);
3595			}
3596		      else /* not yacc and consider_token failed */
3597			{
3598			  if (inattribute && fvdef == fignore)
3599			    {
3600			      /* We have just met __attribute__ after a
3601				 function parameter list: do not tag the
3602				 function again. */
3603			      fvdef = fvnone;
3604			    }
3605			}
3606		      midtoken = FALSE;
3607		    }
3608		} /* if (endtoken (c)) */
3609	      else if (intoken (c))
3610		still_in_token:
3611		{
3612		  toklen++;
3613		  continue;
3614		}
3615	    } /* if (midtoken) */
3616	  else if (begtoken (c))
3617	    {
3618	      switch (definedef)
3619		{
3620		case dnone:
3621		  switch (fvdef)
3622		    {
3623		    case fstartlist:
3624		      /* This prevents tagging fb in
3625			 void (__attribute__((noreturn)) *fb) (void);
3626			 Fixing this is not easy and not very important. */
3627		      fvdef = finlist;
3628		      continue;
3629		    case flistseen:
3630		      if (plainc || declarations)
3631			{
3632			  make_C_tag (TRUE); /* a function */
3633			  fvdef = fignore;
3634			}
3635		      break;
3636		    }
3637		  if (structdef == stagseen && !cjava)
3638		    {
3639		      popclass_above (bracelev);
3640		      structdef = snone;
3641		    }
3642		  break;
3643		case dsharpseen:
3644		  savetoken = token;
3645		  break;
3646		}
3647	      if (!yacc_rules || lp == newlb.buffer + 1)
3648		{
3649		  tokoff = lp - 1 - newlb.buffer;
3650		  toklen = 1;
3651		  midtoken = TRUE;
3652		}
3653	      continue;
3654	    } /* if (begtoken) */
3655	} /* if must look at token */
3656
3657
3658      /* Detect end of line, colon, comma, semicolon and various braces
3659	 after having handled a token.*/
3660      switch (c)
3661	{
3662	case ':':
3663	  if (inattribute)
3664	    break;
3665	  if (yacc_rules && token.offset == 0 && token.valid)
3666	    {
3667	      make_C_tag (FALSE); /* a yacc function */
3668	      break;
3669	    }
3670	  if (definedef != dnone)
3671	    break;
3672	  switch (objdef)
3673	    {
3674	    case  otagseen:
3675	      objdef = oignore;
3676	      make_C_tag (TRUE); /* an Objective C class */
3677	      break;
3678	    case omethodtag:
3679	    case omethodparm:
3680	      objdef = omethodcolon;
3681	      linebuffer_setlen (&token_name, token_name.len + 1);
3682	      strcat (token_name.buffer, ":");
3683	      break;
3684	    }
3685	  if (structdef == stagseen)
3686	    {
3687	      structdef = scolonseen;
3688	      break;
3689	    }
3690	  /* Should be useless, but may work as a safety net. */
3691	  if (cplpl && fvdef == flistseen)
3692	    {
3693	      make_C_tag (TRUE); /* a function */
3694	      fvdef = fignore;
3695	      break;
3696	    }
3697	  break;
3698	case ';':
3699	  if (definedef != dnone || inattribute)
3700	    break;
3701	  switch (typdef)
3702	    {
3703	    case tend:
3704	    case ttypeseen:
3705	      make_C_tag (FALSE); /* a typedef */
3706	      typdef = tnone;
3707	      fvdef = fvnone;
3708	      break;
3709	    case tnone:
3710	    case tinbody:
3711	    case tignore:
3712	      switch (fvdef)
3713		{
3714		case fignore:
3715		  if (typdef == tignore || cplpl)
3716		    fvdef = fvnone;
3717		  break;
3718		case fvnameseen:
3719		  if ((globals && bracelev == 0 && (!fvextern || declarations))
3720		      || (members && instruct))
3721		    make_C_tag (FALSE); /* a variable */
3722		  fvextern = FALSE;
3723		  fvdef = fvnone;
3724		  token.valid = FALSE;
3725		  break;
3726		case flistseen:
3727		  if ((declarations
3728		       && (cplpl || !instruct)
3729		       && (typdef == tnone || (typdef != tignore && instruct)))
3730		      || (members
3731			  && plainc && instruct))
3732		    make_C_tag (TRUE);  /* a function */
3733		  /* FALLTHRU */
3734		default:
3735		  fvextern = FALSE;
3736		  fvdef = fvnone;
3737		  if (declarations
3738		       && cplpl && structdef == stagseen)
3739		    make_C_tag (FALSE);	/* forward declaration */
3740		  else
3741		    token.valid = FALSE;
3742		} /* switch (fvdef) */
3743	      /* FALLTHRU */
3744	    default:
3745	      if (!instruct)
3746		typdef = tnone;
3747	    }
3748	  if (structdef == stagseen)
3749	    structdef = snone;
3750	  break;
3751	case ',':
3752	  if (definedef != dnone || inattribute)
3753	    break;
3754	  switch (objdef)
3755	    {
3756	    case omethodtag:
3757	    case omethodparm:
3758	      make_C_tag (TRUE); /* an Objective C method */
3759	      objdef = oinbody;
3760	      break;
3761	    }
3762	  switch (fvdef)
3763	    {
3764	    case fdefunkey:
3765	    case foperator:
3766	    case fstartlist:
3767	    case finlist:
3768	    case fignore:
3769	    case vignore:
3770	      break;
3771	    case fdefunname:
3772	      fvdef = fignore;
3773	      break;
3774	    case fvnameseen:
3775	      if (parlev == 0
3776		  && ((globals
3777		       && bracelev == 0
3778		       && templatelev == 0
3779		       && (!fvextern || declarations))
3780		      || (members && instruct)))
3781		  make_C_tag (FALSE); /* a variable */
3782	      break;
3783	    case flistseen:
3784	      if ((declarations && typdef == tnone && !instruct)
3785		  || (members && typdef != tignore && instruct))
3786		{
3787		  make_C_tag (TRUE); /* a function */
3788		  fvdef = fvnameseen;
3789		}
3790	      else if (!declarations)
3791		fvdef = fvnone;
3792	      token.valid = FALSE;
3793	      break;
3794	    default:
3795	      fvdef = fvnone;
3796	    }
3797	  if (structdef == stagseen)
3798	    structdef = snone;
3799	  break;
3800	case ']':
3801	  if (definedef != dnone || inattribute)
3802	    break;
3803	  if (structdef == stagseen)
3804	    structdef = snone;
3805	  switch (typdef)
3806	    {
3807	    case ttypeseen:
3808	    case tend:
3809	      typdef = tignore;
3810	      make_C_tag (FALSE);	/* a typedef */
3811	      break;
3812	    case tnone:
3813	    case tinbody:
3814	      switch (fvdef)
3815		{
3816		case foperator:
3817		case finlist:
3818		case fignore:
3819		case vignore:
3820		  break;
3821		case fvnameseen:
3822		  if ((members && bracelev == 1)
3823		      || (globals && bracelev == 0
3824			  && (!fvextern || declarations)))
3825		    make_C_tag (FALSE); /* a variable */
3826		  /* FALLTHRU */
3827		default:
3828		  fvdef = fvnone;
3829		}
3830	      break;
3831	    }
3832	  break;
3833	case '(':
3834	  if (inattribute)
3835	    {
3836	      attrparlev++;
3837	      break;
3838	    }
3839	  if (definedef != dnone)
3840	    break;
3841	  if (objdef == otagseen && parlev == 0)
3842	    objdef = oparenseen;
3843	  switch (fvdef)
3844	    {
3845	    case fvnameseen:
3846	      if (typdef == ttypeseen
3847		  && *lp != '*'
3848		  && !instruct)
3849		{
3850		  /* This handles constructs like:
3851		     typedef void OperatorFun (int fun); */
3852		  make_C_tag (FALSE);
3853		  typdef = tignore;
3854		  fvdef = fignore;
3855		  break;
3856		}
3857	      /* FALLTHRU */
3858	    case foperator:
3859	      fvdef = fstartlist;
3860	      break;
3861	    case flistseen:
3862	      fvdef = finlist;
3863	      break;
3864	    }
3865	  parlev++;
3866	  break;
3867	case ')':
3868	  if (inattribute)
3869	    {
3870	      if (--attrparlev == 0)
3871		inattribute = FALSE;
3872	      break;
3873	    }
3874	  if (definedef != dnone)
3875	    break;
3876	  if (objdef == ocatseen && parlev == 1)
3877	    {
3878	      make_C_tag (TRUE); /* an Objective C category */
3879	      objdef = oignore;
3880	    }
3881	  if (--parlev == 0)
3882	    {
3883	      switch (fvdef)
3884		{
3885		case fstartlist:
3886		case finlist:
3887		  fvdef = flistseen;
3888		  break;
3889		}
3890	      if (!instruct
3891		  && (typdef == tend
3892		      || typdef == ttypeseen))
3893		{
3894		  typdef = tignore;
3895		  make_C_tag (FALSE); /* a typedef */
3896		}
3897	    }
3898	  else if (parlev < 0)	/* can happen due to ill-conceived #if's. */
3899	    parlev = 0;
3900	  break;
3901	case '{':
3902	  if (definedef != dnone)
3903	    break;
3904	  if (typdef == ttypeseen)
3905	    {
3906	      /* Whenever typdef is set to tinbody (currently only
3907		 here), typdefbracelev should be set to bracelev. */
3908	      typdef = tinbody;
3909	      typdefbracelev = bracelev;
3910	    }
3911	  switch (fvdef)
3912	    {
3913	    case flistseen:
3914	      make_C_tag (TRUE);    /* a function */
3915	      /* FALLTHRU */
3916	    case fignore:
3917	      fvdef = fvnone;
3918	      break;
3919	    case fvnone:
3920	      switch (objdef)
3921		{
3922		case otagseen:
3923		  make_C_tag (TRUE); /* an Objective C class */
3924		  objdef = oignore;
3925		  break;
3926		case omethodtag:
3927		case omethodparm:
3928		  make_C_tag (TRUE); /* an Objective C method */
3929		  objdef = oinbody;
3930		  break;
3931		default:
3932		  /* Neutralize `extern "C" {' grot. */
3933		  if (bracelev == 0 && structdef == snone && nestlev == 0
3934		      && typdef == tnone)
3935		    bracelev = -1;
3936		}
3937	      break;
3938	    }
3939	  switch (structdef)
3940	    {
3941	    case skeyseen:	   /* unnamed struct */
3942	      pushclass_above (bracelev, NULL, 0);
3943	      structdef = snone;
3944	      break;
3945	    case stagseen:	   /* named struct or enum */
3946	    case scolonseen:	   /* a class */
3947	      pushclass_above (bracelev,token.line+token.offset, token.length);
3948	      structdef = snone;
3949	      make_C_tag (FALSE);  /* a struct or enum */
3950	      break;
3951	    }
3952	  bracelev++;
3953	  break;
3954	case '*':
3955	  if (definedef != dnone)
3956	    break;
3957	  if (fvdef == fstartlist)
3958	    {
3959	      fvdef = fvnone;	/* avoid tagging `foo' in `foo (*bar()) ()' */
3960	      token.valid = FALSE;
3961	    }
3962	  break;
3963	case '}':
3964	  if (definedef != dnone)
3965	    break;
3966	  if (!ignoreindent && lp == newlb.buffer + 1)
3967	    {
3968	      if (bracelev != 0)
3969		token.valid = FALSE;
3970	      bracelev = 0;	/* reset brace level if first column */
3971	      parlev = 0;	/* also reset paren level, just in case... */
3972	    }
3973	  else if (bracelev > 0)
3974	    bracelev--;
3975	  else
3976	    token.valid = FALSE; /* something gone amiss, token unreliable */
3977	  popclass_above (bracelev);
3978	  structdef = snone;
3979	  /* Only if typdef == tinbody is typdefbracelev significant. */
3980	  if (typdef == tinbody && bracelev <= typdefbracelev)
3981	    {
3982	      assert (bracelev == typdefbracelev);
3983	      typdef = tend;
3984	    }
3985	  break;
3986	case '=':
3987	  if (definedef != dnone)
3988	    break;
3989	  switch (fvdef)
3990	    {
3991	    case foperator:
3992	    case finlist:
3993	    case fignore:
3994	    case vignore:
3995	      break;
3996	    case fvnameseen:
3997	      if ((members && bracelev == 1)
3998		  || (globals && bracelev == 0 && (!fvextern || declarations)))
3999		make_C_tag (FALSE); /* a variable */
4000	      /* FALLTHRU */
4001	    default:
4002	      fvdef = vignore;
4003	    }
4004	  break;
4005	case '<':
4006	  if (cplpl
4007	      && (structdef == stagseen || fvdef == fvnameseen))
4008	    {
4009	      templatelev++;
4010	      break;
4011	    }
4012	  goto resetfvdef;
4013	case '>':
4014	  if (templatelev > 0)
4015	    {
4016	      templatelev--;
4017	      break;
4018	    }
4019	  goto resetfvdef;
4020	case '+':
4021	case '-':
4022	  if (objdef == oinbody && bracelev == 0)
4023	    {
4024	      objdef = omethodsign;
4025	      break;
4026	    }
4027	  /* FALLTHRU */
4028	resetfvdef:
4029	case '#': case '~': case '&': case '%': case '/':
4030	case '|': case '^': case '!': case '.': case '?':
4031	  if (definedef != dnone)
4032	    break;
4033	  /* These surely cannot follow a function tag in C. */
4034	  switch (fvdef)
4035	    {
4036	    case foperator:
4037	    case finlist:
4038	    case fignore:
4039	    case vignore:
4040	      break;
4041	    default:
4042	      fvdef = fvnone;
4043	    }
4044	  break;
4045	case '\0':
4046	  if (objdef == otagseen)
4047	    {
4048	      make_C_tag (TRUE); /* an Objective C class */
4049	      objdef = oignore;
4050	    }
4051	  /* If a macro spans multiple lines don't reset its state. */
4052	  if (quotednl)
4053	    CNL_SAVE_DEFINEDEF ();
4054	  else
4055	    CNL ();
4056	  break;
4057	} /* switch (c) */
4058
4059    } /* while not eof */
4060
4061  free (lbs[0].lb.buffer);
4062  free (lbs[1].lb.buffer);
4063}
4064
4065/*
4066 * Process either a C++ file or a C file depending on the setting
4067 * of a global flag.
4068 */
4069static void
4070default_C_entries (inf)
4071     FILE *inf;
4072{
4073  C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4074}
4075
/* Always do plain C: hand INF to C_entries with 0 as its first
   argument instead of one of the C_PLPL/C_JAVA/C_STAR/YACC values
   used by the sibling wrappers. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  C_entries (0, inf);
}
4083
/* Always do C++: hand INF to C_entries with the C_PLPL value,
   regardless of the global `cplusplus' flag. */
static void
Cplusplus_entries (inf)
     FILE *inf;
{
  C_entries (C_PLPL, inf);
}
4091
/* Always do Java: hand INF to C_entries with the C_JAVA value. */
static void
Cjava_entries (inf)
     FILE *inf;
{
  C_entries (C_JAVA, inf);
}
4099
/* Always do C*: hand INF to C_entries with the C_STAR value. */
static void
Cstar_entries (inf)
     FILE *inf;
{
  C_entries (C_STAR, inf);
}
4107
/* Always do Yacc: hand INF to C_entries with the YACC value. */
static void
Yacc_entries (inf)
     FILE *inf;
{
  C_entries (YACC, inf);
}
4115
4116
/* Useful macros. */

/* Loop header that walks FILE_POINTER one line at a time.  Before every
   iteration it stops if feof (FILE_POINTER) is set; otherwise it calls
   readline to fill LINE_BUFFER and points CHAR_POINTER at the start of
   LINE_BUFFER's text.  The statement that follows the macro is the loop
   body; a `continue' in that body moves on to the next line.
   LINE_BUFFER and CHAR_POINTER are pasted in unparenthesized, so pass
   plain variable names. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  for (;			/* loop initialization */		\
       !feof (file_pointer)	/* loop test */				\
       &&			/* instructions at start of loop */	\
	  (readline (&line_buffer, file_pointer),			\
           char_pointer = line_buffer.buffer,				\
	   TRUE);							\
      )
4126
/* True when the text at CP starts with the literal keyword KW and the
   character right after it satisfies notinname.  On success CP is
   moved past KW and past any spaces that follow it (skip_spaces); on
   failure CP is left untouched.  CP is evaluated several times and
   assigned to, so it must be a side-effect-free lvalue. */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */	\
  ((assert("" kw), TRUE)   /* syntax error if not a literal string */	\
   && strneq ((cp), kw, sizeof(kw)-1)		/* cp points at kw */	\
   && notinname ((cp)[sizeof(kw)-1])		/* end of kw */		\
   && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))	/* skip spaces */
4132
/* Similar to LOOKING_AT but compares with strncaseeq, does not use
   notinname to check what follows the keyword, and does not skip
   spaces: on success CP is left pointing just past KW, on failure it
   is left untouched. */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */	\
  ((assert("" kw), TRUE)     /* syntax error if not a literal string */	\
   && strncaseeq ((cp), kw, sizeof(kw)-1)	/* cp points at kw */	\
   && ((cp) += sizeof(kw)-1))			/* step past kw only */
4138
4139/*
4140 * Read a file, but do no processing.  This is used to do regexp
4141 * matching on files that have no language defined.
4142 */
4143static void
4144just_read_file (inf)
4145     FILE *inf;
4146{
4147  register char *dummy;
4148
4149  LOOP_ON_INPUT_LINES (inf, lb, dummy)
4150    continue;
4151}
4152
4153
4154/* Fortran parsing */
4155
4156static void F_takeprec __P((void));
4157static void F_getit __P((FILE *));
4158
4159static void
4160F_takeprec ()
4161{
4162  dbp = skip_spaces (dbp);
4163  if (*dbp != '*')
4164    return;
4165  dbp++;
4166  dbp = skip_spaces (dbp);
4167  if (strneq (dbp, "(*)", 3))
4168    {
4169      dbp += 3;
4170      return;
4171    }
4172  if (!ISDIGIT (*dbp))
4173    {
4174      --dbp;			/* force failure */
4175      return;
4176    }
4177  do
4178    dbp++;
4179  while (ISDIGIT (*dbp));
4180}
4181
4182static void
4183F_getit (inf)
4184     FILE *inf;
4185{
4186  register char *cp;
4187
4188  dbp = skip_spaces (dbp);
4189  if (*dbp == '\0')
4190    {
4191      readline (&lb, inf);
4192      dbp = lb.buffer;
4193      if (dbp[5] != '&')
4194	return;
4195      dbp += 6;
4196      dbp = skip_spaces (dbp);
4197    }
4198  if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4199    return;
4200  for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4201    continue;
4202  make_tag (dbp, cp-dbp, TRUE,
4203	    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4204}
4205
4206
/* Tag Fortran program units found in INF.  Each line is checked for an
   optional type prefix (integer, real, logical, complex, character,
   double precision) followed by one of the keywords function,
   subroutine, entry or block data; the name after the keyword is
   tagged through F_getit.  The global dbp is the scan pointer, and
   every successful nocase_tail call is relied on to have moved it past
   the keyword it matched. */
static void
Fortran_functions (inf)
     FILE *inf;
{
  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    {
      if (*dbp == '%')
	dbp++;			/* Ratfor escape to fortran */
      dbp = skip_spaces (dbp);
      if (*dbp == '\0')
	continue;
      /* First step: get past a leading type specification, if any.
	 A `break' leaves this switch and goes on to the keyword check
	 below; a `continue' gives up on the whole line. */
      switch (lowcase (*dbp))
	{
	case 'i':
	  if (nocase_tail ("integer"))
	    F_takeprec ();
	  break;
	case 'r':
	  if (nocase_tail ("real"))
	    F_takeprec ();
	  break;
	case 'l':
	  if (nocase_tail ("logical"))
	    F_takeprec ();
	  break;
	case 'c':
	  if (nocase_tail ("complex") || nocase_tail ("character"))
	    F_takeprec ();
	  break;
	case 'd':
	  if (nocase_tail ("double"))
	    {
	      /* `double' only counts when `precision' follows on the
		 same line; no length suffix is looked for here. */
	      dbp = skip_spaces (dbp);
	      if (*dbp == '\0')
		continue;
	      if (nocase_tail ("precision"))
		break;
	      continue;
	    }
	  break;
	}
      dbp = skip_spaces (dbp);
      if (*dbp == '\0')
	continue;
      /* Second step: look for a keyword that introduces a taggable
	 name.  Every case ends the processing of this line. */
      switch (lowcase (*dbp))
	{
	case 'f':
	  if (nocase_tail ("function"))
	    F_getit (inf);
	  continue;
	case 's':
	  if (nocase_tail ("subroutine"))
	    F_getit (inf);
	  continue;
	case 'e':
	  if (nocase_tail ("entry"))
	    F_getit (inf);
	  continue;
	case 'b':
	  if (nocase_tail ("blockdata") || nocase_tail ("block data"))
	    {
	      dbp = skip_spaces (dbp);
	      if (*dbp == '\0')	/* assume un-named */
		make_tag ("blockdata", 9, TRUE,
			  lb.buffer, dbp - lb.buffer, lineno, linecharno);
	      else
		F_getit (inf);	/* look for name */
	    }
	  continue;
	}
    }
}
4279
4280
4281/*
4282 * Ada parsing
4283 * Original code by
4284 * Philippe Waroquiers (1998)
4285 */
4286
4287static void Ada_getit __P((FILE *, char *));
4288
/* Once we are positioned after an "interesting" keyword, let's get
   the real tag value necessary.

   The global dbp points just past the keyword.  The tag name is the
   Ada name found there with NAME_QUALIFIER appended (e.g. "/f", "/p";
   replaced by "/b" when the word `body' is met first).  More lines are
   read from INF when the current one ends, or turns into a `--'
   comment, before a name is found.  The name is either a double-quoted
   string or a run of letters, digits, `_' and `.'; in the second case
   nothing is tagged if that run is empty. */
static void
Ada_getit (inf, name_qualifier)
     FILE *inf;
     char *name_qualifier;
{
  register char *cp;
  char *name;
  char c;

  while (!feof (inf))
    {
      dbp = skip_spaces (dbp);
      if (*dbp == '\0'
	  || (dbp[0] == '-' && dbp[1] == '-'))
	{
	  /* Rest of the line is empty or a comment: go on with the
	     next line. */
	  readline (&lb, inf);
	  dbp = lb.buffer;
	}
      switch (lowcase(*dbp))
        {
        case 'b':
          if (nocase_tail ("body"))
            {
              /* Skipping body of   procedure body   or   package body or ....
		 resetting qualifier to body instead of spec. */
              name_qualifier = "/b";
              continue;
            }
          break;
        case 't':
          /* Skipping type of   task type   or   protected type ... */
          if (nocase_tail ("type"))
            continue;
          break;
        }
      if (*dbp == '"')
	{
	  /* Quoted name: take everything up to the closing quote (or
	     the end of the line if there is none). */
	  dbp += 1;
	  for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
	    continue;
	}
      else
	{
	  dbp = skip_spaces (dbp);
	  for (cp = dbp;
	       (*cp != '\0'
		&& (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
	       cp++)
	    continue;
	  if (cp == dbp)
	    return;
	}
      /* Temporarily terminate the name in place so that it can be
	 copied together with the qualifier, then restore the line. */
      c = *cp;
      *cp = '\0';
      name = concat (dbp, name_qualifier, "");
      *cp = c;
      make_tag (name, strlen (name), TRUE,
		lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
      free (name);
      if (c == '"')
	dbp = cp + 1;		/* leave dbp after the closing quote */
      return;
    }
}
4355
/* Scan the Ada source INF and tag what follows the keywords function,
   procedure, package, protected, task and (when `typedefs' is set)
   type, delegating the name extraction to Ada_getit.  When the
   `packages_only' flag is set only package and protected are
   considered.  String literals, `--' comments and character literals
   or attribute quotes are skipped.  The global dbp is the scan
   pointer. */
static void
Ada_funcs (inf)
     FILE *inf;
{
  bool inquote = FALSE;		/* inside a string left open on an
				   earlier line */
  bool skip_till_semicolumn = FALSE; /* inside a `use' clause */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    {
      while (*dbp != '\0')
	{
	  /* Skip a string i.e. "abcd". */
	  if (inquote || (*dbp == '"'))
	    {
	      dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
	      if (dbp != NULL)
		{
		  inquote = FALSE;
		  dbp += 1;
		  continue;	/* advance char */
		}
	      else
		{
		  /* No closing quote on this line; dbp is NULL here
		     but is reset when the next line is read. */
		  inquote = TRUE;
		  break;	/* advance line */
		}
	    }

	  /* Skip comments. */
	  if (dbp[0] == '-' && dbp[1] == '-')
	    break;		/* advance line */

	  /* Skip character enclosed in single quote i.e. 'a'
	     and skip single quote starting an attribute i.e. 'Image. */
	  if (*dbp == '\'')
	    {
	      dbp++ ;
	      if (*dbp != '\0')
		dbp++;
	      continue;
	    }

	  if (skip_till_semicolumn)
	    {
	      if (*dbp == ';')
		skip_till_semicolumn = FALSE;
	      dbp++;
	      continue;         /* advance char */
	    }

	  /* Search for beginning of a token.  */
	  if (!begtoken (*dbp))
	    {
	      dbp++;
	      continue;		/* advance char */
	    }

	  /* We are at the beginning of a token. */
	  switch (lowcase(*dbp))
	    {
	    case 'f':
	      if (!packages_only && nocase_tail ("function"))
		Ada_getit (inf, "/f");
	      else
		break;		/* from switch */
	      continue;		/* advance char */
	    case 'p':
	      if (!packages_only && nocase_tail ("procedure"))
		Ada_getit (inf, "/p");
	      else if (nocase_tail ("package"))
		Ada_getit (inf, "/s");
	      else if (nocase_tail ("protected")) /* protected type */
		Ada_getit (inf, "/t");
	      else
		break;		/* from switch */
	      continue;		/* advance char */

	    case 'u':
	      if (typedefs && !packages_only && nocase_tail ("use"))
		{
		  /* when tagging types, avoid tagging  use type Pack.Typename;
		     for this, we will skip everything till a ; */
		  skip_till_semicolumn = TRUE;
		  continue;     /* advance char */
		}
	      /* NOTE(review): there is no break here, so a token that
		 starts with `u' but is not a skipped `use' clause
		 falls through into the `t' case.  Presumably harmless
		 (neither keyword below starts with `u'), but confirm
		 it is intended. */

	    case 't':
	      if (!packages_only && nocase_tail ("task"))
		Ada_getit (inf, "/k");
	      else if (typedefs && !packages_only && nocase_tail ("type"))
		{
		  Ada_getit (inf, "/t");
		  /* Ignore the rest of the line holding the type name. */
		  while (*dbp != '\0')
		    dbp += 1;
		}
	      else
		break;		/* from switch */
	      continue;		/* advance char */
	    }

	  /* Look for the end of the token. */
	  while (!endtoken (*dbp))
	    dbp++;

	} /* advance char */
    } /* advance line */
}
4463
4464
4465/*
4466 * Unix and microcontroller assembly tag handling
4467 * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4468 * Idea by Bob Weiner, Motorola Inc. (1994)
4469 */
4470static void
4471Asm_labels (inf)
4472     FILE *inf;
4473{
4474  register char *cp;
4475
4476  LOOP_ON_INPUT_LINES (inf, lb, cp)
4477    {
4478      /* If first char is alphabetic or one of [_.$], test for colon
4479	 following identifier. */
4480      if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4481 	{
4482 	  /* Read past label. */
4483	  cp++;
4484 	  while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4485 	    cp++;
4486 	  if (*cp == ':' || iswhite (*cp))
4487	    /* Found end of label, so copy it and add it to the table. */
4488	    make_tag (lb.buffer, cp - lb.buffer, TRUE,
4489		      lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4490 	}
4491    }
4492}
4493
4494
4495/*
4496 * Perl support
4497 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4498 * Perl variable names: /^(my|local).../
4499 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4500 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4501 * Ideas by Kai Gro�johann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4502 */
4503static void
4504Perl_functions (inf)
4505     FILE *inf;
4506{
4507  char *package = savestr ("main"); /* current package name */
4508  register char *cp;
4509
4510  LOOP_ON_INPUT_LINES (inf, lb, cp)
4511    {
4512      skip_spaces(cp);
4513
4514      if (LOOKING_AT (cp, "package"))
4515	{
4516	  free (package);
4517	  get_tag (cp, &package);
4518	}
4519      else if (LOOKING_AT (cp, "sub"))
4520	{
4521	  char *pos;
4522	  char *sp = cp;
4523
4524	  while (!notinname (*cp))
4525	    cp++;
4526	  if (cp == sp)
4527	    continue;		/* nothing found */
4528	  if ((pos = etags_strchr (sp, ':')) != NULL
4529	      && pos < cp && pos[1] == ':')
4530	    /* The name is already qualified. */
4531	    make_tag (sp, cp - sp, TRUE,
4532		      lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4533	  else
4534	    /* Qualify it. */
4535	    {
4536	      char savechar, *name;
4537
4538	      savechar = *cp;
4539	      *cp = '\0';
4540	      name = concat (package, "::", sp);
4541	      *cp = savechar;
4542	      make_tag (name, strlen(name), TRUE,
4543			lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4544	      free (name);
4545	    }
4546 	}
4547       else if (globals)	/* only if we are tagging global vars */
4548 	{
4549	  /* Skip a qualifier, if any. */
4550	  bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4551 	  /* After "my" or "local", but before any following paren or space. */
4552	  char *varstart = cp;
4553
4554 	  if (qual		/* should this be removed?  If yes, how? */
4555	      && (*cp == '$' || *cp == '@' || *cp == '%'))
4556 	    {
4557 	      varstart += 1;
4558	      do
4559 		cp++;
4560 	      while (ISALNUM (*cp) || *cp == '_');
4561 	    }
4562 	  else if (qual)
4563 	    {
4564 	      /* Should be examining a variable list at this point;
4565 		 could insist on seeing an open parenthesis. */
4566 	      while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4567 		cp++;
4568 	    }
4569	  else
4570	    continue;
4571
4572 	  make_tag (varstart, cp - varstart, FALSE,
4573		    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4574	}
4575    }
4576  free (package);
4577}
4578
4579
4580/*
4581 * Python support
4582 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4583 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4584 * More ideas by seb bacon <seb@jamkit.com> (2002)
4585 */
4586static void
4587Python_functions (inf)
4588     FILE *inf;
4589{
4590  register char *cp;
4591
4592  LOOP_ON_INPUT_LINES (inf, lb, cp)
4593    {
4594      cp = skip_spaces (cp);
4595      if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4596	{
4597	  char *name = cp;
4598	  while (!notinname (*cp) && *cp != ':')
4599	    cp++;
4600	  make_tag (name, cp - name, TRUE,
4601		    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4602	}
4603    }
4604}
4605
4606
/*
 * PHP support
 * Look for:
 *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
 *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
 *  - /^[ \t]*define\(\"[^\"]+/
 * Only with --members:
 *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
 * Idea by Diez B. Roggisch (2001)
 *
 * When `function' or `class' is the last word on its line, the name is
 * taken from the start of the next non-blank line instead.
 */
static void
PHP_functions (inf)
     FILE *inf;
{
  register char *cp, *name;
  bool search_identifier = FALSE; /* keyword seen, name still pending */

  LOOP_ON_INPUT_LINES (inf, lb, cp)
    {
      cp = skip_spaces (cp);
      name = cp;
      if (search_identifier
	  && *cp != '\0')
	{
	  /* Name belonging to a keyword found on an earlier line. */
	  while (!notinname (*cp))
	    cp++;
	  make_tag (name, cp - name, TRUE,
		    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
	  search_identifier = FALSE;
	}
      else if (LOOKING_AT (cp, "function"))
	{
	  /* Step over the `&' of a function declared as `function &name'. */
	  if(*cp == '&')
	    cp = skip_spaces (cp+1);
	  if(*cp != '\0')
	    {
	      name = cp;
	      while (!notinname (*cp))
		cp++;
	      make_tag (name, cp - name, TRUE,
			lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
	    }
	  else
	    search_identifier = TRUE;
	}
      else if (LOOKING_AT (cp, "class"))
	{
	  if (*cp != '\0')
	    {
	      /* A class name runs up to the next white space. */
	      name = cp;
	      while (*cp != '\0' && !iswhite (*cp))
		cp++;
	      make_tag (name, cp - name, FALSE,
			lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
	    }
	  else
	    search_identifier = TRUE;
	}
      /* Note that the tests below move cp as they go, so when this
	 condition fails part-way the `var' test that follows sees the
	 moved pointer. */
      else if (strneq (cp, "define", 6)
	       && (cp = skip_spaces (cp+6))
	       && *cp++ == '('
	       && (*cp == '"' || *cp == '\''))
	{
	  /* The constant name is the text between the quotes. */
	  char quote = *cp++;
	  name = cp;
	  while (*cp != quote && *cp != '\0')
	    cp++;
	  make_tag (name, cp - name, FALSE,
		    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
	}
      else if (members
	       && LOOKING_AT (cp, "var")
	       && *cp == '$')
	{
	  /* Member variable; the tag name keeps its leading `$'. */
	  name = cp;
	  while (!notinname(*cp))
	    cp++;
	  make_tag (name, cp - name, FALSE,
		    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
	}
    }
}
4689
4690
4691/*
4692 * Cobol tag functions
4693 * We could look for anything that could be a paragraph name.
4694 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4695 * Idea by Corny de Souza (1993)
4696 */
4697static void
4698Cobol_paragraphs (inf)
4699     FILE *inf;
4700{
4701  register char *bp, *ep;
4702
4703  LOOP_ON_INPUT_LINES (inf, lb, bp)
4704    {
4705      if (lb.len < 9)
4706	continue;
4707      bp += 8;
4708
4709      /* If eoln, compiler option or comment ignore whole line. */
4710      if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4711        continue;
4712
4713      for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4714	continue;
4715      if (*ep++ == '.')
4716	make_tag (bp, ep - bp, TRUE,
4717		  lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4718    }
4719}
4720
4721
4722/*
4723 * Makefile support
4724 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4725 */
4726static void
4727Makefile_targets (inf)
4728     FILE *inf;
4729{
4730  register char *bp;
4731
4732  LOOP_ON_INPUT_LINES (inf, lb, bp)
4733    {
4734      if (*bp == '\t' || *bp == '#')
4735	continue;
4736      while (*bp != '\0' && *bp != '=' && *bp != ':')
4737	bp++;
4738      if (*bp == ':' || (globals && *bp == '='))
4739	{
4740	  /* We should detect if there is more than one tag, but we do not.
4741	     We just skip initial and final spaces. */
4742	  char * namestart = skip_spaces (lb.buffer);
4743	  while (--bp > namestart)
4744	    if (!notinname (*bp))
4745	      break;
4746	  make_tag (namestart, bp - namestart + 1, TRUE,
4747		    lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4748	}
4749    }
4750}
4751
4752
4753/*
4754 * Pascal parsing
4755 * Original code by Mosur K. Mohan (1989)
4756 *
4757 *  Locates tags for procedures & functions.  Doesn't do any type- or
4758 *  var-definitions.  It does look for the keyword "extern" or
4759 *  "forward" immediately following the procedure statement; if found,
4760 *  the tag is skipped.
4761 */
4762static void
4763Pascal_functions (inf)
4764     FILE *inf;
4765{
4766  linebuffer tline;		/* mostly copied from C_entries */
4767  long save_lcno;
4768  int save_lineno, namelen, taglen;
4769  char c, *name;
4770
4771  bool				/* each of these flags is TRUE iff: */
4772    incomment,			/* point is inside a comment */
4773    inquote,			/* point is inside '..' string */
4774    get_tagname,		/* point is after PROCEDURE/FUNCTION
4775				   keyword, so next item = potential tag */
4776    found_tag,			/* point is after a potential tag */
4777    inparms,			/* point is within parameter-list */
4778    verify_tag;			/* point has passed the parm-list, so the
4779				   next token will determine whether this
4780				   is a FORWARD/EXTERN to be ignored, or
4781				   whether it is a real tag */
4782
4783  save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4784  name = NULL;			/* keep compiler quiet */
4785  dbp = lb.buffer;
4786  *dbp = '\0';
4787  linebuffer_init (&tline);
4788
4789  incomment = inquote = FALSE;
4790  found_tag = FALSE;		/* have a proc name; check if extern */
4791  get_tagname = FALSE;		/* found "procedure" keyword	     */
4792  inparms = FALSE;		/* found '(' after "proc"            */
4793  verify_tag = FALSE;		/* check if "extern" is ahead        */
4794
4795
4796  while (!feof (inf))		/* long main loop to get next char */
4797    {
4798      c = *dbp++;
4799      if (c == '\0')		/* if end of line */
4800	{
4801	  readline (&lb, inf);
4802	  dbp = lb.buffer;
4803	  if (*dbp == '\0')
4804	    continue;
4805	  if (!((found_tag && verify_tag)
4806		|| get_tagname))
4807	    c = *dbp++;		/* only if don't need *dbp pointing
4808				   to the beginning of the name of
4809				   the procedure or function */
4810	}
4811      if (incomment)
4812	{
4813	  if (c == '}')		/* within { } comments */
4814	    incomment = FALSE;
4815	  else if (c == '*' && *dbp == ')') /* within (* *) comments */
4816	    {
4817	      dbp++;
4818	      incomment = FALSE;
4819	    }
4820	  continue;
4821	}
4822      else if (inquote)
4823	{
4824	  if (c == '\'')
4825	    inquote = FALSE;
4826	  continue;
4827	}
4828      else
4829	switch (c)
4830	  {
4831	  case '\'':
4832	    inquote = TRUE;	/* found first quote */
4833	    continue;
4834	  case '{':		/* found open { comment */
4835	    incomment = TRUE;
4836	    continue;
4837	  case '(':
4838	    if (*dbp == '*')	/* found open (* comment */
4839	      {
4840		incomment = TRUE;
4841		dbp++;
4842	      }
4843	    else if (found_tag)	/* found '(' after tag, i.e., parm-list */
4844	      inparms = TRUE;
4845	    continue;
4846	  case ')':		/* end of parms list */
4847	    if (inparms)
4848	      inparms = FALSE;
4849	    continue;
4850	  case ';':
4851	    if (found_tag && !inparms) /* end of proc or fn stmt */
4852	      {
4853		verify_tag = TRUE;
4854		break;
4855	      }
4856	    continue;
4857	  }
4858      if (found_tag && verify_tag && (*dbp != ' '))
4859	{
4860	  /* Check if this is an "extern" declaration. */
4861	  if (*dbp == '\0')
4862	    continue;
4863	  if (lowcase (*dbp == 'e'))
4864	    {
4865	      if (nocase_tail ("extern")) /* superfluous, really! */
4866		{
4867		  found_tag = FALSE;
4868		  verify_tag = FALSE;
4869		}
4870	    }
4871	  else if (lowcase (*dbp) == 'f')
4872	    {
4873	      if (nocase_tail ("forward")) /* check for forward reference */
4874		{
4875		  found_tag = FALSE;
4876		  verify_tag = FALSE;
4877		}
4878	    }
4879	  if (found_tag && verify_tag) /* not external proc, so make tag */
4880	    {
4881	      found_tag = FALSE;
4882	      verify_tag = FALSE;
4883	      make_tag (name, namelen, TRUE,
4884			tline.buffer, taglen, save_lineno, save_lcno);
4885	      continue;
4886	    }
4887	}
4888      if (get_tagname)		/* grab name of proc or fn */
4889	{
4890	  char *cp;
4891
4892	  if (*dbp == '\0')
4893	    continue;
4894
4895	  /* Find block name. */
4896	  for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4897	    continue;
4898
4899	  /* Save all values for later tagging. */
4900	  linebuffer_setlen (&tline, lb.len);
4901	  strcpy (tline.buffer, lb.buffer);
4902	  save_lineno = lineno;
4903	  save_lcno = linecharno;
4904	  name = tline.buffer + (dbp - lb.buffer);
4905	  namelen = cp - dbp;
4906	  taglen = cp - lb.buffer + 1;
4907
4908	  dbp = cp;		/* set dbp to e-o-token */
4909	  get_tagname = FALSE;
4910	  found_tag = TRUE;
4911	  continue;
4912
4913	  /* And proceed to check for "extern". */
4914	}
4915      else if (!incomment && !inquote && !found_tag)
4916	{
4917	  /* Check for proc/fn keywords. */
4918	  switch (lowcase (c))
4919	    {
4920	    case 'p':
4921	      if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4922		get_tagname = TRUE;
4923	      continue;
4924	    case 'f':
4925	      if (nocase_tail ("unction"))
4926		get_tagname = TRUE;
4927	      continue;
4928	    }
4929	}
4930    } /* while not eof */
4931
4932  free (tline.buffer);
4933}
4934
4935
4936/*
4937 * Lisp tag functions
4938 *  look for (def or (DEF, quote or QUOTE
4939 */
4940
4941static void L_getit __P((void));
4942
4943static void
4944L_getit ()
4945{
4946  if (*dbp == '\'')		/* Skip prefix quote */
4947    dbp++;
4948  else if (*dbp == '(')
4949  {
4950    dbp++;
4951    /* Try to skip "(quote " */
4952    if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4953      /* Ok, then skip "(" before name in (defstruct (foo)) */
4954      dbp = skip_spaces (dbp);
4955  }
4956  get_tag (dbp, NULL);
4957}
4958
4959static void
4960Lisp_functions (inf)
4961     FILE *inf;
4962{
4963  LOOP_ON_INPUT_LINES (inf, lb, dbp)
4964    {
4965      if (dbp[0] != '(')
4966	continue;
4967
4968      if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4969	{
4970	  dbp = skip_non_spaces (dbp);
4971	  dbp = skip_spaces (dbp);
4972	  L_getit ();
4973	}
4974      else
4975	{
4976	  /* Check for (foo::defmumble name-defined ... */
4977	  do
4978	    dbp++;
4979	  while (!notinname (*dbp) && *dbp != ':');
4980	  if (*dbp == ':')
4981	    {
4982	      do
4983		dbp++;
4984	      while (*dbp == ':');
4985
4986	      if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4987		{
4988		  dbp = skip_non_spaces (dbp);
4989		  dbp = skip_spaces (dbp);
4990		  L_getit ();
4991		}
4992	    }
4993	}
4994    }
4995}
4996
4997
4998/*
4999 * Lua script language parsing
5000 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5001 *
5002 *  "function" and "local function" are tags if they start at column 1.
5003 */
5004static void
5005Lua_functions (inf)
5006     FILE *inf;
5007{
5008  register char *bp;
5009
5010  LOOP_ON_INPUT_LINES (inf, lb, bp)
5011    {
5012      if (bp[0] != 'f' && bp[0] != 'l')
5013	continue;
5014
5015      (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5016
5017      if (LOOKING_AT (bp, "function"))
5018	get_tag (bp, NULL);
5019    }
5020}
5021
5022
5023/*
5024 * Postscript tags
5025 * Just look for lines where the first character is '/'
5026 * Also look at "defineps" for PSWrap
5027 * Ideas by:
5028 *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5029 *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5030 */
5031static void
5032PS_functions (inf)
5033     FILE *inf;
5034{
5035  register char *bp, *ep;
5036
5037  LOOP_ON_INPUT_LINES (inf, lb, bp)
5038    {
5039      if (bp[0] == '/')
5040	{
5041	  for (ep = bp+1;
5042	       *ep != '\0' && *ep != ' ' && *ep != '{';
5043	       ep++)
5044	    continue;
5045	  make_tag (bp, ep - bp, TRUE,
5046		    lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5047	}
5048      else if (LOOKING_AT (bp, "defineps"))
5049	get_tag (bp, NULL);
5050    }
5051}
5052
5053
/*
 * Forth tags
 * Ignore anything after \ followed by space or in ( )
 * Look for words defined by :
 * Look for constant, code, create, defer, value, and variable
 * OBP extensions:  Look for buffer:, field,
 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
 *
 * Each line of INF is walked word by word; the word that follows one
 * of the defining words above is tagged through get_tag.
 */
static void
Forth_words (inf)
     FILE *inf;
{
  register char *bp;

  LOOP_ON_INPUT_LINES (inf, lb, bp)
    /* One iteration per blank-separated word of the line. */
    while ((bp = skip_spaces (bp))[0] != '\0')
      if (bp[0] == '\\' && iswhite(bp[1]))
	break;			/* read next line */
      else if (bp[0] == '(' && iswhite(bp[1]))
	do			/* skip to ) or eol */
	  bp++;
	while (*bp != ')' && *bp != '\0');
      /* Each alternative below moves bp past the defining word when it
	 matches (the `bp++' steps over the colon), so bp is left in
	 front of the word being defined. */
      else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
	       || LOOKING_AT_NOCASE (bp, "constant")
	       || LOOKING_AT_NOCASE (bp, "code")
	       || LOOKING_AT_NOCASE (bp, "create")
	       || LOOKING_AT_NOCASE (bp, "defer")
	       || LOOKING_AT_NOCASE (bp, "value")
	       || LOOKING_AT_NOCASE (bp, "variable")
	       || LOOKING_AT_NOCASE (bp, "buffer:")
	       || LOOKING_AT_NOCASE (bp, "field"))
	get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
      else
	bp = skip_non_spaces (bp);	/* ordinary word: step over it */
}
5089
5090
5091/*
5092 * Scheme tag functions
5093 * look for (def... xyzzy
5094 *          (def... (xyzzy
5095 *          (def ... ((...(xyzzy ....
5096 *          (set! xyzzy
5097 * Original code by Ken Haase (1985?)
5098 */
5099static void
5100Scheme_functions (inf)
5101     FILE *inf;
5102{
5103  register char *bp;
5104
5105  LOOP_ON_INPUT_LINES (inf, lb, bp)
5106    {
5107      if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5108	{
5109	  bp = skip_non_spaces (bp+4);
5110	  /* Skip over open parens and white space */
5111	  while (notinname (*bp))
5112	    bp++;
5113	  get_tag (bp, NULL);
5114	}
5115      if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5116	get_tag (bp, NULL);
5117    }
5118}
5119
5120
/* Find tags in TeX and LaTeX input files.  */

/* TEX_toktab is a table of TeX control sequences that define tags.
 * Each entry records one such control sequence.
 *
 * Original code from who knows whom.
 * Ideas by:
 *   Stefan Monnier (2002)
 */

/* NULL until TeX_commands has it built by TEX_decode_env on first
   use; TeX_commands stops scanning the table at the first entry whose
   buffer is NULL. */
static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Names are written without the escape character, each one
   introduced by a colon.  */
static char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode __P((FILE *));
static void TEX_decode_env __P((char *, char *));

/* Escape and grouping characters of the file being scanned; TEX_mode
   sets all three for each file before TeX_commands reads it. */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
5146
/*
 * TeX/LaTeX scanning loop.
 *
 * For every line of INF, look for the escape character followed by one
 * of the control sequences in TEX_toktab and tag the word that comes
 * after it (optionally wrapped in a group).  At most one tag is made
 * per line, and scanning of a line stops at a `%' comment.
 */
static void
TeX_commands (inf)
     FILE *inf;
{
  char *cp;
  linebuffer *key;

  /* Select either \ or ! as escape character.  */
  TEX_mode (inf);

  /* Initialize token table once from environment. */
  if (TEX_toktab == NULL)
    TEX_decode_env ("TEXTAGS", TEX_defenv);

  LOOP_ON_INPUT_LINES (inf, lb, cp)
    {
      /* Look at each TEX keyword in line. */
      for (;;)
	{
	  /* Look for a TEX escape.  After the loop cp points just past
	     the escape character. */
	  while (*cp++ != TEX_esc)
	    if (cp[-1] == '\0' || cp[-1] == '%')
	      goto tex_next_line;

	  /* Compare what follows the escape with every known control
	     sequence; only the first key->len characters are compared. */
	  for (key = TEX_toktab; key->buffer != NULL; key++)
	    if (strneq (cp, key->buffer, key->len))
	      {
		register char *p;
		int namelen, linelen;
		bool opgrp = FALSE; /* name is preceded by a group opener */

		cp = skip_spaces (cp + key->len);
		if (*cp == TEX_opgrp)
		  {
		    opgrp = TRUE;
		    cp++;
		  }
		/* The name ends at white space, `#' or a grouping char. */
		for (p = cp;
		     (!iswhite (*p) && *p != '#' &&
		      *p != TEX_opgrp && *p != TEX_clgrp);
		     p++)
		  continue;
		namelen = p - cp;
		/* By default the whole line is recorded with the tag.  It
		   is cut short at the next grouping character (or end of
		   line) when the name was not in a group, or when its
		   group is closed right after the name. */
		linelen = lb.len;
		if (!opgrp || *p == TEX_clgrp)
		  {
		    while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
		      p++;
		    linelen = p - lb.buffer + 1;
		  }
		make_tag (cp, namelen, TRUE,
			  lb.buffer, linelen, lineno, linecharno);
		goto tex_next_line; /* We only tag a line once */
	      }
	}
    tex_next_line:
      ;
    }
}
5209
5210#define TEX_LESC '\\'
5211#define TEX_SESC '!'
5212
5213/* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5214   chars accordingly. */
5215static void
5216TEX_mode (inf)
5217     FILE *inf;
5218{
5219  int c;
5220
5221  while ((c = getc (inf)) != EOF)
5222    {
5223      /* Skip to next line if we hit the TeX comment char. */
5224      if (c == '%')
5225	while (c != '\n' && c != EOF)
5226	  c = getc (inf);
5227      else if (c == TEX_LESC || c == TEX_SESC )
5228	break;
5229    }
5230
5231  if (c == TEX_LESC)
5232    {
5233      TEX_esc = TEX_LESC;
5234      TEX_opgrp = '{';
5235      TEX_clgrp = '}';
5236    }
5237  else
5238    {
5239      TEX_esc = TEX_SESC;
5240      TEX_opgrp = '<';
5241      TEX_clgrp = '>';
5242    }
5243  /* If the input file is compressed, inf is a pipe, and rewind may fail.
5244     No attempt is made to correct the situation. */
5245  rewind (inf);
5246}
5247
5248/* Read environment and prepend it to the default string.
5249   Build token table. */
5250static void
5251TEX_decode_env (evarname, defenv)
5252     char *evarname;
5253     char *defenv;
5254{
5255  register char *env, *p;
5256  int i, len;
5257
5258  /* Append default string to environment. */
5259  env = getenv (evarname);
5260  if (!env)
5261    env = defenv;
5262  else
5263    {
5264      char *oldenv = env;
5265      env = concat (oldenv, defenv, "");
5266    }
5267
5268  /* Allocate a token table */
5269  for (len = 1, p = env; p;)
5270    if ((p = etags_strchr (p, ':')) && *++p != '\0')
5271      len++;
5272  TEX_toktab = xnew (len, linebuffer);
5273
5274  /* Unpack environment string into token table. Be careful about */
5275  /* zero-length strings (leading ':', "::" and trailing ':') */
5276  for (i = 0; *env != '\0';)
5277    {
5278      p = etags_strchr (env, ':');
5279      if (!p)			/* End of environment string. */
5280	p = env + strlen (env);
5281      if (p - env > 0)
5282	{			/* Only non-zero strings. */
5283	  TEX_toktab[i].buffer = savenstr (env, p - env);
5284	  TEX_toktab[i].len = p - env;
5285	  i++;
5286	}
5287      if (*p)
5288	env = p + 1;
5289      else
5290	{
5291	  TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5292	  TEX_toktab[i].len = 0;
5293	  break;
5294	}
5295    }
5296}
5297
5298
5299/* Texinfo support.  Dave Love, Mar. 2000.  */
5300static void
5301Texinfo_nodes (inf)
5302     FILE * inf;
5303{
5304  char *cp, *start;
5305  LOOP_ON_INPUT_LINES (inf, lb, cp)
5306    if (LOOKING_AT (cp, "@node"))
5307      {
5308	start = cp;
5309	while (*cp != '\0' && *cp != ',')
5310	  cp++;
5311	make_tag (start, cp - start, TRUE,
5312		  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5313      }
5314}
5315
5316
/*
 * HTML support.
 * Contents of <title>, <h1>, <h2>, <h3> are tags.
 * Contents of <a name=xxx> are tags with name xxx.
 * An id=xxx attribute inside any HTML tag is handled like name=xxx.
 *
 * Francesco Potortì, 2002.
 *
 * The scanner is a small state machine driven by four flags which
 * persist across input lines, so an HTML tag or the text to be grabbed
 * may start on one line and continue on the next.  The flags are
 * tested in the order skiptag, intag, getnext.
 */
static void
HTML_labels (inf)
     FILE * inf;
{
  bool getnext = FALSE;		/* next text outside of HTML tags is a tag */
  bool skiptag = FALSE;		/* skip to the end of the current HTML tag */
  bool intag = FALSE;		/* inside an html tag, looking for ID= */
  bool inanchor = FALSE;	/* when INTAG, is an anchor, look for NAME= */
  char *end;


  linebuffer_setlen (&token_name, 0); /* no name in buffer */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    for (;;)			/* loop on the same line */
      {
	if (skiptag)		/* skip HTML tag */
	  {
	    while (*dbp != '\0' && *dbp != '>')
	      dbp++;
	    if (*dbp == '>')
	      {
		dbp += 1;
		skiptag = FALSE;
		continue;	/* look on the same line */
	      }
	    break;		/* go to next line */
	  }

	else if (intag)	/* look for "name=" or "id=" */
	  {
	    /* Advance to the next character that could start one of the
	       two attribute names, or to the end of the tag or line. */
	    while (*dbp != '\0' && *dbp != '>'
		   && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
	      dbp++;
	    if (*dbp == '\0')
	      break;		/* go to next line */
	    if (*dbp == '>')
	      {
		dbp += 1;
		intag = FALSE;
		continue;	/* look on the same line */
	      }
	    /* "name=" only counts inside an anchor; "id=" counts in any
	       tag.  NOTE(review): LOOKING_AT_NOCASE presumably moves DBP
	       past the keyword on success -- confirm in its definition. */
	    if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
		|| LOOKING_AT_NOCASE (dbp, "id="))
	      {
		bool quoted = (dbp[0] == '"');

		/* A quoted value runs to the closing double quote, an
		   unquoted one as long as intoken holds; both stop at
		   the end of the line. */
		if (quoted)
		  for (end = ++dbp; *end != '\0' && *end != '"'; end++)
		    continue;
		else
		  for (end = dbp; *end != '\0' && intoken (*end); end++)
		    continue;
		/* Remember the value as the name of the next tag. */
		linebuffer_setlen (&token_name, end - dbp);
		strncpy (token_name.buffer, dbp, end - dbp);
		token_name.buffer[end - dbp] = '\0';

		dbp = end;
		intag = FALSE;	/* we found what we looked for */
		skiptag = TRUE; /* skip to the end of the tag */
		getnext = TRUE;	/* then grab the text */
		continue;	/* look on the same line */
	      }
	    /* An 'n' or 'i' that starts neither attribute: step over it. */
	    dbp += 1;
	  }

	else if (getnext)	/* grab next tokens and tag them */
	  {
	    dbp = skip_spaces (dbp);
	    if (*dbp == '\0')
	      break;		/* go to next line */
	    if (*dbp == '<')
	      {
		/* Another HTML tag comes before any text.  Scan it too;
		   GETNEXT stays set, so the text after it is grabbed. */
		intag = TRUE;
		inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
		continue;	/* look on the same line */
	      }

	    /* The text up to the next '<' or the end of the line is the
	       tag's pattern; its name is whatever token_name holds. */
	    for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
	      continue;
	    make_tag (token_name.buffer, token_name.len, TRUE,
		      dbp, end - dbp, lineno, linecharno);
	    linebuffer_setlen (&token_name, 0);	/* no name in buffer */
	    getnext = FALSE;
	    break;		/* go to next line */
	  }

	else			/* look for an interesting HTML tag */
	  {
	    while (*dbp != '\0' && *dbp != '<')
	      dbp++;
	    if (*dbp == '\0')
	      break;		/* go to next line */
	    intag = TRUE;
	    if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
	      {
		inanchor = TRUE;
		continue;	/* look on the same line */
	      }
	    else if (LOOKING_AT_NOCASE (dbp, "<title>")
		     || LOOKING_AT_NOCASE (dbp, "<h1>")
		     || LOOKING_AT_NOCASE (dbp, "<h2>")
		     || LOOKING_AT_NOCASE (dbp, "<h3>"))
	      {
		/* Heading tags carry no attribute to look for: go
		   straight to grabbing the text that follows. */
		intag = FALSE;
		getnext = TRUE;
		continue;	/* look on the same line */
	      }
	    /* Some other tag: step over '<' and scan it with INTAG set. */
	    dbp += 1;
	  }
      }
}
5436
5437
/*
 * Prolog support
 *
 * Assumes that the predicate or rule starts at column 0.
 * Only the first clause of a predicate or rule is added.
 * Original code by Sunichirou Sugou (1989)
 * Rewritten by Anders Lindgren (1996)
 */
/* Tag the clause head at the start of a line; return the length of
   its name, or 0 if no tag was made.  */
static int prolog_pr __P((char *, char *));
/* Read lines until one containing the end of a C-style comment.  */
static void prolog_skip_comment __P((linebuffer *, FILE *));
/* Return the length of the atom at the given offset, or -1.  */
static int prolog_atom __P((char *, int));
5449
5450static void
5451Prolog_functions (inf)
5452     FILE *inf;
5453{
5454  char *cp, *last;
5455  int len;
5456  int allocated;
5457
5458  allocated = 0;
5459  len = 0;
5460  last = NULL;
5461
5462  LOOP_ON_INPUT_LINES (inf, lb, cp)
5463    {
5464      if (cp[0] == '\0')	/* Empty line */
5465	continue;
5466      else if (iswhite (cp[0])) /* Not a predicate */
5467	continue;
5468      else if (cp[0] == '/' && cp[1] == '*')	/* comment. */
5469	prolog_skip_comment (&lb, inf);
5470      else if ((len = prolog_pr (cp, last)) > 0)
5471	{
5472	  /* Predicate or rule.  Store the function name so that we
5473	     only generate a tag for the first clause.  */
5474	  if (last == NULL)
5475	    last = xnew(len + 1, char);
5476	  else if (len + 1 > allocated)
5477	    xrnew (last, len + 1, char);
5478	  allocated = len + 1;
5479	  strncpy (last, cp, len);
5480	  last[len] = '\0';
5481	}
5482    }
5483  if (last != NULL)
5484    free (last);
5485}
5486
5487
5488static void
5489prolog_skip_comment (plb, inf)
5490     linebuffer *plb;
5491     FILE *inf;
5492{
5493  char *cp;
5494
5495  do
5496    {
5497      for (cp = plb->buffer; *cp != '\0'; cp++)
5498	if (cp[0] == '*' && cp[1] == '/')
5499	  return;
5500      readline (plb, inf);
5501    }
5502  while (!feof(inf));
5503}
5504
5505/*
5506 * A predicate or rule definition is added if it matches:
5507 *     <beginning of line><Prolog Atom><whitespace>(
5508 * or  <beginning of line><Prolog Atom><whitespace>:-
5509 *
5510 * It is added to the tags database if it doesn't match the
5511 * name of the previous clause header.
5512 *
5513 * Return the size of the name of the predicate or rule, or 0 if no
5514 * header was found.
5515 */
5516static int
5517prolog_pr (s, last)
5518     char *s;
5519     char *last;		/* Name of last clause. */
5520{
5521  int pos;
5522  int len;
5523
5524  pos = prolog_atom (s, 0);
5525  if (pos < 1)
5526    return 0;
5527
5528  len = pos;
5529  pos = skip_spaces (s + pos) - s;
5530
5531  if ((s[pos] == '.'
5532       || (s[pos] == '(' && (pos += 1))
5533       || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5534      && (last == NULL		/* save only the first clause */
5535	  || len != (int)strlen (last)
5536	  || !strneq (s, last, len)))
5537	{
5538	  make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5539	  return len;
5540	}
5541  else
5542    return 0;
5543}
5544
/*
 * Measure the Prolog atom that starts at S + POS.
 * Return its length in bytes, or -1 if no atom starts there.
 *
 * Two forms are recognized:
 * - an unquoted atom: a lower case letter or an underscore followed by
 *   any number of alphanumeric characters and underscores;
 * - a quoted atom: text between single quotes, closing quote included
 *   in the length.  Inside it two consecutive single quotes stand for
 *   one, and a backslash quotes the following character.  A quoted
 *   atom that is not closed on the same line yields -1.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (p[0]) || p[0] == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (p[0]) || p[0] == '_'; p++)
        continue;
      return (int) (p - start);
    }

  if (p[0] != '\'')
    return -1;

  /* The atom is quoted. */
  p++;
  for (;;)
    switch (p[0])
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      case '\'':
        if (p[1] != '\'')
          return (int) (p + 1 - start);	/* closing quote */
        p += 2;                         /* a doubled quote */
        break;

      default:
        p++;
        break;
      }
}
5603
5604
/*
 * Support for Erlang
 *
 * Generates tags for functions, defines, and records.
 * Assumes that Erlang functions start at column 0.
 * Original code by Anders Lindgren (1996)
 */
/* Tag the function head at the start of a line; return the length of
   its name, or 0 if no tag was made.  */
static int erlang_func __P((char *, char *));
/* Tag the name defined by a -define or -record attribute line.  */
static void erlang_attribute __P((char *));
/* Return the length of the atom or variable at the start of a
   string, or 0 if there is none.  */
static int erlang_atom __P((char *));
5615
5616static void
5617Erlang_functions (inf)
5618     FILE *inf;
5619{
5620  char *cp, *last;
5621  int len;
5622  int allocated;
5623
5624  allocated = 0;
5625  len = 0;
5626  last = NULL;
5627
5628  LOOP_ON_INPUT_LINES (inf, lb, cp)
5629    {
5630      if (cp[0] == '\0')	/* Empty line */
5631	continue;
5632      else if (iswhite (cp[0])) /* Not function nor attribute */
5633	continue;
5634      else if (cp[0] == '%')	/* comment */
5635	continue;
5636      else if (cp[0] == '"')	/* Sometimes, strings start in column one */
5637	continue;
5638      else if (cp[0] == '-') 	/* attribute, e.g. "-define" */
5639	{
5640	  erlang_attribute (cp);
5641	  if (last != NULL)
5642	    {
5643	      free (last);
5644	      last = NULL;
5645	    }
5646	}
5647      else if ((len = erlang_func (cp, last)) > 0)
5648	{
5649	  /*
5650	   * Function.  Store the function name so that we only
5651	   * generates a tag for the first clause.
5652	   */
5653	  if (last == NULL)
5654	    last = xnew (len + 1, char);
5655	  else if (len + 1 > allocated)
5656	    xrnew (last, len + 1, char);
5657	  allocated = len + 1;
5658	  strncpy (last, cp, len);
5659	  last[len] = '\0';
5660	}
5661    }
5662  if (last != NULL)
5663    free (last);
5664}
5665
5666
5667/*
5668 * A function definition is added if it matches:
5669 *     <beginning of line><Erlang Atom><whitespace>(
5670 *
5671 * It is added to the tags database if it doesn't match the
5672 * name of the previous clause header.
5673 *
5674 * Return the size of the name of the function, or 0 if no function
5675 * was found.
5676 */
5677static int
5678erlang_func (s, last)
5679     char *s;
5680     char *last;		/* Name of last clause. */
5681{
5682  int pos;
5683  int len;
5684
5685  pos = erlang_atom (s);
5686  if (pos < 1)
5687    return 0;
5688
5689  len = pos;
5690  pos = skip_spaces (s + pos) - s;
5691
5692  /* Save only the first clause. */
5693  if (s[pos++] == '('
5694      && (last == NULL
5695	  || len != (int)strlen (last)
5696	  || !strneq (s, last, len)))
5697	{
5698	  make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5699	  return len;
5700	}
5701
5702  return 0;
5703}
5704
5705
5706/*
5707 * Handle attributes.  Currently, tags are generated for defines
5708 * and records.
5709 *
5710 * They are on the form:
5711 * -define(foo, bar).
5712 * -define(Foo(M, N), M+N).
5713 * -record(graph, {vtab = notable, cyclic = true}).
5714 */
5715static void
5716erlang_attribute (s)
5717     char *s;
5718{
5719  char *cp = s;
5720
5721  if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5722      && *cp++ == '(')
5723    {
5724      int len = erlang_atom (skip_spaces (cp));
5725      if (len > 0)
5726	make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5727    }
5728  return;
5729}
5730
5731
/*
 * Measure the Erlang atom (or variable) at the start of S.
 * Return the number of bytes it occupies, or 0 if S does not start
 * with one.
 *
 * Two forms are recognized:
 * - an unquoted name: a letter or an underscore followed by any number
 *   of alphanumeric characters and underscores;
 * - a quoted atom: text between single quotes, both quotes included in
 *   the length; a backslash quotes the following character.  A quoted
 *   atom that is not closed on the same line yields 0.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (p[0]) || p[0] == '_')
    {
      /* The atom is unquoted. */
      p++;
      while (ISALNUM (p[0]) || p[0] == '_')
        p++;
    }
  else if (p[0] == '\'')
    {
      p++;
      while (p[0] != '\'')
        {
          if (p[0] == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          if (p[0] == '\\')
            {
              p++;              /* look at the quoted character */
              if (p[0] == '\0')
                return 0;
            }
          p++;
        }
      p++;                      /* count the closing quote */
    }

  return (int) (p - s);
}
5760
5761
/* Helpers for the --regex option: split a separator-delimited field,
   register one regexp, and expand \N references in a tag name.  */
static char *scan_separators __P((char *));
static void add_regex __P((char *, language *));
static char *substitute __P((char *, char *, struct re_registers *));
5765
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 *
 * NAME points at the opening separator; any character can serve as
 * separator.  The decoded text is written starting at NAME itself, so
 * it overwrites the opening separator.  A backslash followed by the
 * separator yields the separator; a backslash followed by any other
 * character that is not one of the recognized escapes is kept as is,
 * backslash included.
 *
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty string is unterminated too and is
 * left untouched.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;
  int quoted = 0;		/* previous character was a backslash */

  /* With an empty string there is no separator, and stepping over
     name[0] below would leave the string. */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
	{
	  switch (*name)
	    {
	    case 'a': *copyto++ = '\007'; break; /* BEL (bell)		 */
	    case 'b': *copyto++ = '\b'; break;	 /* BS (back space)	 */
	    case 'd': *copyto++ = 0177; break;	 /* DEL (delete)	 */
	    case 'e': *copyto++ = 033; break;	 /* ESC (escape)	 */
	    case 'f': *copyto++ = '\f'; break;	 /* FF (form feed)	 */
	    case 'n': *copyto++ = '\n'; break;	 /* NL (new line)	 */
	    case 'r': *copyto++ = '\r'; break;	 /* CR (carriage return) */
	    case 't': *copyto++ = '\t'; break;	 /* TAB (horizontal tab) */
	    case 'v': *copyto++ = '\v'; break;	 /* VT (vertical tab)    */
	    default:
	      if (*name == sep)
		*copyto++ = sep;
	      else
		{
		  /* Something else is quoted, so preserve the quote. */
		  *copyto++ = '\\';
		  *copyto++ = *name;
		}
	      break;
	    }
	  quoted = 0;
	}
      else if (*name == '\\')
	quoted = 1;
      else if (*name == sep)
	break;
      else
	*copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;		/* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5825
5826/* Look at the argument of --regex or --no-regex and do the right
5827   thing.  Same for each line of a regexp file. */
5828static void
5829analyse_regex (regex_arg)
5830     char *regex_arg;
5831{
5832  if (regex_arg == NULL)
5833    {
5834      free_regexps ();		/* --no-regex: remove existing regexps */
5835      return;
5836    }
5837
5838  /* A real --regexp option or a line in a regexp file. */
5839  switch (regex_arg[0])
5840    {
5841      /* Comments in regexp file or null arg to --regex. */
5842    case '\0':
5843    case ' ':
5844    case '\t':
5845      break;
5846
5847      /* Read a regex file.  This is recursive and may result in a
5848	 loop, which will stop when the file descriptors are exhausted. */
5849    case '@':
5850      {
5851	FILE *regexfp;
5852	linebuffer regexbuf;
5853	char *regexfile = regex_arg + 1;
5854
5855	/* regexfile is a file containing regexps, one per line. */
5856	regexfp = fopen (regexfile, "r");
5857	if (regexfp == NULL)
5858	  {
5859	    pfatal (regexfile);
5860	    return;
5861	  }
5862	linebuffer_init (&regexbuf);
5863	while (readline_internal (&regexbuf, regexfp) > 0)
5864	  analyse_regex (regexbuf.buffer);
5865	free (regexbuf.buffer);
5866	fclose (regexfp);
5867      }
5868      break;
5869
5870      /* Regexp to be used for a specific language only. */
5871    case '{':
5872      {
5873	language *lang;
5874	char *lang_name = regex_arg + 1;
5875	char *cp;
5876
5877	for (cp = lang_name; *cp != '}'; cp++)
5878	  if (*cp == '\0')
5879	    {
5880	      error ("unterminated language name in regex: %s", regex_arg);
5881	      return;
5882	    }
5883	*cp++ = '\0';
5884	lang = get_language_from_langname (lang_name);
5885	if (lang == NULL)
5886	  return;
5887	add_regex (cp, lang);
5888      }
5889      break;
5890
5891      /* Regexp to be used for any language. */
5892    default:
5893      add_regex (regex_arg, NULL);
5894      break;
5895    }
5896}
5897
5898/* Separate the regexp pattern, compile it,
5899   and care for optional name and modifiers. */
5900static void
5901add_regex (regexp_pattern, lang)
5902     char *regexp_pattern;
5903     language *lang;
5904{
5905  static struct re_pattern_buffer zeropattern;
5906  char sep, *pat, *name, *modifiers;
5907  const char *err;
5908  struct re_pattern_buffer *patbuf;
5909  regexp *rp;
5910  bool
5911    force_explicit_name = TRUE, /* do not use implicit tag names */
5912    ignore_case = FALSE,	/* case is significant */
5913    multi_line = FALSE,		/* matches are done one line at a time */
5914    single_line = FALSE;	/* dot does not match newline */
5915
5916
5917  if (strlen(regexp_pattern) < 3)
5918    {
5919      error ("null regexp", (char *)NULL);
5920      return;
5921    }
5922  sep = regexp_pattern[0];
5923  name = scan_separators (regexp_pattern);
5924  if (name == NULL)
5925    {
5926      error ("%s: unterminated regexp", regexp_pattern);
5927      return;
5928    }
5929  if (name[1] == sep)
5930    {
5931      error ("null name for regexp \"%s\"", regexp_pattern);
5932      return;
5933    }
5934  modifiers = scan_separators (name);
5935  if (modifiers == NULL)	/* no terminating separator --> no name */
5936    {
5937      modifiers = name;
5938      name = "";
5939    }
5940  else
5941    modifiers += 1;		/* skip separator */
5942
5943  /* Parse regex modifiers. */
5944  for (; modifiers[0] != '\0'; modifiers++)
5945    switch (modifiers[0])
5946      {
5947      case 'N':
5948	if (modifiers == name)
5949	  error ("forcing explicit tag name but no name, ignoring", NULL);
5950	force_explicit_name = TRUE;
5951	break;
5952      case 'i':
5953	ignore_case = TRUE;
5954	break;
5955      case 's':
5956	single_line = TRUE;
5957	/* FALLTHRU */
5958      case 'm':
5959	multi_line = TRUE;
5960	need_filebuf = TRUE;
5961	break;
5962      default:
5963	{
5964	  char wrongmod [2];
5965	  wrongmod[0] = modifiers[0];
5966	  wrongmod[1] = '\0';
5967	  error ("invalid regexp modifier `%s', ignoring", wrongmod);
5968	}
5969	break;
5970      }
5971
5972  patbuf = xnew (1, struct re_pattern_buffer);
5973  *patbuf = zeropattern;
5974  if (ignore_case)
5975    {
5976      static char lc_trans[CHARS];
5977      int i;
5978      for (i = 0; i < CHARS; i++)
5979	lc_trans[i] = lowcase (i);
5980      patbuf->translate = lc_trans;	/* translation table to fold case  */
5981    }
5982
5983  if (multi_line)
5984    pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5985  else
5986    pat = regexp_pattern;
5987
5988  if (single_line)
5989    re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5990  else
5991    re_set_syntax (RE_SYNTAX_EMACS);
5992
5993  err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5994  if (multi_line)
5995    free (pat);
5996  if (err != NULL)
5997    {
5998      error ("%s while compiling pattern", err);
5999      return;
6000    }
6001
6002  rp = p_head;
6003  p_head = xnew (1, regexp);
6004  p_head->pattern = savestr (regexp_pattern);
6005  p_head->p_next = rp;
6006  p_head->lang = lang;
6007  p_head->pat = patbuf;
6008  p_head->name = savestr (name);
6009  p_head->error_signaled = FALSE;
6010  p_head->force_explicit_name = force_explicit_name;
6011  p_head->ignore_case = ignore_case;
6012  p_head->multi_line = multi_line;
6013}
6014
6015/*
6016 * Do the substitutions indicated by the regular expression and
6017 * arguments.
6018 */
6019static char *
6020substitute (in, out, regs)
6021     char *in, *out;
6022     struct re_registers *regs;
6023{
6024  char *result, *t;
6025  int size, dig, diglen;
6026
6027  result = NULL;
6028  size = strlen (out);
6029
6030  /* Pass 1: figure out how much to allocate by finding all \N strings. */
6031  if (out[size - 1] == '\\')
6032    fatal ("pattern error in \"%s\"", out);
6033  for (t = etags_strchr (out, '\\');
6034       t != NULL;
6035       t = etags_strchr (t + 2, '\\'))
6036    if (ISDIGIT (t[1]))
6037      {
6038	dig = t[1] - '0';
6039	diglen = regs->end[dig] - regs->start[dig];
6040	size += diglen - 2;
6041      }
6042    else
6043      size -= 1;
6044
6045  /* Allocate space and do the substitutions. */
6046  assert (size >= 0);
6047  result = xnew (size + 1, char);
6048
6049  for (t = result; *out != '\0'; out++)
6050    if (*out == '\\' && ISDIGIT (*++out))
6051      {
6052	dig = *out - '0';
6053	diglen = regs->end[dig] - regs->start[dig];
6054	strncpy (t, in + regs->start[dig], diglen);
6055	t += diglen;
6056      }
6057    else
6058      *t++ = *out;
6059  *t = '\0';
6060
6061  assert (t <= result + size);
6062  assert (t - result == (int)strlen (result));
6063
6064  return result;
6065}
6066
6067/* Deallocate all regexps. */
6068static void
6069free_regexps ()
6070{
6071  regexp *rp;
6072  while (p_head != NULL)
6073    {
6074      rp = p_head->p_next;
6075      free (p_head->pattern);
6076      free (p_head->name);
6077      free (p_head);
6078      p_head = rp;
6079    }
6080  return;
6081}
6082
/*
 * Reads the whole file as a single string from `filebuf' and looks for
 * multi-line regular expressions, creating tags on matches.
 * readline already dealt with normal regexps.
 *
 * Each multi-line regexp on the p_head list is searched for over the
 * whole buffer in turn.  The globals lineno, charno and linecharno are
 * reset for every such regexp and advanced as matches are found.
 *
 * Idea by Ben Wing <ben@666.com> (2002).
 */
static void
regex_tag_multiline ()
{
  char *buffer = filebuf.buffer;
  regexp *rp;
  char *name;

  for (rp = p_head; rp != NULL; rp = rp->p_next)
    {
      int match = 0;		/* start of the latest match, or < 0 */

      if (!rp->multi_line)
	continue;		/* skip normal regexps */

      /* Generic initialisations before parsing file from memory. */
      lineno = 1;		/* reset global line number */
      charno = 0;		/* reset global char number */
      linecharno = 0;		/* reset global char number of line start */

      /* Only use generic regexps or those for the current language. */
      if (rp->lang != NULL && rp->lang != curfdp->lang)
	continue;

      /* Search repeatedly, each time starting at CHARNO, which the
	 code below leaves at the end of the previous match.  A
	 negative MATCH ends the loop.
	 NOTE(review): the search range is computed from MATCH, not
	 from CHARNO -- presumably re_search clips it; confirm. */
      while (match >= 0 && match < filebuf.len)
	{
	  match = re_search (rp->pat, buffer, filebuf.len, charno,
			     filebuf.len - match, &rp->regs);
	  switch (match)
	    {
	    case -2:
	      /* Some error.  Report it only once per regexp. */
	      if (!rp->error_signaled)
		{
		  error ("regexp stack overflow while matching \"%s\"",
			 rp->pattern);
		  rp->error_signaled = TRUE;
		}
	      break;
	    case -1:
	      /* No match. */
	      break;
	    default:
	      /* An empty match would never advance CHARNO: give up on
		 this regexp. */
	      if (match == rp->regs.end[0])
		{
		  if (!rp->error_signaled)
		    {
		      error ("regexp matches the empty string: \"%s\"",
			     rp->pattern);
		      rp->error_signaled = TRUE;
		    }
		  match = -3;	/* exit from while loop */
		  break;
		}

	      /* Match occurred.  Construct a tag. */
	      /* Advance to the end of the match, keeping the line
		 number and the start of the current line up to date. */
	      while (charno < rp->regs.end[0])
		if (buffer[charno++] == '\n')
		  lineno++, linecharno = charno;
	      name = rp->name;
	      if (name[0] == '\0')
		name = NULL;
	      else /* make a named tag */
		name = substitute (buffer, rp->name, &rp->regs);
	      /* The tag's pattern text runs from the start of the line
		 on which the match ends through the end of the match. */
	      if (rp->force_explicit_name)
		/* Force explicit tag name, if a name is there. */
		pfnote (name, TRUE, buffer + linecharno,
			charno - linecharno + 1, lineno, linecharno);
	      else
		/* NOTE(review): NAME can be NULL here, and strlen would
		   then be applied to NULL -- confirm this branch is
		   never reached with an empty name. */
		make_tag (name, strlen (name), TRUE, buffer + linecharno,
			  charno - linecharno + 1, lineno, linecharno);
	      break;
	    }
	}
    }
}
6165
6166
6167static bool
6168nocase_tail (cp)
6169     char *cp;
6170{
6171  register int len = 0;
6172
6173  while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6174    cp++, len++;
6175  if (*cp == '\0' && !intoken (dbp[len]))
6176    {
6177      dbp += len;
6178      return TRUE;
6179    }
6180  return FALSE;
6181}
6182
6183static void
6184get_tag (bp, namepp)
6185     register char *bp;
6186     char **namepp;
6187{
6188  register char *cp = bp;
6189
6190  if (*bp != '\0')
6191    {
6192      /* Go till you get to white space or a syntactic break */
6193      for (cp = bp + 1; !notinname (*cp); cp++)
6194	continue;
6195      make_tag (bp, cp - bp, TRUE,
6196		lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6197    }
6198
6199  if (namepp != NULL)
6200    *namepp = savenstr (bp, cp - bp);
6201}
6202
/*
 * Read a line of text from `stream' into `lbp', excluding the
 * newline or CR-NL, if any.  Return the number of characters read from
 * `stream', which is the length of the line including the newline.
 *
 * On DOS or Windows we do not count the CR character, if any before the
 * NL, in the returned length; this mirrors the behavior of Emacs on those
 * platforms (for text files, it translates CR-NL to NL as it reads in the
 * file).
 *
 * If multi-line regular expressions are requested, each line read is
 * appended to `filebuf'.
 *
 * The line is stored NUL-terminated in lbp->buffer, which is enlarged
 * as needed, and its length is stored in lbp->len.  At end of file the
 * return value is the number of characters read since the last
 * newline, so it is 0 when there was nothing left to read.
 */
static long
readline_internal (lbp, stream)
     linebuffer *lbp;
     register FILE *stream;
{
  char *buffer = lbp->buffer;
  register char *p = lbp->buffer;
  register char *pend;
  int chars_deleted;		/* line terminator chars not stored */

  pend = p + lbp->size;		/* Separate to avoid 386/IX compiler bug.  */

  for (;;)
    {
      register int c = getc (stream);
      /* Make room before storing anything, so that there is always
	 space for the character just read or for the final NUL. */
      if (p == pend)
	{
	  /* We're at the end of linebuffer: expand it. */
	  lbp->size *= 2;
	  xrnew (buffer, lbp->size, char);
	  /* Rebase P onto the new block; lbp->buffer still holds the
	     old address at this point. */
	  p += buffer - lbp->buffer;
	  pend = buffer + lbp->size;
	  lbp->buffer = buffer;
	}
      if (c == EOF)
	{
	  *p = '\0';
	  chars_deleted = 0;
	  break;
	}
      if (c == '\n')
	{
	  if (p > buffer && p[-1] == '\r')
	    {
	      /* Drop the CR that was already stored. */
	      p -= 1;
#ifdef DOS_NT
	     /* Assume CRLF->LF translation will be performed by Emacs
		when loading this file, so CRs won't appear in the buffer.
		It would be cleaner to compensate within Emacs;
		however, Emacs does not know how many CRs were deleted
		before any given point in the file.  */
	      chars_deleted = 1;
#else
	      chars_deleted = 2;
#endif
	    }
	  else
	    {
	      chars_deleted = 1;
	    }
	  *p = '\0';
	  break;
	}
      *p++ = c;
    }
  lbp->len = p - buffer;

  /* Note that a final line ended by EOF instead of a newline has
     chars_deleted == 0 and is therefore not appended to filebuf. */
  if (need_filebuf		/* we need filebuf for multi-line regexps */
      && chars_deleted > 0)	/* not at EOF */
    {
      while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
	{
	  /* Expand filebuf. */
	  filebuf.size *= 2;
	  xrnew (filebuf.buffer, filebuf.size, char);
	}
      /* Append the line followed by a single newline, whatever the
	 line terminator in the file was, and keep filebuf terminated. */
      strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
      filebuf.len += lbp->len;
      filebuf.buffer[filebuf.len++] = '\n';
      filebuf.buffer[filebuf.len] = '\0';
    }

  return lbp->len + chars_deleted;
}
6290
6291/*
6292 * Like readline_internal, above, but in addition try to match the
6293 * input line against relevant regular expressions and manage #line
6294 * directives.
6295 */
6296static void
6297readline (lbp, stream)
6298     linebuffer *lbp;
6299     FILE *stream;
6300{
6301  long result;
6302
6303  linecharno = charno;		/* update global char number of line start */
6304  result = readline_internal (lbp, stream); /* read line */
6305  lineno += 1;			/* increment global line number */
6306  charno += result;		/* increment global char number */
6307
6308  /* Honour #line directives. */
6309  if (!no_line_directive)
6310    {
6311      static bool discard_until_line_directive;
6312
6313      /* Check whether this is a #line directive. */
6314      if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6315	{
6316	  unsigned int lno;
6317	  int start = 0;
6318
6319	  if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6320	      && start > 0)	/* double quote character found */
6321	    {
6322	      char *endp = lbp->buffer + start;
6323
6324	      while ((endp = etags_strchr (endp, '"')) != NULL
6325		     && endp[-1] == '\\')
6326		endp++;
6327	      if (endp != NULL)
6328		/* Ok, this is a real #line directive.  Let's deal with it. */
6329		{
6330		  char *taggedabsname;	/* absolute name of original file */
6331		  char *taggedfname;	/* name of original file as given */
6332		  char *name;		/* temp var */
6333
6334		  discard_until_line_directive = FALSE; /* found it */
6335		  name = lbp->buffer + start;
6336		  *endp = '\0';
6337		  canonicalize_filename (name); /* for DOS */
6338		  taggedabsname = absolute_filename (name, tagfiledir);
6339		  if (filename_is_absolute (name)
6340		      || filename_is_absolute (curfdp->infname))
6341		    taggedfname = savestr (taggedabsname);
6342		  else
6343		    taggedfname = relative_filename (taggedabsname,tagfiledir);
6344
6345		  if (streq (curfdp->taggedfname, taggedfname))
6346		    /* The #line directive is only a line number change.  We
6347		       deal with this afterwards. */
6348		    free (taggedfname);
6349		  else
6350		    /* The tags following this #line directive should be
6351		       attributed to taggedfname.  In order to do this, set
6352		       curfdp accordingly. */
6353		    {
6354		      fdesc *fdp; /* file description pointer */
6355
6356		      /* Go look for a file description already set up for the
6357			 file indicated in the #line directive.  If there is
6358			 one, use it from now until the next #line
6359			 directive. */
6360		      for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6361			if (streq (fdp->infname, curfdp->infname)
6362			    && streq (fdp->taggedfname, taggedfname))
6363			  /* If we remove the second test above (after the &&)
6364			     then all entries pertaining to the same file are
6365			     coalesced in the tags file.  If we use it, then
6366			     entries pertaining to the same file but generated
6367			     from different files (via #line directives) will
6368			     go into separate sections in the tags file.  These
6369			     alternatives look equivalent.  The first one
6370			     destroys some apparently useless information. */
6371			  {
6372			    curfdp = fdp;
6373			    free (taggedfname);
6374			    break;
6375			  }
6376		      /* Else, if we already tagged the real file, skip all
6377			 input lines until the next #line directive. */
6378		      if (fdp == NULL) /* not found */
6379			for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6380			  if (streq (fdp->infabsname, taggedabsname))
6381			    {
6382			      discard_until_line_directive = TRUE;
6383			      free (taggedfname);
6384			      break;
6385			    }
6386		      /* Else create a new file description and use that from
6387			 now on, until the next #line directive. */
6388		      if (fdp == NULL) /* not found */
6389			{
6390			  fdp = fdhead;
6391			  fdhead = xnew (1, fdesc);
6392			  *fdhead = *curfdp; /* copy curr. file description */
6393			  fdhead->next = fdp;
6394			  fdhead->infname = savestr (curfdp->infname);
6395			  fdhead->infabsname = savestr (curfdp->infabsname);
6396			  fdhead->infabsdir = savestr (curfdp->infabsdir);
6397			  fdhead->taggedfname = taggedfname;
6398			  fdhead->usecharno = FALSE;
6399			  fdhead->prop = NULL;
6400			  fdhead->written = FALSE;
6401			  curfdp = fdhead;
6402			}
6403		    }
6404		  free (taggedabsname);
6405		  lineno = lno - 1;
6406		  readline (lbp, stream);
6407		  return;
6408		} /* if a real #line directive */
6409	    } /* if #line is followed by a a number */
6410	} /* if line begins with "#line " */
6411
6412      /* If we are here, no #line directive was found. */
6413      if (discard_until_line_directive)
6414	{
6415	  if (result > 0)
6416	    {
6417	      /* Do a tail recursion on ourselves, thus discarding the contents
6418		 of the line buffer. */
6419	      readline (lbp, stream);
6420	      return;
6421	    }
6422	  /* End of file. */
6423	  discard_until_line_directive = FALSE;
6424	  return;
6425	}
6426    } /* if #line directives should be considered */
6427
6428  {
6429    int match;
6430    regexp *rp;
6431    char *name;
6432
6433    /* Match against relevant regexps. */
6434    if (lbp->len > 0)
6435      for (rp = p_head; rp != NULL; rp = rp->p_next)
6436	{
6437	  /* Only use generic regexps or those for the current language.
6438	     Also do not use multiline regexps, which is the job of
6439	     regex_tag_multiline. */
6440	  if ((rp->lang != NULL && rp->lang != fdhead->lang)
6441	      || rp->multi_line)
6442	    continue;
6443
6444	  match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6445	  switch (match)
6446	    {
6447	    case -2:
6448	      /* Some error. */
6449	      if (!rp->error_signaled)
6450		{
6451		  error ("regexp stack overflow while matching \"%s\"",
6452			 rp->pattern);
6453		  rp->error_signaled = TRUE;
6454		}
6455	      break;
6456	    case -1:
6457	      /* No match. */
6458	      break;
6459	    case 0:
6460	      /* Empty string matched. */
6461	      if (!rp->error_signaled)
6462		{
6463		  error ("regexp matches the empty string: \"%s\"", rp->pattern);
6464		  rp->error_signaled = TRUE;
6465		}
6466	      break;
6467	    default:
6468	      /* Match occurred.  Construct a tag. */
6469	      name = rp->name;
6470	      if (name[0] == '\0')
6471		name = NULL;
6472	      else /* make a named tag */
6473		name = substitute (lbp->buffer, rp->name, &rp->regs);
6474	      if (rp->force_explicit_name)
6475		/* Force explicit tag name, if a name is there. */
6476		pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6477	      else
6478		make_tag (name, strlen (name), TRUE,
6479			  lbp->buffer, match, lineno, linecharno);
6480	      break;
6481	    }
6482	}
6483  }
6484}
6485
6486
/*
 * Duplicate the NUL-terminated string CP.  The copy is produced by
 * savenstr, which is asked for all strlen (CP) characters, and is
 * returned to the caller.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6497
6498/*
6499 * Return a pointer to a space of size LEN+1 allocated with xnew where
6500 * the string CP has been copied for at most the first LEN characters.
6501 */
6502static char *
6503savenstr (cp, len)
6504     char *cp;
6505     int len;
6506{
6507  register char *dp;
6508
6509  dp = xnew (len + 1, char);
6510  strncpy (dp, cp, len);
6511  dp[len] = '\0';
6512  return dp;
6513}
6514
/*
 * Find the last occurrence of the character C in the string SP and
 * return a pointer to it, or NULL when C does not occur.  The
 * terminating NUL takes part in the search, so looking for '\0'
 * yields a pointer to the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6536
/*
 * Find the first occurrence of the character C in the string SP and
 * return a pointer to it, or NULL when C does not occur.  The
 * terminating NUL takes part in the search, so looking for '\0'
 * yields a pointer to the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6555
/*
 * Compare the strings S1 and S2.  Two characters that are both
 * alphabetic are compared after lowcase conversion; any other pair is
 * compared as is.  The result is the difference between the first pair
 * of characters that do not match, or between the characters reached
 * when S1 ends.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6576
/*
 * Compare at most N characters of the strings S1 and S2.  Two
 * characters that are both alphabetic are compared after lowcase
 * conversion; any other pair is compared as is.
 *
 * Return 0 when the first N characters match (in particular whenever
 * N is not positive, whatever the strings contain); otherwise return
 * the difference between the first pair of characters that do not
 * match, or between the characters reached when S1 ends early.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still allowed to be compared.  It is
     tested before S1 is examined, so that a zero count never lets the
     characters influence the result. */
  for (; n > 0; n--, s1++, s2++)
    {
      if (*s1 == '\0')
        break;
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (n <= 0)
    return 0;                   /* all the requested characters matched */

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6602
/* Advance CP past every character for which iswhite holds and return
   the resulting pointer (end of string is not space). */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6612
/* Advance CP up to the first character for which iswhite holds, or to
   the end of the string if there is none, and return that pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6622
/* Report an error through error (), S1 being the printf control string
   and S2 its argument, then terminate the program with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6631
/* Let perror describe the current errno value, prefixed by S1, then
   terminate the program with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6639
6640static void
6641suggest_asking_for_help ()
6642{
6643  fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6644	   progname, NO_LONG_OPTIONS ? "-h" : "--help");
6645  exit (EXIT_FAILURE);
6646}
6647
6648/* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6649static void
6650error (s1, s2)
6651     const char *s1, *s2;
6652{
6653  fprintf (stderr, "%s: ", progname);
6654  fprintf (stderr, s1, s2);
6655  fprintf (stderr, "\n");
6656}
6657
6658/* Return a newly-allocated string whose contents
6659   concatenate those of s1, s2, s3.  */
6660static char *
6661concat (s1, s2, s3)
6662     char *s1, *s2, *s3;
6663{
6664  int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6665  char *result = xnew (len1 + len2 + len3 + 1, char);
6666
6667  strcpy (result, s1);
6668  strcpy (result + len1, s2);
6669  strcpy (result + len1 + len2, s3);
6670  result[len1 + len2 + len3] = '\0';
6671
6672  return result;
6673}
6674
6675
6676/* Does the same work as the system V getcwd, but does not need to
6677   guess the buffer size in advance. */
6678static char *
6679etags_getcwd ()
6680{
6681#ifdef HAVE_GETCWD
6682  int bufsize = 200;
6683  char *path = xnew (bufsize, char);
6684
6685  while (getcwd (path, bufsize) == NULL)
6686    {
6687      if (errno != ERANGE)
6688	pfatal ("getcwd");
6689      bufsize *= 2;
6690      free (path);
6691      path = xnew (bufsize, char);
6692    }
6693
6694  canonicalize_filename (path);
6695  return path;
6696
6697#else /* not HAVE_GETCWD */
6698#if MSDOS
6699
6700  char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6701
6702  getwd (path);
6703
6704  for (p = path; *p != '\0'; p++)
6705    if (*p == '\\')
6706      *p = '/';
6707    else
6708      *p = lowcase (*p);
6709
6710  return strdup (path);
6711#else /* not MSDOS */
6712  linebuffer path;
6713  FILE *pipe;
6714
6715  linebuffer_init (&path);
6716  pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6717  if (pipe == NULL || readline_internal (&path, pipe) == 0)
6718    pfatal ("pwd");
6719  pclose (pipe);
6720
6721  return path.buffer;
6722#endif /* not MSDOS */
6723#endif /* not HAVE_GETCWD */
6724}
6725
6726/* Return a newly allocated string containing the file name of FILE
6727   relative to the absolute directory DIR (which should end with a slash). */
6728static char *
6729relative_filename (file, dir)
6730     char *file, *dir;
6731{
6732  char *fp, *dp, *afn, *res;
6733  int i;
6734
6735  /* Find the common root of file and dir (with a trailing slash). */
6736  afn = absolute_filename (file, cwd);
6737  fp = afn;
6738  dp = dir;
6739  while (*fp++ == *dp++)
6740    continue;
6741  fp--, dp--;			/* back to the first differing char */
6742#ifdef DOS_NT
6743  if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6744    return afn;
6745#endif
6746  do				/* look at the equal chars until '/' */
6747    fp--, dp--;
6748  while (*fp != '/');
6749
6750  /* Build a sequence of "../" strings for the resulting relative file name. */
6751  i = 0;
6752  while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6753    i += 1;
6754  res = xnew (3*i + strlen (fp + 1) + 1, char);
6755  res[0] = '\0';
6756  while (i-- > 0)
6757    strcat (res, "../");
6758
6759  /* Add the file name relative to the common root of file and dir. */
6760  strcat (res, fp + 1);
6761  free (afn);
6762
6763  return res;
6764}
6765
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  A FILE that is
   already absolute is copied, otherwise it is appended to DIR.  The
   "/dirname/.." and "/." components are then removed from the result
   in place. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *sep;                    /* slash currently being examined */
  char *prev;                   /* start of the component before "/.." */
  char *path;                   /* the name being built */

  if (filename_is_absolute (file))
    path = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    path = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  sep = etags_strchr (path, '/');
  while (sep != NULL && sep[0] != '\0')
    {
      if (sep[1] == '.' && sep[2] == '.'
          && (sep[3] == '/' || sep[3] == '\0'))
        {
          /* "/..": walk back to where the preceding component starts. */
          prev = sep;
          do
            prev--;
          while (prev >= path && !filename_is_absolute (prev));
          if (prev < path)
            prev = sep;         /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (prev[0] != '/')
            prev = sep;
#endif
          /* Close the gap and look again from the same place. */
          memmove (prev, sep + 3, strlen (sep + 3) + 1);
          sep = prev;
          continue;
        }

      if (sep[1] == '.' && (sep[2] == '/' || sep[2] == '\0'))
        {
          /* "/.": drop it and look again at the same slash. */
          memmove (sep, sep + 2, strlen (sep + 2) + 1);
          continue;
        }

      sep = etags_strchr (sep + 1, '/');
    }

  if (path[0] != '\0')
    return path;

  /* Just a safety net: should never happen. */
  free (path);
  return savestr ("/");
}
6829
/* Return a newly allocated string containing the absolute file name of
   the directory where FILE resides, given DIR (which should end with a
   slash).  FILE is canonicalized in place; when it contains no slash,
   a copy of DIR is returned. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Cut FILE right after its last slash for the duration of the
         call, then put the overwritten character back. */
      saved = last_slash[1];
      last_slash[1] = '\0';
      dirname = absolute_filename (file, dir);
      last_slash[1] = saved;
    }
  else
    dirname = savestr (dir);

  return dirname;
}
6851
/* Tell whether FN is an absolute file name, that is whether it starts
   with a slash or, under DOS_NT, with a drive letter followed by ":/".
   FN must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
#ifdef DOS_NT
  if (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return 1;
#endif
  return fn[0] == '/';
}
6864
/* Canonicalize the file name FN in place.  Under DOS_NT a lower case
   drive letter is converted to upper case and every backslash becomes
   a slash; elsewhere the name is left untouched. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  char *scan;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  for (scan = fn; *scan != '\0'; scan++)
    if (*scan == '\\')
      *scan = '/';
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6883
6884
6885/* Initialize a linebuffer for use */
6886static void
6887linebuffer_init (lbp)
6888     linebuffer *lbp;
6889{
6890  lbp->size = (DEBUG) ? 3 : 200;
6891  lbp->buffer = xnew (lbp->size, char);
6892  lbp->buffer[0] = '\0';
6893  lbp->len = 0;
6894}
6895
6896/* Set the minimum size of a string contained in a linebuffer. */
6897static void
6898linebuffer_setlen (lbp, toksize)
6899     linebuffer *lbp;
6900     int toksize;
6901{
6902  while (lbp->size <= toksize)
6903    {
6904      lbp->size *= 2;
6905      xrnew (lbp->buffer, lbp->size, char);
6906    }
6907  lbp->len = toksize;
6908}
6909
6910/* Like malloc but get fatal error if memory is exhausted. */
6911static PTR
6912xmalloc (size)
6913     unsigned int size;
6914{
6915  PTR result = (PTR) malloc (size);
6916  if (result == NULL)
6917    fatal ("virtual memory exhausted", (char *)NULL);
6918  return result;
6919}
6920
6921static PTR
6922xrealloc (ptr, size)
6923     char *ptr;
6924     unsigned int size;
6925{
6926  PTR result = (PTR) realloc (ptr, size);
6927  if (result == NULL)
6928    fatal ("virtual memory exhausted", (char *)NULL);
6929  return result;
6930}
6931
6932/*
6933 * Local Variables:
6934 * indent-tabs-mode: t
6935 * tab-width: 8
6936 * fill-column: 79
6937 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6938 * c-file-style: "gnu"
6939 * End:
6940 */
6941
6942/* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6943   (do not change this comment) */
6944
6945/* etags.c ends here */
6946