// -*- C++ -*-
/* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2004
   Free Software Foundation, Inc.
     Written by James Clark (jjc@jclark.com)

This file is part of groff.

groff is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

groff is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with groff; see the file COPYING.  If not, write to the Free Software
Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
21114402Sru
22114402Sru#include "lib.h"
23114402Sru
24114402Sru#include <stdlib.h>
25114402Sru#include <errno.h>
26114402Sru
27114402Sru#include "posix.h"
28114402Sru#include "cset.h"
29114402Sru#include "cmap.h"
30114402Sru#include "errarg.h"
31114402Sru#include "error.h"
32114402Sru
33114402Sru#include "refid.h"
34114402Sru#include "search.h"
35114402Sru#include "index.h"
36114402Sru#include "defs.h"
37114402Sru
38114402Sru#include "nonposix.h"
39114402Sru
// C-linkage interface to the mmap wrapper.  load() asks mapread() for a
// read-only view of the index file (a null result makes it fall back to
// read()); the destructor releases the view with unmap().
extern "C" void *mapread(int fd, int len);
extern "C" int unmap(void *, int len);
45114402Sru
// A one-element, already-terminated tag list.  The search routines return
// its address to mean "no matching tags".  The `const' qualifier is present
// in the source but compiled out.
#if 0
const
#endif
int minus_one = -1;

// When non-zero, make_index_search_item() calls verify() on every index it
// loads and rejects the index if verification fails.
int verify_flag = 0;

struct word_list;
54114402Sru
55114402Sruclass index_search_item : public search_item {
56114402Sru  search_item *out_of_date_files;
57114402Sru  index_header header;
58114402Sru  char *buffer;
59114402Sru  void *map_addr;
60114402Sru  int map_len;
61114402Sru  tag *tags;
62114402Sru  int *table;
63114402Sru  int *lists;
64114402Sru  char *pool;
65114402Sru  char *key_buffer;
66114402Sru  char *filename_buffer;
67114402Sru  int filename_buflen;
68114402Sru  char **common_words_table;
69114402Sru  int common_words_table_size;
70114402Sru  const char *ignore_fields;
71114402Sru  time_t mtime;
72114402Sru
73114402Sru  const char *do_verify();
74114402Sru  const int *search1(const char **pp, const char *end);
75114402Sru  const int *search(const char *ptr, int length, int **temp_listp);
76114402Sru  const char *munge_filename(const char *);
77114402Sru  void read_common_words_file();
78114402Sru  void add_out_of_date_file(int fd, const char *filename, int fid);
79114402Srupublic:
80114402Sru  index_search_item(const char *, int);
81114402Sru  ~index_search_item();
82114402Sru  int load(int fd);
83114402Sru  search_item_iterator *make_search_item_iterator(const char *);
84114402Sru  int verify();
85114402Sru  void check_files();
86114402Sru  int next_filename_id() const;
87114402Sru  friend class index_search_item_iterator;
88114402Sru};
89114402Sru
90114402Sruclass index_search_item_iterator : public search_item_iterator {
91114402Sru  index_search_item *indx;
92114402Sru  search_item_iterator *out_of_date_files_iter;
93114402Sru  search_item *next_out_of_date_file;
94114402Sru  const int *found_list;
95114402Sru  int *temp_list;
96114402Sru  char *buf;
97114402Sru  int buflen;
98114402Sru  linear_searcher searcher;
99114402Sru  char *query;
100114402Sru  int get_tag(int tagno, const linear_searcher &, const char **, int *,
101114402Sru	      reference_id *);
102114402Srupublic:
103114402Sru  index_search_item_iterator(index_search_item *, const char *);
104114402Sru  ~index_search_item_iterator();
105114402Sru  int next(const linear_searcher &, const char **, int *, reference_id *);
106114402Sru};
107114402Sru
108114402Sru
109114402Sruindex_search_item::index_search_item(const char *filename, int fid)
110114402Sru: search_item(filename, fid), out_of_date_files(0), buffer(0), map_addr(0),
111114402Sru  map_len(0), key_buffer(0), filename_buffer(0), filename_buflen(0),
112114402Sru  common_words_table(0)
113114402Sru{
114114402Sru}
115114402Sru
116114402Sruindex_search_item::~index_search_item()
117114402Sru{
118114402Sru  if (buffer)
119114402Sru    free(buffer);
120114402Sru  if (map_addr) {
121114402Sru    if (unmap(map_addr, map_len) < 0)
122114402Sru      error("unmap: %1", strerror(errno));
123114402Sru  }
124114402Sru  while (out_of_date_files) {
125114402Sru    search_item *tem = out_of_date_files;
126114402Sru    out_of_date_files = out_of_date_files->next;
127114402Sru    delete tem;
128114402Sru  }
129114402Sru  a_delete filename_buffer;
130114402Sru  a_delete key_buffer;
131114402Sru  if (common_words_table) {
132114402Sru    for (int i = 0; i < common_words_table_size; i++)
133114402Sru      a_delete common_words_table[i];
134114402Sru    a_delete common_words_table;
135114402Sru  }
136114402Sru}
137114402Sru
// Closes a file descriptor when the object goes out of scope.  The
// descriptor is read through a pointer at destruction time, so the
// variable passed to the constructor must outlive the closer.
class file_closer {
public:
  file_closer(int &fd) : fdp(&fd) { }
  ~file_closer() { close(*fdp); }
private:
  int *fdp;
};
144114402Sru
// Accepts and ignores a pointer; passing a variable's address here marks
// the variable as intentionally unused.
inline void unused(void * /* ignored */)
{
}
147114402Sru
148114402Sruint index_search_item::load(int fd)
149114402Sru{
150114402Sru  file_closer fd_closer(fd);	// close fd on return
151114402Sru  unused(&fd_closer);
152114402Sru  struct stat sb;
153114402Sru  if (fstat(fd, &sb) < 0) {
154114402Sru    error("can't fstat `%1': %2", name, strerror(errno));
155114402Sru    return 0;
156114402Sru  }
157114402Sru  if (!S_ISREG(sb.st_mode)) {
158114402Sru    error("`%1' is not a regular file", name);
159114402Sru    return 0;
160114402Sru  }
161114402Sru  mtime = sb.st_mtime;
162114402Sru  int size = int(sb.st_size);
163114402Sru  char *addr;
164114402Sru  map_addr = mapread(fd, size);
165114402Sru  if (map_addr) {
166114402Sru    addr = (char *)map_addr;
167114402Sru    map_len = size;
168114402Sru  }
169114402Sru  else {
170114402Sru    addr = buffer = (char *)malloc(size);
171114402Sru    if (buffer == 0) {
172114402Sru      error("can't allocate buffer for `%1'", name);
173114402Sru      return 0;
174114402Sru    }
175114402Sru    char *ptr = buffer;
176114402Sru    int bytes_to_read = size;
177114402Sru    while (bytes_to_read > 0) {
178114402Sru      int nread = read(fd, ptr, bytes_to_read);
179114402Sru      if (nread == 0) {
180114402Sru	error("unexpected EOF on `%1'", name);
181114402Sru	return 0;
182114402Sru      }
183114402Sru      if (nread < 0) {
184114402Sru	error("read error on `%1': %2", name, strerror(errno));
185114402Sru	return 0;
186114402Sru      }
187114402Sru      bytes_to_read -= nread;
188114402Sru      ptr += nread;
189114402Sru    }
190114402Sru  }
191114402Sru  header = *(index_header *)addr;
192114402Sru  if (header.magic != INDEX_MAGIC) {
193114402Sru    error("`%1' is not an index file: wrong magic number", name);
194114402Sru    return 0;
195114402Sru  }
196114402Sru  if (header.version != INDEX_VERSION) {
197114402Sru    error("version number in `%1' is wrong: was %2, should be %3",
198114402Sru	  name, header.version, INDEX_VERSION);
199114402Sru    return 0;
200114402Sru  }
201114402Sru  int sz = (header.tags_size * sizeof(tag)
202114402Sru	    + header.lists_size * sizeof(int)
203114402Sru	    + header.table_size * sizeof(int)
204114402Sru	    + header.strings_size
205114402Sru	    + sizeof(header));
206114402Sru  if (sz != size) {
207114402Sru    error("size of `%1' is wrong: was %2, should be %3",
208114402Sru	  name, size, sz);
209114402Sru    return 0;
210114402Sru  }
211114402Sru  tags = (tag *)(addr + sizeof(header));
212114402Sru  lists = (int *)(tags + header.tags_size);
213114402Sru  table = (int *)(lists + header.lists_size);
214114402Sru  pool = (char *)(table + header.table_size);
215114402Sru  ignore_fields = strchr(strchr(pool, '\0') + 1, '\0') + 1;
216114402Sru  key_buffer = new char[header.truncate];
217114402Sru  read_common_words_file();
218114402Sru  return 1;
219114402Sru}
220114402Sru
221114402Sruconst char *index_search_item::do_verify()
222114402Sru{
223114402Sru  if (tags == 0)
224114402Sru    return "not loaded";
225114402Sru  if (lists[header.lists_size - 1] >= 0)
226114402Sru    return "last list element not negative";
227114402Sru  int i;
228114402Sru  for (i = 0; i < header.table_size; i++) {
229114402Sru    int li = table[i];
230114402Sru    if (li >= header.lists_size)
231114402Sru      return "bad list index";
232114402Sru    if (li >= 0) {
233114402Sru      for (int *ptr = lists + li; *ptr >= 0; ptr++) {
234114402Sru	if (*ptr >= header.tags_size)
235114402Sru	  return "bad tag index";
236114402Sru	if (*ptr >= ptr[1] && ptr[1] >= 0)
237114402Sru	  return "list not ordered";
238114402Sru      }
239114402Sru    }
240114402Sru  }
241114402Sru  for (i = 0; i < header.tags_size; i++) {
242114402Sru    if (tags[i].filename_index >= header.strings_size)
243114402Sru      return "bad index in tags";
244114402Sru    if (tags[i].length < 0)
245114402Sru      return "bad length in tags";
246114402Sru    if (tags[i].start < 0)
247114402Sru      return "bad start in tags";
248114402Sru  }
249114402Sru  if (pool[header.strings_size - 1] != '\0')
250114402Sru    return "last character in pool not nul";
251114402Sru  return 0;
252114402Sru}
253114402Sru
254114402Sruint index_search_item::verify()
255114402Sru{
256114402Sru  const char *reason = do_verify();
257114402Sru  if (!reason)
258114402Sru    return 1;
259114402Sru  error("`%1' is bad: %2", name, reason);
260114402Sru  return 0;
261114402Sru}
262114402Sru
263114402Sruint index_search_item::next_filename_id() const
264114402Sru{
265114402Sru  return filename_id + header.strings_size + 1;
266114402Sru}
267114402Sru
268114402Srusearch_item_iterator *index_search_item::make_search_item_iterator(
269114402Sru  const char *query)
270114402Sru{
271114402Sru  return new index_search_item_iterator(this, query);
272114402Sru}
273114402Sru
274114402Srusearch_item *make_index_search_item(const char *filename, int fid)
275114402Sru{
276114402Sru  char *index_filename = new char[strlen(filename) + sizeof(INDEX_SUFFIX)];
277114402Sru  strcpy(index_filename, filename);
278114402Sru  strcat(index_filename, INDEX_SUFFIX);
279114402Sru  int fd = open(index_filename, O_RDONLY | O_BINARY);
280114402Sru  if (fd < 0)
281114402Sru    return 0;
282114402Sru  index_search_item *item = new index_search_item(index_filename, fid);
283114402Sru  a_delete index_filename;
284114402Sru  if (!item->load(fd)) {
285114402Sru    close(fd);
286114402Sru    delete item;
287114402Sru    return 0;
288114402Sru  }
289114402Sru  else if (verify_flag && !item->verify()) {
290114402Sru    delete item;
291114402Sru    return 0;
292114402Sru  }
293114402Sru  else {
294114402Sru    item->check_files();
295114402Sru    return item;
296114402Sru  }
297114402Sru}
298114402Sru
299114402Sru
300114402Sruindex_search_item_iterator::index_search_item_iterator(index_search_item *ind,
301114402Sru						       const char *q)
302114402Sru: indx(ind), out_of_date_files_iter(0), next_out_of_date_file(0), temp_list(0),
303114402Sru  buf(0), buflen(0),
304114402Sru  searcher(q, strlen(q), ind->ignore_fields, ind->header.truncate),
305114402Sru  query(strsave(q))
306114402Sru{
307114402Sru  found_list = indx->search(q, strlen(q), &temp_list);
308114402Sru  if (!found_list) {
309114402Sru    found_list = &minus_one;
310114402Sru    warning("all keys would have been discarded in constructing index `%1'",
311114402Sru	    indx->name);
312114402Sru  }
313114402Sru}
314114402Sru
315114402Sruindex_search_item_iterator::~index_search_item_iterator()
316114402Sru{
317114402Sru  a_delete temp_list;
318114402Sru  a_delete buf;
319114402Sru  a_delete query;
320114402Sru  delete out_of_date_files_iter;
321114402Sru}
322114402Sru
323114402Sruint index_search_item_iterator::next(const linear_searcher &,
324114402Sru				     const char **pp, int *lenp,
325114402Sru				     reference_id *ridp)
326114402Sru{
327114402Sru  if (found_list) {
328114402Sru    for (;;) {
329114402Sru      int tagno = *found_list;
330114402Sru      if (tagno == -1)
331114402Sru	break;
332114402Sru      found_list++;
333114402Sru      if (get_tag(tagno, searcher, pp, lenp, ridp))
334114402Sru	return 1;
335114402Sru    }
336114402Sru    found_list = 0;
337114402Sru    next_out_of_date_file = indx->out_of_date_files;
338114402Sru  }
339114402Sru  while (next_out_of_date_file) {
340114402Sru    if (out_of_date_files_iter == 0)
341114402Sru      out_of_date_files_iter
342114402Sru	= next_out_of_date_file->make_search_item_iterator(query);
343114402Sru    if (out_of_date_files_iter->next(searcher, pp, lenp, ridp))
344114402Sru      return 1;
345114402Sru    delete out_of_date_files_iter;
346114402Sru    out_of_date_files_iter = 0;
347114402Sru    next_out_of_date_file = next_out_of_date_file->next;
348114402Sru  }
349114402Sru  return 0;
350114402Sru}
351114402Sru
352114402Sruint index_search_item_iterator::get_tag(int tagno,
353151497Sru					const linear_searcher &searchr,
354114402Sru					const char **pp, int *lenp,
355114402Sru					reference_id *ridp)
356114402Sru{
357114402Sru  if (tagno < 0 || tagno >= indx->header.tags_size) {
358114402Sru    error("bad tag number");
359114402Sru    return 0;
360114402Sru  }
361114402Sru  tag *tp = indx->tags + tagno;
362114402Sru  const char *filename = indx->munge_filename(indx->pool + tp->filename_index);
363114402Sru  int fd = open(filename, O_RDONLY | O_BINARY);
364114402Sru  if (fd < 0) {
365114402Sru    error("can't open `%1': %2", filename, strerror(errno));
366114402Sru    return 0;
367114402Sru  }
368114402Sru  struct stat sb;
369114402Sru  if (fstat(fd, &sb) < 0) {
370114402Sru    error("can't fstat: %1", strerror(errno));
371114402Sru    close(fd);
372114402Sru    return 0;
373114402Sru  }
374114402Sru  time_t mtime = sb.st_mtime;
375114402Sru  if (mtime > indx->mtime) {
376114402Sru    indx->add_out_of_date_file(fd, filename,
377114402Sru			       indx->filename_id + tp->filename_index);
378114402Sru    return 0;
379114402Sru  }
380114402Sru  int res = 0;
381114402Sru  FILE *fp = fdopen(fd, FOPEN_RB);
382114402Sru  if (!fp) {
383114402Sru    error("fdopen failed");
384114402Sru    close(fd);
385114402Sru    return 0;
386114402Sru  }
387114402Sru  if (tp->start != 0 && fseek(fp, long(tp->start), 0) < 0)
388114402Sru    error("can't seek on `%1': %2", filename, strerror(errno));
389114402Sru  else {
390114402Sru    int length = tp->length;
391114402Sru    int err = 0;
392114402Sru    if (length == 0) {
393114402Sru      if (fstat(fileno(fp), &sb) < 0) {
394114402Sru	error("can't stat `%1': %2", filename, strerror(errno));
395114402Sru	err = 1;
396114402Sru      }
397114402Sru      else if (!S_ISREG(sb.st_mode)) {
398114402Sru	error("`%1' is not a regular file", filename);
399114402Sru	err = 1;
400114402Sru      }
401114402Sru      else
402114402Sru	length = int(sb.st_size);
403114402Sru    }
404114402Sru    if (!err) {
405114402Sru      if (length + 2 > buflen) {
406114402Sru	a_delete buf;
407114402Sru	buflen = length + 2;
408114402Sru	buf = new char[buflen];
409114402Sru      }
410114402Sru      if (fread(buf + 1, 1, length, fp) != (size_t)length)
411114402Sru	error("fread on `%1' failed: %2", filename, strerror(errno));
412114402Sru      else {
413114402Sru	buf[0] = '\n';
414114402Sru	// Remove the CR characters from CRLF pairs.
415114402Sru	int sidx = 1, didx = 1;
416114402Sru	for ( ; sidx < length + 1; sidx++, didx++)
417114402Sru	  {
418114402Sru	    if (buf[sidx] == '\r')
419114402Sru	      {
420114402Sru		if (buf[++sidx] != '\n')
421114402Sru		  buf[didx++] = '\r';
422114402Sru		else
423114402Sru		  length--;
424114402Sru	      }
425114402Sru	    if (sidx != didx)
426114402Sru	      buf[didx] = buf[sidx];
427114402Sru	  }
428114402Sru	buf[length + 1] = '\n';
429151497Sru	res = searchr.search(buf + 1, buf + 2 + length, pp, lenp);
430114402Sru	if (res && ridp)
431114402Sru	  *ridp = reference_id(indx->filename_id + tp->filename_index,
432114402Sru			       tp->start);
433114402Sru      }
434114402Sru    }
435114402Sru  }
436114402Sru  fclose(fp);
437114402Sru  return res;
438114402Sru}
439114402Sru
440114402Sruconst char *index_search_item::munge_filename(const char *filename)
441114402Sru{
442114402Sru  if (IS_ABSOLUTE(filename))
443114402Sru    return filename;
444114402Sru  const char *cwd = pool;
445114402Sru  int need_slash = (cwd[0] != 0
446114402Sru		    && strchr(DIR_SEPS, strchr(cwd, '\0')[-1]) == 0);
447114402Sru  int len = strlen(cwd) + strlen(filename) + need_slash + 1;
448114402Sru  if (len > filename_buflen) {
449114402Sru    a_delete filename_buffer;
450114402Sru    filename_buflen = len;
451114402Sru    filename_buffer = new char[len];
452114402Sru  }
453114402Sru  strcpy(filename_buffer, cwd);
454114402Sru  if (need_slash)
455114402Sru    strcat(filename_buffer, "/");
456114402Sru  strcat(filename_buffer, filename);
457114402Sru  return filename_buffer;
458114402Sru}
459114402Sru
460114402Sruconst int *index_search_item::search1(const char **pp, const char *end)
461114402Sru{
462114402Sru  while (*pp < end && !csalnum(**pp))
463114402Sru    *pp += 1;
464114402Sru  if (*pp >= end)
465114402Sru    return 0;
466114402Sru  const char *start = *pp;
467114402Sru  while (*pp < end && csalnum(**pp))
468114402Sru    *pp += 1;
469114402Sru  int len = *pp - start;
470114402Sru  if (len < header.shortest)
471114402Sru    return 0;
472114402Sru  if (len > header.truncate)
473114402Sru    len = header.truncate;
474114402Sru  int is_number = 1;
475114402Sru  for (int i = 0; i < len; i++)
476114402Sru    if (csdigit(start[i]))
477114402Sru      key_buffer[i] = start[i];
478114402Sru    else {
479114402Sru      key_buffer[i] = cmlower(start[i]);
480114402Sru      is_number = 0;
481114402Sru    }
482114402Sru  if (is_number && !(len == 4 && start[0] == '1' && start[1] == '9'))
483114402Sru    return 0;
484114402Sru  unsigned hc = hash(key_buffer, len);
485114402Sru  if (common_words_table) {
486114402Sru    for (int h = hc % common_words_table_size;
487114402Sru	 common_words_table[h];
488114402Sru	 --h) {
489114402Sru      if (strlen(common_words_table[h]) == (size_t)len
490114402Sru	  && memcmp(common_words_table[h], key_buffer, len) == 0)
491114402Sru	return 0;
492114402Sru      if (h == 0)
493114402Sru	h = common_words_table_size;
494114402Sru    }
495114402Sru  }
496114402Sru  int li = table[int(hc % header.table_size)];
497114402Sru  return li < 0 ? &minus_one : lists + li;
498114402Sru}
499114402Sru
// Store in `result' the values that occur in both `s1' and `s2', followed
// by -1.  Both inputs are ascending lists ended by a negative value.
// `result' may be the same array as `s1': output never gets ahead of the
// position being read.
static void merge(int *result, const int *s1, const int *s2)
{
  int *out = result;
  while (*s1 >= 0) {
    const int wanted = *s1++;
    // Skip the entries of s2 that are too small to match.
    while (*s2 >= 0 && *s2 < wanted)
      ++s2;
    if (*s2 == wanted)
      *out++ = wanted;
  }
  *out = -1;
}
510114402Sru
511114402Sruconst int *index_search_item::search(const char *ptr, int length,
512114402Sru				     int **temp_listp)
513114402Sru{
514114402Sru  const char *end = ptr + length;
515114402Sru  if (*temp_listp) {
516114402Sru    a_delete *temp_listp;
517114402Sru    *temp_listp = 0;
518114402Sru  }
519114402Sru  const int *first_list = 0;
520114402Sru  while (ptr < end && (first_list = search1(&ptr, end)) == 0)
521114402Sru    ;
522114402Sru  if (!first_list)
523114402Sru    return 0;
524114402Sru  if (*first_list < 0)
525114402Sru    return first_list;
526114402Sru  const int *second_list = 0;
527114402Sru  while (ptr < end && (second_list = search1(&ptr, end)) == 0)
528114402Sru    ;
529114402Sru  if (!second_list)
530114402Sru    return first_list;
531114402Sru  if (*second_list < 0)
532114402Sru    return second_list;
533114402Sru  const int *p;
534114402Sru  for (p = first_list; *p >= 0; p++)
535114402Sru    ;
536114402Sru  int len = p - first_list;
537114402Sru  for (p = second_list; *p >= 0; p++)
538114402Sru    ;
539114402Sru  if (p - second_list < len)
540114402Sru    len = p - second_list;
541114402Sru  int *matches = new int[len + 1];
542114402Sru  merge(matches, first_list, second_list);
543114402Sru  while (ptr < end) {
544114402Sru    const int *list = search1(&ptr, end);
545114402Sru    if (list != 0) {
546114402Sru      if (*list < 0) {
547114402Sru	a_delete matches;
548114402Sru	return list;
549114402Sru      }
550114402Sru      merge(matches, matches, list);
551114402Sru      if (*matches < 0) {
552114402Sru	a_delete matches;
553114402Sru	return &minus_one;
554114402Sru      }
555114402Sru    }
556114402Sru  }
557114402Sru  *temp_listp = matches;
558114402Sru  return matches;
559114402Sru}
560114402Sru
561114402Sruvoid index_search_item::read_common_words_file()
562114402Sru{
563114402Sru  if (header.common <= 0)
564114402Sru    return;
565114402Sru  const char *common_words_file = munge_filename(strchr(pool, '\0') + 1);
566114402Sru  errno = 0;
567114402Sru  FILE *fp = fopen(common_words_file, "r");
568114402Sru  if (!fp) {
569114402Sru    error("can't open `%1': %2", common_words_file, strerror(errno));
570114402Sru    return;
571114402Sru  }
572114402Sru  common_words_table_size = 2*header.common + 1;
573114402Sru  while (!is_prime(common_words_table_size))
574114402Sru    common_words_table_size++;
575114402Sru  common_words_table = new char *[common_words_table_size];
576114402Sru  for (int i = 0; i < common_words_table_size; i++)
577114402Sru    common_words_table[i] = 0;
578114402Sru  int count = 0;
579114402Sru  int key_len = 0;
580114402Sru  for (;;) {
581114402Sru    int c = getc(fp);
582114402Sru    while (c != EOF && !csalnum(c))
583114402Sru      c = getc(fp);
584114402Sru    if (c == EOF)
585114402Sru      break;
586114402Sru    do {
587114402Sru      if (key_len < header.truncate)
588114402Sru	key_buffer[key_len++] = cmlower(c);
589114402Sru      c = getc(fp);
590114402Sru    } while (c != EOF && csalnum(c));
591114402Sru    if (key_len >= header.shortest) {
592114402Sru      int h = hash(key_buffer, key_len) % common_words_table_size;
593114402Sru      while (common_words_table[h]) {
594114402Sru	if (h == 0)
595114402Sru	  h = common_words_table_size;
596114402Sru	--h;
597114402Sru      }
598114402Sru      common_words_table[h] = new char[key_len + 1];
599114402Sru      memcpy(common_words_table[h], key_buffer, key_len);
600114402Sru      common_words_table[h][key_len] = '\0';
601114402Sru    }
602114402Sru    if (++count >= header.common)
603114402Sru      break;
604114402Sru    key_len = 0;
605114402Sru    if (c == EOF)
606114402Sru      break;
607114402Sru  }
608114402Sru  fclose(fp);
609114402Sru}
610114402Sru
611114402Sruvoid index_search_item::add_out_of_date_file(int fd, const char *filename,
612114402Sru					     int fid)
613114402Sru{
614114402Sru  search_item **pp;
615114402Sru  for (pp = &out_of_date_files; *pp; pp = &(*pp)->next)
616114402Sru    if ((*pp)->is_named(filename))
617114402Sru      return;
618114402Sru  *pp = make_linear_search_item(fd, filename, fid);
619114402Sru  warning("`%1' modified since `%2' created", filename, name);
620114402Sru}
621114402Sru
622114402Sruvoid index_search_item::check_files()
623114402Sru{
624114402Sru  const char *pool_end = pool + header.strings_size;
625114402Sru  for (const char *ptr = strchr(ignore_fields, '\0') + 1;
626114402Sru       ptr < pool_end;
627114402Sru       ptr = strchr(ptr, '\0') + 1) {
628114402Sru    const char *path = munge_filename(ptr);
629114402Sru    struct stat sb;
630114402Sru    if (stat(path, &sb) < 0)
631114402Sru      error("can't stat `%1': %2", path, strerror(errno));
632114402Sru    else if (sb.st_mtime > mtime) {
633114402Sru      int fd = open(path, O_RDONLY | O_BINARY);
634114402Sru      if (fd < 0)
635114402Sru	error("can't open `%1': %2", path, strerror(errno));
636114402Sru      else
637114402Sru	add_out_of_date_file(fd, path, filename_id + (ptr - pool));
638114402Sru    }
639114402Sru  }
640114402Sru}
641