1/* readtokens.c  -- Functions for reading tokens from an input stream.
2
3   Copyright (C) 1990-1991, 1999-2004, 2006, 2009-2010 Free Software
4   Foundation, Inc.
5
6   This program is free software: you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19   Written by Jim Meyering. */
20
/* This almost supersedes xreadline stuff -- using delim="\n"
   gives the same functionality, except that these functions
   would never return empty lines. */
24
25#include <config.h>
26
27#include "readtokens.h"
28
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <stdbool.h>
33
34#include "xalloc.h"
35
36#if USE_UNLOCKED_IO
37# include "unlocked-io.h"
38#endif
39
40/* Initialize a tokenbuffer. */
41
42void
43init_tokenbuffer (token_buffer *tokenbuffer)
44{
45  tokenbuffer->size = 0;
46  tokenbuffer->buffer = NULL;
47}
48
49/* Read a token from STREAM into TOKENBUFFER.
50   A token is delimited by any of the N_DELIM bytes in DELIM.
51   Upon return, the token is in tokenbuffer->buffer and
52   has a trailing '\0' instead of any original delimiter.
53   The function value is the length of the token not including
54   the final '\0'.  Upon EOF (i.e. on the call after the last
55   token is read) or error, return -1 without modifying tokenbuffer.
56   The EOF and error conditions may be distinguished in the caller
57   by testing ferror (STREAM).
58
59   This function works properly on lines containing NUL bytes
60   and on files do not end with a delimiter.  */
61
62size_t
63readtoken (FILE *stream,
64           const char *delim,
65           size_t n_delim,
66           token_buffer *tokenbuffer)
67{
68  char *p;
69  int c;
70  size_t i, n;
71  static const char *saved_delim = NULL;
72  static char isdelim[256];
73  bool same_delimiters;
74
75  if (delim == NULL && saved_delim == NULL)
76    abort ();
77
78  same_delimiters = false;
79  if (delim != saved_delim && saved_delim != NULL)
80    {
81      same_delimiters = true;
82      for (i = 0; i < n_delim; i++)
83        {
84          if (delim[i] != saved_delim[i])
85            {
86              same_delimiters = false;
87              break;
88            }
89        }
90    }
91
92  if (!same_delimiters)
93    {
94      size_t j;
95      saved_delim = delim;
96      memset (isdelim, 0, sizeof isdelim);
97      for (j = 0; j < n_delim; j++)
98        {
99          unsigned char ch = delim[j];
100          isdelim[ch] = 1;
101        }
102    }
103
104  /* FIXME: don't fool with this caching.  Use strchr instead.  */
105  /* skip over any leading delimiters */
106  for (c = getc (stream); c >= 0 && isdelim[c]; c = getc (stream))
107    {
108      /* empty */
109    }
110
111  p = tokenbuffer->buffer;
112  n = tokenbuffer->size;
113  i = 0;
114  for (;;)
115    {
116      if (c < 0 && i == 0)
117        return -1;
118
119      if (i == n)
120        p = x2nrealloc (p, &n, sizeof *p);
121
122      if (c < 0)
123        {
124          p[i] = 0;
125          break;
126        }
127      if (isdelim[c])
128        {
129          p[i] = 0;
130          break;
131        }
132      p[i++] = c;
133      c = getc (stream);
134    }
135
136  tokenbuffer->buffer = p;
137  tokenbuffer->size = n;
138  return i;
139}
140
141/* Build a NULL-terminated array of pointers to tokens
142   read from STREAM.  Return the number of tokens read.
143   All storage is obtained through calls to xmalloc-like functions.
144
145   %%% Question: is it worth it to do a single
146   %%% realloc() of `tokens' just before returning? */
147
148size_t
149readtokens (FILE *stream,
150            size_t projected_n_tokens,
151            const char *delim,
152            size_t n_delim,
153            char ***tokens_out,
154            size_t **token_lengths)
155{
156  token_buffer tb, *token = &tb;
157  char **tokens;
158  size_t *lengths;
159  size_t sz;
160  size_t n_tokens;
161
162  if (projected_n_tokens == 0)
163    projected_n_tokens = 64;
164  else
165    projected_n_tokens++;       /* add one for trailing NULL pointer */
166
167  sz = projected_n_tokens;
168  tokens = xnmalloc (sz, sizeof *tokens);
169  lengths = xnmalloc (sz, sizeof *lengths);
170
171  n_tokens = 0;
172  init_tokenbuffer (token);
173  for (;;)
174    {
175      char *tmp;
176      size_t token_length = readtoken (stream, delim, n_delim, token);
177      if (n_tokens >= sz)
178        {
179          tokens = x2nrealloc (tokens, &sz, sizeof *tokens);
180          lengths = xnrealloc (lengths, sz, sizeof *lengths);
181        }
182
183      if (token_length == (size_t) -1)
184        {
185          /* don't increment n_tokens for NULL entry */
186          tokens[n_tokens] = NULL;
187          lengths[n_tokens] = 0;
188          break;
189        }
190      tmp = xnmalloc (token_length + 1, sizeof *tmp);
191      lengths[n_tokens] = token_length;
192      tokens[n_tokens] = memcpy (tmp, token->buffer, token_length + 1);
193      n_tokens++;
194    }
195
196  free (token->buffer);
197  *tokens_out = tokens;
198  if (token_lengths != NULL)
199    *token_lengths = lengths;
200  return n_tokens;
201}
202