1241675Suqs/*	$Id: tbl_data.c,v 1.24 2011/03/20 16:02:05 kristaps Exp $ */
2241675Suqs/*
3241675Suqs * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4241675Suqs * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5241675Suqs *
6241675Suqs * Permission to use, copy, modify, and distribute this software for any
7241675Suqs * purpose with or without fee is hereby granted, provided that the above
8241675Suqs * copyright notice and this permission notice appear in all copies.
9241675Suqs *
10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17241675Suqs */
18241675Suqs#ifdef HAVE_CONFIG_H
19241675Suqs#include "config.h"
20241675Suqs#endif
21241675Suqs
22241675Suqs#include <assert.h>
23241675Suqs#include <ctype.h>
24241675Suqs#include <stdlib.h>
25241675Suqs#include <string.h>
26241675Suqs#include <time.h>
27241675Suqs
28241675Suqs#include "mandoc.h"
29241675Suqs#include "libmandoc.h"
30241675Suqs#include "libroff.h"
31241675Suqs
/* Parse one data cell of an input line, starting at *pos. */
static int data(struct tbl_node *tbl, struct tbl_span *dp,
		int ln, const char *p, int *pos);

/* Allocate a span for a layout row and append it to the table. */
static struct tbl_span *newspan(struct tbl_node *tbl, int line,
		struct tbl_row *rp);
36241675Suqs
37241675Suqsstatic int
38241675Suqsdata(struct tbl_node *tbl, struct tbl_span *dp,
39241675Suqs		int ln, const char *p, int *pos)
40241675Suqs{
41241675Suqs	struct tbl_dat	*dat;
42241675Suqs	struct tbl_cell	*cp;
43241675Suqs	int		 sv, spans;
44241675Suqs
45241675Suqs	cp = NULL;
46241675Suqs	if (dp->last && dp->last->layout)
47241675Suqs		cp = dp->last->layout->next;
48241675Suqs	else if (NULL == dp->last)
49241675Suqs		cp = dp->layout->first;
50241675Suqs
51241675Suqs	/*
52241675Suqs	 * Skip over spanners and vertical lines to data formats, since
53241675Suqs	 * we want to match data with data layout cells in the header.
54241675Suqs	 */
55241675Suqs
56241675Suqs	while (cp && (TBL_CELL_VERT == cp->pos ||
57241675Suqs				TBL_CELL_DVERT == cp->pos ||
58241675Suqs				TBL_CELL_SPAN == cp->pos))
59241675Suqs		cp = cp->next;
60241675Suqs
61241675Suqs	/*
62241675Suqs	 * Stop processing when we reach the end of the available layout
63241675Suqs	 * cells.  This means that we have extra input.
64241675Suqs	 */
65241675Suqs
66241675Suqs	if (NULL == cp) {
67241675Suqs		mandoc_msg(MANDOCERR_TBLEXTRADAT,
68241675Suqs				tbl->parse, ln, *pos, NULL);
69241675Suqs		/* Skip to the end... */
70241675Suqs		while (p[*pos])
71241675Suqs			(*pos)++;
72241675Suqs		return(1);
73241675Suqs	}
74241675Suqs
75241675Suqs	dat = mandoc_calloc(1, sizeof(struct tbl_dat));
76241675Suqs	dat->layout = cp;
77241675Suqs	dat->pos = TBL_DATA_NONE;
78241675Suqs
79241675Suqs	assert(TBL_CELL_SPAN != cp->pos);
80241675Suqs
81241675Suqs	for (spans = 0, cp = cp->next; cp; cp = cp->next)
82241675Suqs		if (TBL_CELL_SPAN == cp->pos)
83241675Suqs			spans++;
84241675Suqs		else
85241675Suqs			break;
86241675Suqs
87241675Suqs	dat->spans = spans;
88241675Suqs
89241675Suqs	if (dp->last) {
90241675Suqs		dp->last->next = dat;
91241675Suqs		dp->last = dat;
92241675Suqs	} else
93241675Suqs		dp->last = dp->first = dat;
94241675Suqs
95241675Suqs	sv = *pos;
96241675Suqs	while (p[*pos] && p[*pos] != tbl->opts.tab)
97241675Suqs		(*pos)++;
98241675Suqs
99241675Suqs	/*
100241675Suqs	 * Check for a continued-data scope opening.  This consists of a
101241675Suqs	 * trailing `T{' at the end of the line.  Subsequent lines,
102241675Suqs	 * until a standalone `T}', are included in our cell.
103241675Suqs	 */
104241675Suqs
105241675Suqs	if (*pos - sv == 2 && 'T' == p[sv] && '{' == p[sv + 1]) {
106241675Suqs		tbl->part = TBL_PART_CDATA;
107241675Suqs		return(0);
108241675Suqs	}
109241675Suqs
110241675Suqs	assert(*pos - sv >= 0);
111241675Suqs
112241675Suqs	dat->string = mandoc_malloc((size_t)(*pos - sv + 1));
113241675Suqs	memcpy(dat->string, &p[sv], (size_t)(*pos - sv));
114241675Suqs	dat->string[*pos - sv] = '\0';
115241675Suqs
116241675Suqs	if (p[*pos])
117241675Suqs		(*pos)++;
118241675Suqs
119241675Suqs	if ( ! strcmp(dat->string, "_"))
120241675Suqs		dat->pos = TBL_DATA_HORIZ;
121241675Suqs	else if ( ! strcmp(dat->string, "="))
122241675Suqs		dat->pos = TBL_DATA_DHORIZ;
123241675Suqs	else if ( ! strcmp(dat->string, "\\_"))
124241675Suqs		dat->pos = TBL_DATA_NHORIZ;
125241675Suqs	else if ( ! strcmp(dat->string, "\\="))
126241675Suqs		dat->pos = TBL_DATA_NDHORIZ;
127241675Suqs	else
128241675Suqs		dat->pos = TBL_DATA_DATA;
129241675Suqs
130241675Suqs	if (TBL_CELL_HORIZ == dat->layout->pos ||
131241675Suqs			TBL_CELL_DHORIZ == dat->layout->pos ||
132241675Suqs			TBL_CELL_DOWN == dat->layout->pos)
133241675Suqs		if (TBL_DATA_DATA == dat->pos && '\0' != *dat->string)
134241675Suqs			mandoc_msg(MANDOCERR_TBLIGNDATA,
135241675Suqs					tbl->parse, ln, sv, NULL);
136241675Suqs
137241675Suqs	return(1);
138241675Suqs}
139241675Suqs
140241675Suqs/* ARGSUSED */
141241675Suqsint
142241675Suqstbl_cdata(struct tbl_node *tbl, int ln, const char *p)
143241675Suqs{
144241675Suqs	struct tbl_dat	*dat;
145241675Suqs	size_t	 	 sz;
146241675Suqs	int		 pos;
147241675Suqs
148241675Suqs	pos = 0;
149241675Suqs
150241675Suqs	dat = tbl->last_span->last;
151241675Suqs
152241675Suqs	if (p[pos] == 'T' && p[pos + 1] == '}') {
153241675Suqs		pos += 2;
154241675Suqs		if (p[pos] == tbl->opts.tab) {
155241675Suqs			tbl->part = TBL_PART_DATA;
156241675Suqs			pos++;
157241675Suqs			return(data(tbl, tbl->last_span, ln, p, &pos));
158241675Suqs		} else if ('\0' == p[pos]) {
159241675Suqs			tbl->part = TBL_PART_DATA;
160241675Suqs			return(1);
161241675Suqs		}
162241675Suqs
163241675Suqs		/* Fallthrough: T} is part of a word. */
164241675Suqs	}
165241675Suqs
166241675Suqs	dat->pos = TBL_DATA_DATA;
167241675Suqs
168241675Suqs	if (dat->string) {
169241675Suqs		sz = strlen(p) + strlen(dat->string) + 2;
170241675Suqs		dat->string = mandoc_realloc(dat->string, sz);
171241675Suqs		strlcat(dat->string, " ", sz);
172241675Suqs		strlcat(dat->string, p, sz);
173241675Suqs	} else
174241675Suqs		dat->string = mandoc_strdup(p);
175241675Suqs
176241675Suqs	if (TBL_CELL_DOWN == dat->layout->pos)
177241675Suqs		mandoc_msg(MANDOCERR_TBLIGNDATA,
178241675Suqs				tbl->parse, ln, pos, NULL);
179241675Suqs
180241675Suqs	return(0);
181241675Suqs}
182241675Suqs
183241675Suqsstatic struct tbl_span *
184241675Suqsnewspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
185241675Suqs{
186241675Suqs	struct tbl_span	*dp;
187241675Suqs
188241675Suqs	dp = mandoc_calloc(1, sizeof(struct tbl_span));
189241675Suqs	dp->line = line;
190241675Suqs	dp->tbl = &tbl->opts;
191241675Suqs	dp->layout = rp;
192241675Suqs	dp->head = tbl->first_head;
193241675Suqs
194241675Suqs	if (tbl->last_span) {
195241675Suqs		tbl->last_span->next = dp;
196241675Suqs		tbl->last_span = dp;
197241675Suqs	} else {
198241675Suqs		tbl->last_span = tbl->first_span = dp;
199241675Suqs		tbl->current_span = NULL;
200241675Suqs		dp->flags |= TBL_SPAN_FIRST;
201241675Suqs	}
202241675Suqs
203241675Suqs	return(dp);
204241675Suqs}
205241675Suqs
206241675Suqsint
207241675Suqstbl_data(struct tbl_node *tbl, int ln, const char *p)
208241675Suqs{
209241675Suqs	struct tbl_span	*dp;
210241675Suqs	struct tbl_row	*rp;
211241675Suqs	int		 pos;
212241675Suqs
213241675Suqs	pos = 0;
214241675Suqs
215241675Suqs	if ('\0' == p[pos]) {
216241675Suqs		mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, pos, NULL);
217241675Suqs		return(0);
218241675Suqs	}
219241675Suqs
220241675Suqs	/*
221241675Suqs	 * Choose a layout row: take the one following the last parsed
222241675Suqs	 * span's.  If that doesn't exist, use the last parsed span's.
223241675Suqs	 * If there's no last parsed span, use the first row.  Lastly,
224241675Suqs	 * if the last span was a horizontal line, use the same layout
225241675Suqs	 * (it doesn't "consume" the layout).
226241675Suqs	 */
227241675Suqs
228241675Suqs	if (tbl->last_span) {
229241675Suqs		assert(tbl->last_span->layout);
230241675Suqs		if (tbl->last_span->pos == TBL_SPAN_DATA) {
231241675Suqs			for (rp = tbl->last_span->layout->next;
232241675Suqs					rp && rp->first; rp = rp->next) {
233241675Suqs				switch (rp->first->pos) {
234241675Suqs				case (TBL_CELL_HORIZ):
235241675Suqs					dp = newspan(tbl, ln, rp);
236241675Suqs					dp->pos = TBL_SPAN_HORIZ;
237241675Suqs					continue;
238241675Suqs				case (TBL_CELL_DHORIZ):
239241675Suqs					dp = newspan(tbl, ln, rp);
240241675Suqs					dp->pos = TBL_SPAN_DHORIZ;
241241675Suqs					continue;
242241675Suqs				default:
243241675Suqs					break;
244241675Suqs				}
245241675Suqs				break;
246241675Suqs			}
247241675Suqs		} else
248241675Suqs			rp = tbl->last_span->layout;
249241675Suqs
250241675Suqs		if (NULL == rp)
251241675Suqs			rp = tbl->last_span->layout;
252241675Suqs	} else
253241675Suqs		rp = tbl->first_row;
254241675Suqs
255241675Suqs	assert(rp);
256241675Suqs
257241675Suqs	dp = newspan(tbl, ln, rp);
258241675Suqs
259241675Suqs	if ( ! strcmp(p, "_")) {
260241675Suqs		dp->pos = TBL_SPAN_HORIZ;
261241675Suqs		return(1);
262241675Suqs	} else if ( ! strcmp(p, "=")) {
263241675Suqs		dp->pos = TBL_SPAN_DHORIZ;
264241675Suqs		return(1);
265241675Suqs	}
266241675Suqs
267241675Suqs	dp->pos = TBL_SPAN_DATA;
268241675Suqs
269241675Suqs	/* This returns 0 when TBL_PART_CDATA is entered. */
270241675Suqs
271241675Suqs	while ('\0' != p[pos])
272241675Suqs		if ( ! data(tbl, dp, ln, p, &pos))
273241675Suqs			return(0);
274241675Suqs
275241675Suqs	return(1);
276241675Suqs}
277