1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1992-2011 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                                                                      *
20***********************************************************************/
21#pragma prototyped
22/*
23 * David Korn
24 * Glenn Fowler
25 * AT&T Research
26 *
27 * join
28 */
29
30static const char usage[] =
31"[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]"
32USAGE_LICENSE
33"[+NAME?join - relational database operator]"
34"[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
35	"and \afile2\a and writes the resulting joined files to standard "
36	"output.  By default, a field is delimited by one or more spaces "
37	"and tabs with leading spaces and/or tabs ignored.  The \b-t\b option "
38	"can be used to change the field delimiter.]"
39"[+?The \ajoin field\a is a field in each file on which files are compared. "
40	"By default \bjoin\b writes one line in the output for each pair "
41	"of lines in \afiles1\a and \afiles2\a that have identical join "
42	"fields.  The default output line consists of the join field, "
43	"then the remaining fields from \afile1\a, then the remaining "
44	"fields from \afile2\a, but this can be changed with the \b-o\b "
45	"option.  The \b-a\b option can be used to add unmatched lines "
46	"to the output.  The \b-v\b option can be used to output only "
47	"unmatched lines.]"
48"[+?The files \afile1\a and \afile2\a must be ordered in the collating "
49	"sequence of \bsort -b\b on the fields on which they are to be "
50	"joined otherwise the results are unspecified.]"
51"[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
52        "uses standard input starting at the current location.]"
53
54"[e:empty]:[string?Replace empty output fields in the list selected with"
55"	\b-o\b with \astring\a.]"
56"[o:output]:[list?Construct the output line to comprise the fields specified "
57	"in a blank or comma separated list \alist\a.  Each element in "
58	"\alist\a consists of a file number (either 1 or 2), a period, "
59	"and a field number or \b0\b representing the join field.  "
60	"As an obsolete feature multiple occurrences of \b-o\b can "
61	"be specified.]"
62"[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
63"	and output.]"
64"[1:j1]#[field?Join on field \afield\a of \afile1\a.  Fields start at 1.]"
65"[2:j2]#[field?Join on field \afield\a of \afile2\a.  Fields start at 1.]"
66"[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
67"[a:unpairable]#[fileno?Write a line for each unpairable line in file"
68"	\afileno\a, where \afileno\a is either 1 or 2, in addition to the"
69"	normal output.  If \b-a\b options appear for both 1 and 2, then "
70	"all unpairable lines will be output.]"
71"[v:suppress]#[fileno?Write a line for each unpairable line in file"
72"	\afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
73	"output.  If \b-v\b options appear for both 1 and 2, then "
74	"all unpairable lines will be output.] ]"
75"[i:ignorecase?Ignore case in field comparisons.]"
76"[B!:mmap?Enable memory mapped reads instead of buffered.]"
77
78"[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
79"	is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
80"	is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
81"	equivalent to \b-2\b \afield\a.]"
82
83"\n"
84"\nfile1 file2\n"
85"\n"
86"[+EXIT STATUS?]{"
87	"[+0?Both files processed successfully.]"
88	"[+>0?An error occurred.]"
89"}"
90"[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
91;
92
93#include <cmd.h>
94#include <sfdisc.h>
95
96#if _hdr_wchar && _hdr_wctype && _lib_iswctype
97
98#include <wchar.h>
99#include <wctype.h>
100
101#else
102
103#include <ctype.h>
104
105#ifndef iswspace
106#define iswspace(x)	isspace(x)
107#endif
108
109#endif
110
/*
 * output mode bits kept in Join_t.outmode: bit <i> selects the
 * unpairable lines of file <i+1>, C_COMMON selects the paired lines
 */
enum
{
	C_FILE1		= 1 << 0,
	C_FILE2		= 1 << 1,
	C_COMMON	= 1 << 2,
	C_ALL		= C_FILE1 | C_FILE2 | C_COMMON
};

/*
 * NFIELD is the initial capacity of the field arrays and the output list
 * JOINFIELD is the output list entry that stands for the join field
 */
enum
{
	NFIELD		= 10,
	JOINFIELD	= 2
};

/*
 * character classes stored in Join_t.state
 * a class of 0 marks a byte that needs no special handling
 */
enum
{
	S_DELIM		= 1,
	S_SPACE		= 2,
	S_NL		= 3,
	S_WIDE		= 4
};
123
/*
 * one field of the current record
 * <beg> is its first byte, <end> is the start of the character
 * (delimiter or new-line) that terminated it
 */
typedef struct Field_s Field_t;

struct Field_s
{
	char	*beg;
	char	*end;
};
129
130typedef struct File_s
131{
132	Sfio_t*		iop;
133	char*		name;
134	char*		recptr;
135	int		reclen;
136	int		field;
137	int		fieldlen;
138	int		nfields;
139	int		maxfields;
140	int		spaces;
141	int		hit;
142	int		discard;
143	Field_t*	fields;
144} File_t;
145
146typedef struct Join_s
147{
148	unsigned char	state[1<<CHAR_BIT];
149	Sfio_t*		outfile;
150	int*		outlist;
151	int		outmode;
152	int		ooutmode;
153	char*		nullfield;
154	char*		delimstr;
155	int		delim;
156	int		delimlen;
157	int		buffered;
158	int		ignorecase;
159	int		mb;
160	char*		same;
161	int		samesize;
162	void*		context;
163	File_t		file[2];
164} Join_t;
165
166static void
167done(register Join_t* jp)
168{
169	if (jp->file[0].iop && jp->file[0].iop != sfstdin)
170		sfclose(jp->file[0].iop);
171	if (jp->file[1].iop && jp->file[1].iop != sfstdin)
172		sfclose(jp->file[1].iop);
173	if (jp->outlist)
174		free(jp->outlist);
175	if (jp->file[0].fields)
176		free(jp->file[0].fields);
177	if (jp->file[1].fields)
178		free(jp->file[1].fields);
179	if (jp->same)
180		free(jp->same);
181	free(jp);
182}
183
184static Join_t*
185init(void)
186{
187	register Join_t*	jp;
188	register int		i;
189
190	setlocale(LC_ALL, "");
191	if (jp = newof(0, Join_t, 1, 0))
192	{
193		if (jp->mb = mbwide())
194			for (i = 0x80; i <= 0xff; i++)
195				jp->state[i] = S_WIDE;
196		jp->state[' '] = jp->state['\t'] = S_SPACE;
197		jp->state['\n'] = S_NL;
198		jp->delim = -1;
199		jp->nullfield = 0;
200		if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) ||
201		    !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0)))
202		{
203			done(jp);
204			return 0;
205		}
206		jp->file[0].maxfields = NFIELD;
207		jp->file[1].maxfields = NFIELD;
208		jp->outmode = C_COMMON;
209	}
210	return jp;
211}
212
213static int
214getolist(Join_t* jp, const char* first, char** arglist)
215{
216	register const char*	cp = first;
217	char**			argv = arglist;
218	register int		c;
219	int*			outptr;
220	int*			outmax;
221	int			nfield = NFIELD;
222	char*			str;
223
224	outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
225	outmax = outptr + NFIELD;
226	while (c = *cp++)
227	{
228		if (c==' ' || c=='\t' || c==',')
229			continue;
230		str = (char*)--cp;
231		if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
232		{
233			str++;
234			c = JOINFIELD;
235			goto skip;
236		}
237		if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
238		{
239			error(2,"%s: invalid field list",first);
240			break;
241		}
242		c--;
243		c <<=2;
244		if (*cp=='2')
245			c |=1;
246	skip:
247		if (outptr >= outmax)
248		{
249			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
250			outptr = jp->outlist + nfield;
251			nfield *= 2;
252			outmax = jp->outlist + nfield;
253		}
254		*outptr++ = c;
255		cp = str;
256	}
257	/* need to accept obsolescent command syntax */
258	while (1)
259	{
260		if (!(cp= *argv) || cp[1]!='.' || (*cp!='1' && *cp!='2'))
261		{
262			if (*cp=='0' && cp[1]==0)
263			{
264				c = JOINFIELD;
265				goto skip2;
266			}
267			break;
268		}
269		str = (char*)cp;
270		c = strtol(cp+2, &str,10);
271		if (*str || --c<0)
272			break;
273		argv++;
274		c <<= 2;
275		if (*cp=='2')
276			c |=1;
277	skip2:
278		if (outptr >= outmax)
279		{
280			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
281			outptr = jp->outlist + nfield;
282			nfield *= 2;
283			outmax = jp->outlist + nfield;
284		}
285		*outptr++ = c;
286	}
287	*outptr = -1;
288	return argv-arglist;
289}
290
291/*
292 * read in a record from file <index> and split into fields
293 */
294static unsigned char*
295getrec(Join_t* jp, int index, int discard)
296{
297	register unsigned char*	sp = jp->state;
298	register File_t*	fp = &jp->file[index];
299	register Field_t*	field = fp->fields;
300	register Field_t*	fieldmax = field + fp->maxfields;
301	register char*		cp;
302	register int		n;
303	char*			tp;
304
305	if (sh_checksig(jp->context))
306		return 0;
307	if (discard && fp->discard)
308		sfraise(fp->iop, SFSK_DISCARD, NiL);
309	fp->spaces = 0;
310	fp->hit = 0;
311	if (!(cp = sfgetr(fp->iop, '\n', 0)))
312	{
313		jp->outmode &= ~(1<<index);
314		return 0;
315	}
316	fp->recptr = cp;
317	fp->reclen = sfvalue(fp->iop);
318	if (jp->delim == '\n')	/* handle new-line delimiter specially */
319	{
320		field->beg = cp;
321		cp += fp->reclen;
322		field->end = cp - 1;
323		field++;
324	}
325	else
326		do /* separate into fields */
327		{
328			if (field >= fieldmax)
329			{
330				n = 2 * fp->maxfields;
331				fp->fields = newof(fp->fields, Field_t, n + 1, 0);
332				field = fp->fields + fp->maxfields;
333				fp->maxfields = n;
334				fieldmax = fp->fields + n;
335			}
336			field->beg = cp;
337			if (jp->delim == -1)
338			{
339				switch (sp[*(unsigned char*)cp])
340				{
341				case S_SPACE:
342					cp++;
343					break;
344				case S_WIDE:
345					tp = cp;
346					if (iswspace(mbchar(tp)))
347					{
348						cp = tp;
349						break;
350					}
351					/*FALLTHROUGH*/
352				default:
353					goto next;
354				}
355				fp->spaces = 1;
356				if (jp->mb)
357					for (;;)
358					{
359						switch (sp[*(unsigned char*)cp++])
360						{
361						case S_SPACE:
362							continue;
363						case S_WIDE:
364							tp = cp - 1;
365							if (iswspace(mbchar(tp)))
366							{
367								cp = tp;
368								continue;
369							}
370							break;
371						}
372						break;
373					}
374				else
375					while (sp[*(unsigned char*)cp++]==S_SPACE);
376				cp--;
377			}
378		next:
379			if (jp->mb)
380			{
381				for (;;)
382				{
383					tp = cp;
384					switch (n = sp[*(unsigned char*)cp++])
385					{
386					case 0:
387						continue;
388					case S_WIDE:
389						cp--;
390						n = mbchar(cp);
391						if (n == jp->delim)
392						{
393							n = S_DELIM;
394							break;
395						}
396						if (jp->delim == -1 && iswspace(n))
397						{
398							n = S_SPACE;
399							break;
400						}
401						continue;
402					}
403					break;
404				}
405				field->end = tp;
406			}
407			else
408			{
409				while (!(n = sp[*(unsigned char*)cp++]));
410				field->end = cp - 1;
411			}
412			field++;
413		} while (n != S_NL);
414	fp->nfields = field - fp->fields;
415	if ((n = fp->field) < fp->nfields)
416	{
417		cp = fp->fields[n].beg;
418		/* eliminate leading spaces */
419		if (fp->spaces)
420		{
421			if (jp->mb)
422				for (;;)
423				{
424					switch (sp[*(unsigned char*)cp++])
425					{
426					case S_SPACE:
427						continue;
428					case S_WIDE:
429						tp = cp - 1;
430						if (iswspace(mbchar(tp)))
431						{
432							cp = tp;
433							continue;
434						}
435						break;
436					}
437					break;
438				}
439			else
440				while (sp[*(unsigned char*)cp++]==S_SPACE);
441			cp--;
442		}
443		fp->fieldlen = fp->fields[n].end - cp;
444		return (unsigned char*)cp;
445	}
446	fp->fieldlen = 0;
447	return (unsigned char*)"";
448}
449
450static unsigned char*
451_trace_getrec(Join_t* jp, int index, int discard)
452{
453	unsigned char*	r;
454
455	r = getrec(jp, index, discard);
456	return r;
457}
458#define getrec	_trace_getrec
459
460#if DEBUG_TRACE
461static unsigned char* u1,u2,u3;
462#define getrec(p,n,d)	(u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
463#endif
464
465/*
466 * print field <n> from file <index>
467 */
468static int
469outfield(Join_t* jp, int index, register int n, int last)
470{
471	register File_t*	fp = &jp->file[index];
472	register char*		cp;
473	register char*		cpmax;
474	register int		size;
475	register Sfio_t*	iop = jp->outfile;
476	char*			tp;
477
478	if (n < fp->nfields)
479	{
480		cp = fp->fields[n].beg;
481		cpmax = fp->fields[n].end + 1;
482	}
483	else
484		cp = 0;
485	if ((n = jp->delim) == -1)
486	{
487		if (cp && fp->spaces)
488		{
489			register unsigned char*	sp = jp->state;
490
491			/*eliminate leading spaces */
492			if (jp->mb)
493				for (;;)
494				{
495					switch (sp[*(unsigned char*)cp++])
496					{
497					case S_SPACE:
498						continue;
499					case S_WIDE:
500						tp = cp - 1;
501						if (iswspace(mbchar(tp)))
502						{
503							cp = tp;
504							continue;
505						}
506						break;
507					}
508					break;
509				}
510			else
511				while (sp[*(unsigned char*)cp++]==S_SPACE);
512			cp--;
513		}
514		n = ' ';
515	}
516	else if (jp->delimstr)
517		n = -1;
518	if (last)
519		n = '\n';
520	if (cp)
521		size = cpmax - cp;
522	else
523		size = 0;
524	if (n == -1)
525	{
526		if (size<=1)
527		{
528			if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0)
529				return -1;
530		}
531		else if (sfwrite(iop, cp, size) < 0)
532			return -1;
533		if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0)
534			return -1;
535	}
536	else if (size <= 1)
537	{
538		if (!jp->nullfield)
539			sfputc(iop, n);
540		else if (sfputr(iop, jp->nullfield, n) < 0)
541			return -1;
542	}
543	else
544	{
545		last = cp[size-1];
546		cp[size-1] = n;
547		if (sfwrite(iop, cp, size) < 0)
548			return -1;
549		cp[size-1] = last;
550	}
551	return 0;
552}
553
554#if DEBUG_TRACE
555static int i1,i2,i3;
556#define outfield(p,i,n,f)	(sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
557#endif
558
559static int
560outrec(register Join_t* jp, int mode)
561{
562	register File_t*	fp;
563	register int		i;
564	register int		j;
565	register int		k;
566	register int		n;
567	int*			out;
568
569	if (mode < 0 && jp->file[0].hit++)
570		return 0;
571	if (mode > 0 && jp->file[1].hit++)
572		return 0;
573	if (out = jp->outlist)
574	{
575		while ((n = *out++) >= 0)
576		{
577			if (n == JOINFIELD)
578			{
579				i = mode >= 0;
580				j = jp->file[i].field;
581			}
582			else
583			{
584				i = n & 1;
585				j = (mode<0 && i || mode>0 && !i) ?
586					jp->file[i].nfields :
587					n >> 2;
588			}
589			if (outfield(jp, i, j, *out < 0) < 0)
590				return -1;
591		}
592		return 0;
593	}
594	k = jp->file[0].nfields;
595	if (mode >= 0)
596		k += jp->file[1].nfields - 1;
597	for (i=0; i<2; i++)
598	{
599		fp = &jp->file[i];
600		if (mode>0 && i==0)
601		{
602			k -= (fp->nfields - 1);
603			continue;
604		}
605		n = fp->field;
606		if (mode||i==0)
607		{
608			/* output join field first */
609			if (outfield(jp,i,n,!--k) < 0)
610				return -1;
611			if (!k)
612				return 0;
613			for (j=0; j<n; j++)
614			{
615				if (outfield(jp,i,j,!--k) < 0)
616					return -1;
617				if (!k)
618					return 0;
619			}
620			j = n + 1;
621		}
622		else
623			j = 0;
624		for (;j<fp->nfields; j++)
625		{
626			if (j!=n && outfield(jp,i,j,!--k) < 0)
627				return -1;
628			if (!k)
629				return 0;
630		}
631	}
632	return 0;
633}
634
635#if DEBUG_TRACE
636#define outrec(p,n)	(sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
637#endif
638
639static int
640join(Join_t* jp)
641{
642	register unsigned char*	cp1;
643	register unsigned char*	cp2;
644	register int		n1;
645	register int		n2;
646	register int		n;
647	register int		cmp;
648	register int		same;
649	int			o2;
650	Sfoff_t			lo = -1;
651	Sfoff_t			hi = -1;
652
653	if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
654	{
655		n1 = jp->file[0].fieldlen;
656		n2 = jp->file[1].fieldlen;
657		same = 0;
658		for (;;)
659		{
660			n = n1 < n2 ? n1 : n2;
661#if DEBUG_TRACE
662			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
663				cmp = n1 - n2;
664sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
665			if (!cmp)
666#else
667			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
668#endif
669			{
670				if (!(jp->outmode & C_COMMON))
671				{
672					if (cp1 = getrec(jp, 0, 1))
673					{
674						n1 = jp->file[0].fieldlen;
675						same = 1;
676						continue;
677					}
678					if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
679						break;
680					if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
681					{
682						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
683						return -1;
684					}
685				}
686				else if (outrec(jp, 0) < 0)
687					return -1;
688				else if (lo < 0 && (jp->outmode & C_COMMON))
689				{
690					if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
691					{
692						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
693						return -1;
694					}
695					lo -= jp->file[1].reclen;
696				}
697				if (cp2 = getrec(jp, 1, lo < 0))
698				{
699					n2 = jp->file[1].fieldlen;
700					continue;
701				}
702#if DEBUG_TRACE
703sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
704#endif
705			}
706			else if (cmp > 0)
707			{
708				if (same)
709				{
710					same = 0;
711				next:
712					if (n2 > jp->samesize)
713					{
714						jp->samesize = roundof(n2, 16);
715						if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
716						{
717							error(ERROR_SYSTEM|2, "out of space");
718							return -1;
719						}
720					}
721					memcpy(jp->same, cp2, o2 = n2);
722					if (!(cp2 = getrec(jp, 1, 0)))
723						break;
724					n2 = jp->file[1].fieldlen;
725					if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
726						goto next;
727					continue;
728				}
729				if (hi >= 0)
730				{
731					if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
732					{
733						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
734						return -1;
735					}
736					hi = -1;
737				}
738				else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
739					return -1;
740				lo = -1;
741				if (cp2 = getrec(jp, 1, 1))
742				{
743					n2 = jp->file[1].fieldlen;
744					continue;
745				}
746#if DEBUG_TRACE
747sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
748#endif
749			}
750			else if (same)
751			{
752				same = 0;
753				if (!(cp1 = getrec(jp, 0, 0)))
754					break;
755				n1 = jp->file[0].fieldlen;
756				continue;
757			}
758			if (lo >= 0)
759			{
760				if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
761				    (hi -= jp->file[1].reclen) < 0 ||
762				    sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
763				    !(cp2 = getrec(jp, 1, 0)))
764				{
765					error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
766					return -1;
767				}
768				n2 = jp->file[1].fieldlen;
769				lo = -1;
770				if (jp->file[1].discard)
771					sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
772			}
773			else if (!cp2)
774				break;
775			else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
776				return -1;
777			if (!(cp1 = getrec(jp, 0, 1)))
778				break;
779			n1 = jp->file[0].fieldlen;
780		}
781	}
782#if DEBUG_TRACE
783sfprintf(sfstdout, "[X#%d:?,%p,%p,%d%,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
784#endif
785	if (cp2)
786	{
787		if (hi >= 0 &&
788		    sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
789		    sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
790		{
791			error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
792			return -1;
793		}
794#if DEBUG_TRACE
795sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
796#endif
797		cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
798		cmp = 1;
799		n = 1;
800	}
801	else
802	{
803		cmp = -1;
804		n = 0;
805	}
806#if DEBUG_TRACE
807sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
808#endif
809	if (!cp1 || !(jp->outmode & (1<<n)))
810	{
811		if (cp1 && jp->file[n].iop == sfstdin)
812			sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
813		return 0;
814	}
815	if (outrec(jp, cmp) < 0)
816		return -1;
817	do
818	{
819		if (!getrec(jp, n, 1))
820			return 0;
821	} while (outrec(jp, cmp) >= 0);
822	return -1;
823}
824
825int
826b_join(int argc, char** argv, void* context)
827{
828	register int		n;
829	register char*		cp;
830	register Join_t*	jp;
831	char*			e;
832
833#if !DEBUG_TRACE
834	cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
835#endif
836	if (!(jp = init()))
837		error(ERROR_system(1),"out of space");
838	jp->context = context;
839	for (;;)
840	{
841		switch (n = optget(argv, usage))
842		{
843 		case 'j':
844			/*
845			 * check for obsolete "-j1 field" and "-j2 field"
846			 */
847
848			if (opt_info.offset == 0)
849			{
850				cp = argv[opt_info.index - 1];
851				for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
852				n = cp[n] == 'j';
853			}
854			else
855				n = 0;
856			if (n)
857			{
858				if (opt_info.num!=1 && opt_info.num!=2)
859					error(2,"-jfileno field: fileno must be 1 or 2");
860				n = '0' + opt_info.num;
861				if (!(cp = argv[opt_info.index]))
862				{
863					argc = 0;
864					break;
865				}
866				opt_info.num = strtol(cp, &e, 10);
867				if (*e)
868				{
869					argc = 0;
870					break;
871				}
872				opt_info.index++;
873			}
874			else
875			{
876				jp->file[0].field = (int)(opt_info.num-1);
877				n = '2';
878			}
879			/*FALLTHROUGH*/
880 		case '1':
881		case '2':
882			if (opt_info.num <=0)
883				error(2,"field number must positive");
884			jp->file[n-'1'].field = (int)(opt_info.num-1);
885			continue;
886		case 'v':
887			jp->outmode &= ~C_COMMON;
888			/*FALLTHROUGH*/
889		case 'a':
890			if (opt_info.num!=1 && opt_info.num!=2)
891				error(2,"%s: file number must be 1 or 2", opt_info.name);
892			jp->outmode |= 1<<(opt_info.num-1);
893			continue;
894		case 'e':
895			jp->nullfield = opt_info.arg;
896			continue;
897		case 'o':
898			/* need to accept obsolescent command syntax */
899			n = getolist(jp, opt_info.arg, argv+opt_info.index);
900			opt_info.index += n;
901			continue;
902		case 't':
903			jp->state[' '] = jp->state['\t'] = 0;
904			if (jp->mb)
905			{
906				cp = opt_info.arg;
907				jp->delim = mbchar(cp);
908				if ((n = cp - opt_info.arg) > 1)
909				{
910					jp->delimlen = n;
911					jp->delimstr = opt_info.arg;
912					continue;
913				}
914			}
915			n = *(unsigned char*)opt_info.arg;
916			jp->state[n] = S_DELIM;
917			jp->delim = n;
918			continue;
919		case 'i':
920			jp->ignorecase = !opt_info.num;
921			continue;
922		case 'B':
923			jp->buffered = !opt_info.num;
924			continue;
925		case ':':
926			error(2, "%s", opt_info.arg);
927			break;
928		case '?':
929			done(jp);
930			error(ERROR_usage(2), "%s", opt_info.arg);
931			break;
932		}
933		break;
934	}
935	argv += opt_info.index;
936	argc -= opt_info.index;
937	if (error_info.errors || argc!=2)
938	{
939		done(jp);
940		error(ERROR_usage(2),"%s", optusage(NiL));
941	}
942	jp->ooutmode = jp->outmode;
943	jp->file[0].name = cp = *argv++;
944	if (streq(cp,"-"))
945	{
946		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
947		{
948			if (sfdcseekable(sfstdin))
949				error(ERROR_warn(0),"%s: seek may fail",cp);
950			else
951				jp->file[0].discard = 1;
952		}
953		jp->file[0].iop = sfstdin;
954	}
955	else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
956	{
957		done(jp);
958		error(ERROR_system(1),"%s: cannot open",cp);
959	}
960	jp->file[1].name = cp = *argv;
961	if (streq(cp,"-"))
962	{
963		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
964		{
965			if (sfdcseekable(sfstdin))
966				error(ERROR_warn(0),"%s: seek may fail",cp);
967			else
968				jp->file[1].discard = 1;
969		}
970		jp->file[1].iop = sfstdin;
971	}
972	else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
973	{
974		done(jp);
975		error(ERROR_system(1),"%s: cannot open",cp);
976	}
977	if (jp->buffered)
978	{
979		sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
980		sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND);
981	}
982	jp->outfile = sfstdout;
983	if (!jp->outlist)
984		jp->nullfield = 0;
985	if (join(jp) < 0)
986	{
987		done(jp);
988		error(ERROR_system(1),"write error");
989	}
990	else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
991		sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
992	done(jp);
993	return error_info.errors;
994}
995