1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1992-2012 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                 Eclipse Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*          http://www.eclipse.org/org/documents/epl-v10.html           *
11*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                                                                      *
20***********************************************************************/
21#pragma prototyped
22/*
23 * David Korn
24 * Glenn Fowler
25 * AT&T Research
26 *
27 * join
28 */
29
30static const char usage[] =
31"[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]"
32USAGE_LICENSE
33"[+NAME?join - relational database operator]"
34"[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
35	"and \afile2\a and writes the resulting joined files to standard "
36	"output.  By default, a field is delimited by one or more spaces "
37	"and tabs with leading spaces and/or tabs ignored.  The \b-t\b option "
38	"can be used to change the field delimiter.]"
39"[+?The \ajoin field\a is a field in each file on which files are compared. "
40	"By default \bjoin\b writes one line in the output for each pair "
41	"of lines in \afiles1\a and \afiles2\a that have identical join "
42	"fields.  The default output line consists of the join field, "
43	"then the remaining fields from \afile1\a, then the remaining "
44	"fields from \afile2\a, but this can be changed with the \b-o\b "
45	"option.  The \b-a\b option can be used to add unmatched lines "
46	"to the output.  The \b-v\b option can be used to output only "
47	"unmatched lines.]"
48"[+?The files \afile1\a and \afile2\a must be ordered in the collating "
49	"sequence of \bsort -b\b on the fields on which they are to be "
50	"joined otherwise the results are unspecified.]"
51"[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
52        "uses standard input starting at the current location.]"
53
54"[e:empty]:[string?Replace empty output fields in the list selected with"
55"	\b-o\b with \astring\a.]"
56"[o:output]:[list?Construct the output line to comprise the fields specified "
57	"in a blank or comma separated list \alist\a.  Each element in "
58	"\alist\a consists of a file number (either 1 or 2), a period, "
59	"and a field number or \b0\b representing the join field.  "
60	"As an obsolete feature multiple occurrences of \b-o\b can "
61	"be specified.]"
62"[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
63"	and output.]"
64"[1:j1]#[field?Join on field \afield\a of \afile1\a.  Fields start at 1.]"
65"[2:j2]#[field?Join on field \afield\a of \afile2\a.  Fields start at 1.]"
66"[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
67"[a:unpairable]#[fileno?Write a line for each unpairable line in file"
68"	\afileno\a, where \afileno\a is either 1 or 2, in addition to the"
69"	normal output.  If \b-a\b options appear for both 1 and 2, then "
70	"all unpairable lines will be output.]"
71"[v:suppress]#[fileno?Write a line for each unpairable line in file"
72"	\afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
73	"output.  If \b-v\b options appear for both 1 and 2, then "
74	"all unpairable lines will be output.] ]"
75"[i:ignorecase?Ignore case in field comparisons.]"
76"[B!:mmap?Enable memory mapped reads instead of buffered.]"
77
78"[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
79"	is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
80"	is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
81"	equivalent to \b-2\b \afield\a.]"
82
83"\n"
84"\nfile1 file2\n"
85"\n"
86"[+EXIT STATUS?]{"
87	"[+0?Both files processed successfully.]"
88	"[+>0?An error occurred.]"
89"}"
90"[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
91;
92
93#include <cmd.h>
94#include <sfdisc.h>
95
96#if _hdr_wchar && _hdr_wctype && _lib_iswctype
97
98#include <wchar.h>
99#include <wctype.h>
100
101#else
102
103#include <ctype.h>
104
105#ifndef iswspace
106#define iswspace(x)	isspace(x)
107#endif
108
109#endif
110
/*
 * output mode bits kept in Join_t.outmode
 * C_FILE1 and C_FILE2 are the bits (1<<0) and (1<<1) that b_join() sets
 * for -a/-v 1 and 2 and that getrec() clears when that file is exhausted
 * C_COMMON is set by init() and cleared by -v
 */
#define C_FILE1		001
#define C_FILE2		002
#define C_COMMON	004
#define C_ALL		(C_FILE1|C_FILE2|C_COMMON)

/*
 * NFIELD is the initial number of slots in each field array and in the
 * -o list; JOINFIELD is the -o list code for element "0" (the join
 * field), all other codes being (field index << 2) | file index
 */
#define NFIELD		10
#define JOINFIELD	2

/*
 * character classes stored in Join_t.state[], indexed by byte value;
 * 0 means an ordinary field byte
 */
#define S_DELIM		1
#define S_SPACE		2
#define S_NL		3
#define S_WIDE		4
123
/*
 * one field of the current record, pointing into the record buffer
 */
typedef struct Field_s
{
	/* first byte of the field (leading blanks included) */
	char*		beg;
	/* the byte that terminates the field (delimiter, blank or newline) */
	char*		end;
} Field_t;
129
/*
 * per input file state
 */
typedef struct File_s
{
	/* input stream; sfstdin when the operand is "-" */
	Sfio_t*		iop;
	/* file operand as given on the command line */
	char*		name;
	/* start of the record most recently returned by sfgetr() */
	char*		recptr;
	/* length of that record as reported by sfvalue() */
	int		reclen;
	/* zero based index of the join field */
	int		field;
	/* length of the join field in the current record */
	int		fieldlen;
	/* number of fields in the current record */
	int		nfields;
	/* number of slots available in fields[] (one extra is allocated) */
	int		maxfields;
	/* set by getrec() when a field of the current record began with blanks */
	int		spaces;
	/* incremented by outrec() for unpaired output; reset by getrec() */
	int		hit;
	/* set by b_join() for stdin when sfseek() fails and sfdcseekable() returns 0 */
	int		discard;
	/* field table for the current record */
	Field_t*	fields;
} File_t;
145
/*
 * state for one join invocation, allocated by init() and freed by done()
 */
typedef struct Join_s
{
	/* S_* class of each byte value */
	unsigned char	state[1<<CHAR_BIT];
	/* output stream */
	Sfio_t*		outfile;
	/* -o field codes terminated by -1, or 0 if -o was not given */
	int*		outlist;
	/* current C_* mode bits */
	int		outmode;
	/* copy of outmode taken by b_join() before join() runs */
	int		ooutmode;
	/* -e replacement string for empty fields, or 0 */
	char*		nullfield;
	/* bytes of a multibyte -t delimiter, or 0 */
	char*		delimstr;
	/* delimiter character, or -1 for the default blank separated fields */
	int		delim;
	/* number of bytes in delimstr */
	int		delimlen;
	/* set from the -B option; makes b_join() call sfsetbuf() with SF_UNBOUND */
	int		buffered;
	/* set from the -i option */
	int		ignorecase;
	/* value of mbwide() at init() time */
	int		mb;
	/* buffer join() uses to hold a copy of a file 2 join field */
	char*		same;
	/* allocated size of same */
	int		samesize;
	/* builtin context passed to sh_checksig() */
	Shbltin_t*	context;
	/* file[0] is file1 and file[1] is file2 */
	File_t		file[2];
} Join_t;
165
166static void
167done(register Join_t* jp)
168{
169	if (jp->file[0].iop && jp->file[0].iop != sfstdin)
170		sfclose(jp->file[0].iop);
171	if (jp->file[1].iop && jp->file[1].iop != sfstdin)
172		sfclose(jp->file[1].iop);
173	if (jp->outlist)
174		free(jp->outlist);
175	if (jp->file[0].fields)
176		free(jp->file[0].fields);
177	if (jp->file[1].fields)
178		free(jp->file[1].fields);
179	if (jp->same)
180		free(jp->same);
181	free(jp);
182}
183
184static Join_t*
185init(void)
186{
187	register Join_t*	jp;
188	register int		i;
189
190	setlocale(LC_ALL, "");
191	if (jp = newof(0, Join_t, 1, 0))
192	{
193		if (jp->mb = mbwide())
194			for (i = 0x80; i <= 0xff; i++)
195				jp->state[i] = S_WIDE;
196		jp->state[' '] = jp->state['\t'] = S_SPACE;
197		jp->state['\n'] = S_NL;
198		jp->delim = -1;
199		jp->nullfield = 0;
200		if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) ||
201		    !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0)))
202		{
203			done(jp);
204			return 0;
205		}
206		jp->file[0].maxfields = NFIELD;
207		jp->file[1].maxfields = NFIELD;
208		jp->outmode = C_COMMON;
209	}
210	return jp;
211}
212
213static int
214getolist(Join_t* jp, const char* first, char** arglist)
215{
216	register const char*	cp = first;
217	char**			argv = arglist;
218	register int		c;
219	int*			outptr;
220	int*			outmax;
221	int			nfield = NFIELD;
222	char*			str;
223
224	outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
225	outmax = outptr + NFIELD;
226	while (c = *cp++)
227	{
228		if (c==' ' || c=='\t' || c==',')
229			continue;
230		str = (char*)--cp;
231		if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
232		{
233			str++;
234			c = JOINFIELD;
235			goto skip;
236		}
237		if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
238		{
239			error(2,"%s: invalid field list",first);
240			break;
241		}
242		c--;
243		c <<=2;
244		if (*cp=='2')
245			c |=1;
246	skip:
247		if (outptr >= outmax)
248		{
249			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
250			outptr = jp->outlist + nfield;
251			nfield *= 2;
252			outmax = jp->outlist + nfield;
253		}
254		*outptr++ = c;
255		cp = str;
256	}
257	/* need to accept obsolescent command syntax */
258	while (cp = *argv)
259	{
260		if (cp[1]!='.' || (*cp!='1' && *cp!='2'))
261		{
262			if (*cp=='0' && cp[1]==0)
263			{
264				c = JOINFIELD;
265				goto skip2;
266			}
267			break;
268		}
269		str = (char*)cp;
270		c = strtol(cp+2, &str,10);
271		if (*str || --c<0)
272			break;
273		argv++;
274		c <<= 2;
275		if (*cp=='2')
276			c |=1;
277	skip2:
278		if (outptr >= outmax)
279		{
280			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
281			outptr = jp->outlist + nfield;
282			nfield *= 2;
283			outmax = jp->outlist + nfield;
284		}
285		*outptr++ = c;
286	}
287	*outptr = -1;
288	return argv-arglist;
289}
290
/*
 * read in a record from file <index> and split into fields
 *
 * each fields[] slot gets the start of the field and the position of
 * the byte that terminates it; with the default delimiter (jp->delim
 * is -1) blanks at the start of a field are scanned over but remain
 * part of the field, and fp->spaces is set when any were seen
 *
 * if <discard> is set and the file is marked discard then SFSK_DISCARD
 * is raised on the stream before the read
 *
 * returns a pointer to the join field of the record, past any leading
 * blanks when fp->spaces is set, with its length in fp->fieldlen;
 * "" with fp->fieldlen 0 if the record has no such field;
 * 0 if sh_checksig() is non-zero or if sfgetr() returns no record,
 * the latter also clearing the file's bit in jp->outmode
 */
static unsigned char*
getrec(Join_t* jp, int index, int discard)
{
	register unsigned char*	sp = jp->state;
	register File_t*	fp = &jp->file[index];
	register Field_t*	field = fp->fields;
	register Field_t*	fieldmax = field + fp->maxfields;
	register char*		cp;
	register int		n;
	char*			tp;

	if (sh_checksig(jp->context))
		return 0;
	if (discard && fp->discard)
		sfraise(fp->iop, SFSK_DISCARD, NiL);
	fp->spaces = 0;
	fp->hit = 0;
	if (!(cp = sfgetr(fp->iop, '\n', 0)))
	{
		jp->outmode &= ~(1<<index);
		return 0;
	}
	fp->recptr = cp;
	fp->reclen = sfvalue(fp->iop);
	if (jp->delim == '\n')	/* handle new-line delimiter specially */
	{
		/* the whole record is a single field ending at its last byte */
		field->beg = cp;
		cp += fp->reclen;
		field->end = cp - 1;
		field++;
	}
	else
		do /* separate into fields */
		{
			if (field >= fieldmax)
			{
				/* double the field table and continue at the first new slot */
				n = 2 * fp->maxfields;
				fp->fields = newof(fp->fields, Field_t, n + 1, 0);
				field = fp->fields + fp->maxfields;
				fp->maxfields = n;
				fieldmax = fp->fields + n;
			}
			field->beg = cp;
			if (jp->delim == -1)
			{
				/* default delimiter: does the field start with a blank? */
				switch (sp[*(unsigned char*)cp])
				{
				case S_SPACE:
					cp++;
					break;
				case S_WIDE:
					tp = cp;
					if (iswspace(mbchar(tp)))
					{
						cp = tp;
						break;
					}
					/*FALLTHROUGH*/
				default:
					goto next;
				}
				fp->spaces = 1;
				/* skip the rest of the leading blanks */
				if (jp->mb)
					for (;;)
					{
						switch (sp[*(unsigned char*)cp++])
						{
						case S_SPACE:
							continue;
						case S_WIDE:
							tp = cp - 1;
							if (iswspace(mbchar(tp)))
							{
								cp = tp;
								continue;
							}
							break;
						}
						break;
					}
				else
					while (sp[*(unsigned char*)cp++]==S_SPACE);
				/* the loops above step one byte past the first non-blank */
				cp--;
			}
		next:
			/* scan to the byte that terminates the field */
			if (jp->mb)
			{
				for (;;)
				{
					tp = cp;
					switch (n = sp[*(unsigned char*)cp++])
					{
					case 0:
						continue;
					case S_WIDE:
						cp--;
						n = mbchar(cp);
						if (n == jp->delim)
						{
							n = S_DELIM;
							break;
						}
						if (jp->delim == -1 && iswspace(n))
						{
							n = S_SPACE;
							break;
						}
						continue;
					}
					break;
				}
				/* tp is the first byte of the terminating character */
				field->end = tp;
			}
			else
			{
				while (!(n = sp[*(unsigned char*)cp++]));
				field->end = cp - 1;
			}
			field++;
		} while (n != S_NL);
	fp->nfields = field - fp->fields;
	if ((n = fp->field) < fp->nfields)
	{
		cp = fp->fields[n].beg;
		/* eliminate leading spaces */
		if (fp->spaces)
		{
			if (jp->mb)
				for (;;)
				{
					switch (sp[*(unsigned char*)cp++])
					{
					case S_SPACE:
						continue;
					case S_WIDE:
						tp = cp - 1;
						if (iswspace(mbchar(tp)))
						{
							cp = tp;
							continue;
						}
						break;
					}
					break;
				}
			else
				while (sp[*(unsigned char*)cp++]==S_SPACE);
			cp--;
		}
		fp->fieldlen = fp->fields[n].end - cp;
		return (unsigned char*)cp;
	}
	/* the record is too short to contain the join field */
	fp->fieldlen = 0;
	return (unsigned char*)"";
}
449
/*
 * with DEBUG_TRACE each getrec() call also prints the calling line,
 * the file index, the stream offset and the start of the returned
 * field to sfstdout; u1 holds the result between the two steps
 */
#if DEBUG_TRACE
static unsigned char* u1;
#define getrec(p,n,d)	(u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
#endif
454
455/*
456 * print field <n> from file <index>
457 */
458static int
459outfield(Join_t* jp, int index, register int n, int last)
460{
461	register File_t*	fp = &jp->file[index];
462	register char*		cp;
463	register char*		cpmax;
464	register int		size;
465	register Sfio_t*	iop = jp->outfile;
466	char*			tp;
467
468	if (n < fp->nfields)
469	{
470		cp = fp->fields[n].beg;
471		cpmax = fp->fields[n].end + 1;
472	}
473	else
474		cp = 0;
475	if ((n = jp->delim) == -1)
476	{
477		if (cp && fp->spaces)
478		{
479			register unsigned char*	sp = jp->state;
480
481			/*eliminate leading spaces */
482			if (jp->mb)
483				for (;;)
484				{
485					switch (sp[*(unsigned char*)cp++])
486					{
487					case S_SPACE:
488						continue;
489					case S_WIDE:
490						tp = cp - 1;
491						if (iswspace(mbchar(tp)))
492						{
493							cp = tp;
494							continue;
495						}
496						break;
497					}
498					break;
499				}
500			else
501				while (sp[*(unsigned char*)cp++]==S_SPACE);
502			cp--;
503		}
504		n = ' ';
505	}
506	else if (jp->delimstr)
507		n = -1;
508	if (last)
509		n = '\n';
510	if (cp)
511		size = cpmax - cp;
512	else
513		size = 0;
514	if (n == -1)
515	{
516		if (size<=1)
517		{
518			if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0)
519				return -1;
520		}
521		else if (sfwrite(iop, cp, size) < 0)
522			return -1;
523		if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0)
524			return -1;
525	}
526	else if (size <= 1)
527	{
528		if (!jp->nullfield)
529			sfputc(iop, n);
530		else if (sfputr(iop, jp->nullfield, n) < 0)
531			return -1;
532	}
533	else
534	{
535		last = cp[size-1];
536		cp[size-1] = n;
537		if (sfwrite(iop, cp, size) < 0)
538			return -1;
539		cp[size-1] = last;
540	}
541	return 0;
542}
543
/*
 * with DEBUG_TRACE each outfield() call first prints the calling line
 * and its index, field and last arguments to sfstdout; i1, i2 and i3
 * hold the arguments so that each is evaluated once
 */
#if DEBUG_TRACE
static int i1,i2,i3;
#define outfield(p,i,n,f)	(sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
#endif
548
/*
 * write one output line for the current records
 *
 * mode < 0 writes the file 1 record as unpaired, mode > 0 writes the
 * file 2 record as unpaired and mode == 0 writes the joined pair
 * an unpaired record is written only on the first request, tracked by
 * the hit count of its file
 *
 * with a -o list the listed fields are written in list order; otherwise
 * the join field is written first, followed by the remaining fields of
 * file 1 and then of file 2
 *
 * returns 0 on success, -1 if outfield() failed
 */
static int
outrec(register Join_t* jp, int mode)
{
	register File_t*	fp;
	register int		i;
	register int		j;
	register int		k;
	register int		n;
	int*			out;

	if (mode < 0 && jp->file[0].hit++)
		return 0;
	if (mode > 0 && jp->file[1].hit++)
		return 0;
	if (out = jp->outlist)
	{
		while ((n = *out++) >= 0)
		{
			if (n == JOINFIELD)
			{
				/* take the join field from file 1 only when file 1 is unpaired */
				i = mode >= 0;
				j = jp->file[i].field;
			}
			else
			{
				i = n & 1;
				/*
				 * a field of the file that takes no part in an unpaired
				 * line is requested by an out of range index so that
				 * outfield() treats it as missing
				 */
				j = (mode<0 && i || mode>0 && !i) ?
					jp->file[i].nfields :
					n >> 2;
			}
			/* *out < 0 means this was the last list element */
			if (outfield(jp, i, j, *out < 0) < 0)
				return -1;
		}
		return 0;
	}
	/* k counts the fields still to be written; the one that makes it 0 is last */
	k = jp->file[0].nfields;
	if (mode >= 0)
		k += jp->file[1].nfields - 1;
	for (i=0; i<2; i++)
	{
		fp = &jp->file[i];
		if (mode>0 && i==0)
		{
			/* file 1 contributes nothing to an unpaired file 2 line */
			k -= (fp->nfields - 1);
			continue;
		}
		n = fp->field;
		if (mode||i==0)
		{
			/* output join field first */
			if (outfield(jp,i,n,!--k) < 0)
				return -1;
			if (!k)
				return 0;
			for (j=0; j<n; j++)
			{
				if (outfield(jp,i,j,!--k) < 0)
					return -1;
				if (!k)
					return 0;
			}
			j = n + 1;
		}
		else
			j = 0;
		for (;j<fp->nfields; j++)
		{
			/* the join field has already been written */
			if (j!=n && outfield(jp,i,j,!--k) < 0)
				return -1;
			if (!k)
				return 0;
		}
	}
	return 0;
}
624
/*
 * with DEBUG_TRACE each outrec() call first prints the calling line,
 * the mode, the lo and hi offsets and both join fields with their hit
 * counts to sfstdout; it relies on the caller's jp, lo, hi, cp1 and cp2
 */
#if DEBUG_TRACE
#define outrec(p,n)	(sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
#endif
628
/*
 * merge the two inputs of <jp> on their join fields and write the
 * selected lines with outrec()
 *
 * cp1/n1 and cp2/n2 are the join field and its length for the current
 * file 1 and file 2 records
 * lo and hi are file 2 stream offsets, -1 when unset: lo is the start
 * of the file 2 record at which the current run of matches began and
 * hi is the start of the file 2 record that ended that run
 *
 * returns 0 on success, -1 after a seek, allocation or output failure
 */
static int
join(Join_t* jp)
{
	register unsigned char*	cp1;
	register unsigned char*	cp2;
	register int		n1;
	register int		n2;
	register int		n;
	register int		cmp;
	register int		same;
	int			o2;
	Sfoff_t			lo = -1;
	Sfoff_t			hi = -1;

	/* the (cp2 = 0) alternative leaves cp2 null when either first read fails */
	if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
	{
		n1 = jp->file[0].fieldlen;
		n2 = jp->file[1].fieldlen;
		same = 0;
		for (;;)
		{
			n = n1 < n2 ? n1 : n2;
#if DEBUG_TRACE
			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
				cmp = n1 - n2;
sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
			if (!cmp)
#else
			/*
			 * cmp is set as a side effect: first byte difference,
			 * then the comparison of the common prefix, then the
			 * length difference; the branch is taken when it is 0
			 */
			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
#endif
			{
				/* the join fields are equal */
				if (!(jp->outmode & C_COMMON))
				{
					/* paired lines are not wanted: move on in file 1 */
					if (cp1 = getrec(jp, 0, 1))
					{
						n1 = jp->file[0].fieldlen;
						same = 1;
						continue;
					}
					if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
						break;
					/* step back over the last file 1 record and read it again */
					if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
					{
						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
						return -1;
					}
				}
				else if (outrec(jp, 0) < 0)
					return -1;
				else if (lo < 0 && (jp->outmode & C_COMMON))
				{
					/* first match of a run: remember where this file 2 record starts */
					if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
					{
						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
						return -1;
					}
					lo -= jp->file[1].reclen;
				}
				if (cp2 = getrec(jp, 1, lo < 0))
				{
					n2 = jp->file[1].fieldlen;
					continue;
				}
#if DEBUG_TRACE
sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
#endif
			}
			else if (cmp > 0)
			{
				/* the file 1 field is greater: advance file 2 */
				if (same)
				{
					same = 0;
				next:
					/* read past every file 2 record with the same join field */
					if (n2 > jp->samesize)
					{
						jp->samesize = roundof(n2, 16);
						if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
						{
							error(ERROR_SYSTEM|2, "out of space");
							return -1;
						}
					}
					memcpy(jp->same, cp2, o2 = n2);
					if (!(cp2 = getrec(jp, 1, 0)))
						break;
					n2 = jp->file[1].fieldlen;
					if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
						goto next;
					continue;
				}
				if (hi >= 0)
				{
					/* return to the file 2 record that ended the previous run */
					if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
					{
						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
						return -1;
					}
					hi = -1;
				}
				else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
					return -1;
				lo = -1;
				if (cp2 = getrec(jp, 1, 1))
				{
					n2 = jp->file[1].fieldlen;
					continue;
				}
#if DEBUG_TRACE
sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
#endif
			}
			else if (same)
			{
				same = 0;
				if (!(cp1 = getrec(jp, 0, 0)))
					break;
				n1 = jp->file[0].fieldlen;
				continue;
			}
			/*
			 * reached when the file 1 field is smaller or file 2 is
			 * exhausted: the next file 1 record is needed
			 */
			if (lo >= 0)
			{
				/*
				 * a run of matches just ended: note where file 2 is,
				 * then go back to the start of the run so the next
				 * file 1 record is compared against all of it
				 */
				if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
				    (hi -= jp->file[1].reclen) < 0 ||
				    sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
				    !(cp2 = getrec(jp, 1, 0)))
				{
					error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
					return -1;
				}
				n2 = jp->file[1].fieldlen;
				lo = -1;
				/* NOTE(review): purpose of the seek to offset -1 is not visible here -- presumably a signal to the seek discipline; confirm */
				if (jp->file[1].discard)
					sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
			}
			else if (!cp2)
				break;
			else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
				return -1;
			if (!(cp1 = getrec(jp, 0, 1)))
				break;
			n1 = jp->file[0].fieldlen;
		}
	}
#if DEBUG_TRACE
sfprintf(sfstdout, "[X#%d:?,%p,%p,%d,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
#endif
	/*
	 * one input is exhausted: pick the file whose remaining records
	 * may still have to be written as unpaired; n is its index and
	 * cmp the matching outrec() mode
	 */
	if (cp2)
	{
		if (hi >= 0 &&
		    sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
		    sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
		{
			error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
			return -1;
		}
#if DEBUG_TRACE
sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
#endif
		/* either keep the current file 2 record or read the next one */
		cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
		cmp = 1;
		n = 1;
	}
	else
	{
		cmp = -1;
		n = 0;
	}
#if DEBUG_TRACE
sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
#endif
	if (!cp1 || !(jp->outmode & (1<<n)))
	{
		/* nothing more to write: move standard input to its end */
		if (cp1 && jp->file[n].iop == sfstdin)
			sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
		return 0;
	}
	/* write the current record and every remaining one as unpaired */
	if (outrec(jp, cmp) < 0)
		return -1;
	do
	{
		if (!getrec(jp, n, 1))
			return 0;
	} while (outrec(jp, cmp) >= 0);
	return -1;
}
814
815int
816b_join(int argc, char** argv, Shbltin_t* context)
817{
818	register int		n;
819	register char*		cp;
820	register Join_t*	jp;
821	char*			e;
822
823#if !DEBUG_TRACE
824	cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
825#endif
826	if (!(jp = init()))
827		error(ERROR_system(1),"out of space");
828	jp->context = context;
829	for (;;)
830	{
831		switch (n = optget(argv, usage))
832		{
833 		case 'j':
834			/*
835			 * check for obsolete "-j1 field" and "-j2 field"
836			 */
837
838			if (opt_info.offset == 0)
839			{
840				cp = argv[opt_info.index - 1];
841				for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
842				n = cp[n] == 'j';
843			}
844			else
845				n = 0;
846			if (n)
847			{
848				if (opt_info.num!=1 && opt_info.num!=2)
849					error(2,"-jfileno field: fileno must be 1 or 2");
850				n = '0' + opt_info.num;
851				if (!(cp = argv[opt_info.index]))
852				{
853					argc = 0;
854					break;
855				}
856				opt_info.num = strtol(cp, &e, 10);
857				if (*e)
858				{
859					argc = 0;
860					break;
861				}
862				opt_info.index++;
863			}
864			else
865			{
866				jp->file[0].field = (int)(opt_info.num-1);
867				n = '2';
868			}
869			/*FALLTHROUGH*/
870 		case '1':
871		case '2':
872			if (opt_info.num <=0)
873				error(2,"field number must positive");
874			jp->file[n-'1'].field = (int)(opt_info.num-1);
875			continue;
876		case 'v':
877			jp->outmode &= ~C_COMMON;
878			/*FALLTHROUGH*/
879		case 'a':
880			if (opt_info.num!=1 && opt_info.num!=2)
881				error(2,"%s: file number must be 1 or 2", opt_info.name);
882			jp->outmode |= 1<<(opt_info.num-1);
883			continue;
884		case 'e':
885			jp->nullfield = opt_info.arg;
886			continue;
887		case 'o':
888			/* need to accept obsolescent command syntax */
889			n = getolist(jp, opt_info.arg, argv+opt_info.index);
890			opt_info.index += n;
891			continue;
892		case 't':
893			jp->state[' '] = jp->state['\t'] = 0;
894			if (jp->mb)
895			{
896				cp = opt_info.arg;
897				jp->delim = mbchar(cp);
898				if ((n = cp - opt_info.arg) > 1)
899				{
900					jp->delimlen = n;
901					jp->delimstr = opt_info.arg;
902					continue;
903				}
904			}
905			n = *(unsigned char*)opt_info.arg;
906			jp->state[n] = S_DELIM;
907			jp->delim = n;
908			continue;
909		case 'i':
910			jp->ignorecase = !opt_info.num;
911			continue;
912		case 'B':
913			jp->buffered = !opt_info.num;
914			continue;
915		case ':':
916			error(2, "%s", opt_info.arg);
917			break;
918		case '?':
919			done(jp);
920			error(ERROR_usage(2), "%s", opt_info.arg);
921			break;
922		}
923		break;
924	}
925	argv += opt_info.index;
926	argc -= opt_info.index;
927	if (error_info.errors || argc!=2)
928	{
929		done(jp);
930		error(ERROR_usage(2),"%s", optusage(NiL));
931	}
932	jp->ooutmode = jp->outmode;
933	jp->file[0].name = cp = *argv++;
934	if (streq(cp,"-"))
935	{
936		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
937		{
938			if (sfdcseekable(sfstdin))
939				error(ERROR_warn(0),"%s: seek may fail",cp);
940			else
941				jp->file[0].discard = 1;
942		}
943		jp->file[0].iop = sfstdin;
944	}
945	else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
946	{
947		done(jp);
948		error(ERROR_system(1),"%s: cannot open",cp);
949	}
950	jp->file[1].name = cp = *argv;
951	if (streq(cp,"-"))
952	{
953		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
954		{
955			if (sfdcseekable(sfstdin))
956				error(ERROR_warn(0),"%s: seek may fail",cp);
957			else
958				jp->file[1].discard = 1;
959		}
960		jp->file[1].iop = sfstdin;
961	}
962	else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
963	{
964		done(jp);
965		error(ERROR_system(1),"%s: cannot open",cp);
966	}
967	if (jp->buffered)
968	{
969		sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
970		sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND);
971	}
972	jp->outfile = sfstdout;
973	if (!jp->outlist)
974		jp->nullfield = 0;
975	if (join(jp) < 0)
976	{
977		done(jp);
978		error(ERROR_system(1),"write error");
979	}
980	else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
981		sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
982	done(jp);
983	return error_info.errors;
984}
985