1/*********************************************************************** 2* * 3* This software is part of the ast package * 4* Copyright (c) 1992-2011 AT&T Intellectual Property * 5* and is licensed under the * 6* Common Public License, Version 1.0 * 7* by AT&T Intellectual Property * 8* * 9* A copy of the License is available at * 10* http://www.opensource.org/licenses/cpl1.0.txt * 11* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12* * 13* Information and Software Systems Research * 14* AT&T Research * 15* Florham Park NJ * 16* * 17* Glenn Fowler <gsf@research.att.com> * 18* David Korn <dgk@research.att.com> * 19* * 20***********************************************************************/ 21#pragma prototyped 22/* 23 * David Korn 24 * Glenn Fowler 25 * AT&T Research 26 * 27 * join 28 */ 29 30static const char usage[] = 31"[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]" 32USAGE_LICENSE 33"[+NAME?join - relational database operator]" 34"[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a " 35 "and \afile2\a and writes the resulting joined files to standard " 36 "output. By default, a field is delimited by one or more spaces " 37 "and tabs with leading spaces and/or tabs ignored. The \b-t\b option " 38 "can be used to change the field delimiter.]" 39"[+?The \ajoin field\a is a field in each file on which files are compared. " 40 "By default \bjoin\b writes one line in the output for each pair " 41 "of lines in \afiles1\a and \afiles2\a that have identical join " 42 "fields. The default output line consists of the join field, " 43 "then the remaining fields from \afile1\a, then the remaining " 44 "fields from \afile2\a, but this can be changed with the \b-o\b " 45 "option. The \b-a\b option can be used to add unmatched lines " 46 "to the output. The \b-v\b option can be used to output only " 47 "unmatched lines.]" 48"[+?The files \afile1\a and \afile2\a must be ordered in the collating " 49 "sequence of \bsort -b\b on the fields on which they are to be " 50 "joined otherwise the results are unspecified.]" 51"[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b " 52 "uses standard input starting at the current location.]" 53 54"[e:empty]:[string?Replace empty output fields in the list selected with" 55" \b-o\b with \astring\a.]" 56"[o:output]:[list?Construct the output line to comprise the fields specified " 57 "in a blank or comma separated list \alist\a. Each element in " 58 "\alist\a consists of a file number (either 1 or 2), a period, " 59 "and a field number or \b0\b representing the join field. " 60 "As an obsolete feature multiple occurrences of \b-o\b can " 61 "be specified.]" 62"[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input" 63" and output.]" 64"[1:j1]#[field?Join on field \afield\a of \afile1\a. Fields start at 1.]" 65"[2:j2]#[field?Join on field \afield\a of \afile2\a. Fields start at 1.]" 66"[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]" 67"[a:unpairable]#[fileno?Write a line for each unpairable line in file" 68" \afileno\a, where \afileno\a is either 1 or 2, in addition to the" 69" normal output. If \b-a\b options appear for both 1 and 2, then " 70 "all unpairable lines will be output.]" 71"[v:suppress]#[fileno?Write a line for each unpairable line in file" 72" \afileno\a, where \afileno\a is either 1 or 2, instead of the normal " 73 "output. If \b-v\b options appear for both 1 and 2, then " 74 "all unpairable lines will be output.] ]" 75"[i:ignorecase?Ignore case in field comparisons.]" 76"[B!:mmap?Enable memory mapped reads instead of buffered.]" 77 78"[+?The following obsolete option forms are also recognized: \b-j\b \afield\a" 79" is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a" 80" is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is" 81" equivalent to \b-2\b \afield\a.]" 82 83"\n" 84"\nfile1 file2\n" 85"\n" 86"[+EXIT STATUS?]{" 87 "[+0?Both files processed successfully.]" 88 "[+>0?An error occurred.]" 89"}" 90"[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]" 91; 92 93#include <cmd.h> 94#include <sfdisc.h> 95 96#if _hdr_wchar && _hdr_wctype && _lib_iswctype 97 98#include <wchar.h> 99#include <wctype.h> 100 101#else 102 103#include <ctype.h> 104 105#ifndef iswspace 106#define iswspace(x) isspace(x) 107#endif 108 109#endif 110 111#define C_FILE1 001 112#define C_FILE2 002 113#define C_COMMON 004 114#define C_ALL (C_FILE1|C_FILE2|C_COMMON) 115 116#define NFIELD 10 117#define JOINFIELD 2 118 119#define S_DELIM 1 120#define S_SPACE 2 121#define S_NL 3 122#define S_WIDE 4 123 124typedef struct Field_s 125{ 126 char* beg; 127 char* end; 128} Field_t; 129 130typedef struct File_s 131{ 132 Sfio_t* iop; 133 char* name; 134 char* recptr; 135 int reclen; 136 int field; 137 int fieldlen; 138 int nfields; 139 int maxfields; 140 int spaces; 141 int hit; 142 int discard; 143 Field_t* fields; 144} File_t; 145 146typedef struct Join_s 147{ 148 unsigned char state[1<<CHAR_BIT]; 149 Sfio_t* outfile; 150 int* outlist; 151 int outmode; 152 int ooutmode; 153 char* nullfield; 154 char* delimstr; 155 int delim; 156 int delimlen; 157 int buffered; 158 int ignorecase; 159 int mb; 160 char* same; 161 int samesize; 162 void* context; 163 File_t file[2]; 164} Join_t; 165 166static void 167done(register Join_t* jp) 168{ 169 if (jp->file[0].iop && jp->file[0].iop != sfstdin) 170 sfclose(jp->file[0].iop); 171 if (jp->file[1].iop && jp->file[1].iop != sfstdin) 172 sfclose(jp->file[1].iop); 173 if (jp->outlist) 174 free(jp->outlist); 175 if (jp->file[0].fields) 176 free(jp->file[0].fields); 177 if (jp->file[1].fields) 178 free(jp->file[1].fields); 179 if (jp->same) 180 free(jp->same); 181 free(jp); 182} 183 184static Join_t* 185init(void) 186{ 187 register Join_t* jp; 188 register int i; 189 190 setlocale(LC_ALL, ""); 191 if (jp = newof(0, Join_t, 1, 0)) 192 { 193 if (jp->mb = mbwide()) 194 for (i = 0x80; i <= 0xff; i++) 195 jp->state[i] = S_WIDE; 196 jp->state[' '] = jp->state['\t'] = S_SPACE; 197 jp->state['\n'] = S_NL; 198 jp->delim = -1; 199 jp->nullfield = 0; 200 if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) || 201 !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0))) 202 { 203 done(jp); 204 return 0; 205 } 206 jp->file[0].maxfields = NFIELD; 207 jp->file[1].maxfields = NFIELD; 208 jp->outmode = C_COMMON; 209 } 210 return jp; 211} 212 213static int 214getolist(Join_t* jp, const char* first, char** arglist) 215{ 216 register const char* cp = first; 217 char** argv = arglist; 218 register int c; 219 int* outptr; 220 int* outmax; 221 int nfield = NFIELD; 222 char* str; 223 224 outptr = jp->outlist = newof(0, int, NFIELD + 1, 0); 225 outmax = outptr + NFIELD; 226 while (c = *cp++) 227 { 228 if (c==' ' || c=='\t' || c==',') 229 continue; 230 str = (char*)--cp; 231 if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==',')) 232 { 233 str++; 234 c = JOINFIELD; 235 goto skip; 236 } 237 if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0) 238 { 239 error(2,"%s: invalid field list",first); 240 break; 241 } 242 c--; 243 c <<=2; 244 if (*cp=='2') 245 c |=1; 246 skip: 247 if (outptr >= outmax) 248 { 249 jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 250 outptr = jp->outlist + nfield; 251 nfield *= 2; 252 outmax = jp->outlist + nfield; 253 } 254 *outptr++ = c; 255 cp = str; 256 } 257 /* need to accept obsolescent command syntax */ 258 while (1) 259 { 260 if (!(cp= *argv) || cp[1]!='.' || (*cp!='1' && *cp!='2')) 261 { 262 if (*cp=='0' && cp[1]==0) 263 { 264 c = JOINFIELD; 265 goto skip2; 266 } 267 break; 268 } 269 str = (char*)cp; 270 c = strtol(cp+2, &str,10); 271 if (*str || --c<0) 272 break; 273 argv++; 274 c <<= 2; 275 if (*cp=='2') 276 c |=1; 277 skip2: 278 if (outptr >= outmax) 279 { 280 jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 281 outptr = jp->outlist + nfield; 282 nfield *= 2; 283 outmax = jp->outlist + nfield; 284 } 285 *outptr++ = c; 286 } 287 *outptr = -1; 288 return argv-arglist; 289} 290 291/* 292 * read in a record from file <index> and split into fields 293 */ 294static unsigned char* 295getrec(Join_t* jp, int index, int discard) 296{ 297 register unsigned char* sp = jp->state; 298 register File_t* fp = &jp->file[index]; 299 register Field_t* field = fp->fields; 300 register Field_t* fieldmax = field + fp->maxfields; 301 register char* cp; 302 register int n; 303 char* tp; 304 305 if (sh_checksig(jp->context)) 306 return 0; 307 if (discard && fp->discard) 308 sfraise(fp->iop, SFSK_DISCARD, NiL); 309 fp->spaces = 0; 310 fp->hit = 0; 311 if (!(cp = sfgetr(fp->iop, '\n', 0))) 312 { 313 jp->outmode &= ~(1<<index); 314 return 0; 315 } 316 fp->recptr = cp; 317 fp->reclen = sfvalue(fp->iop); 318 if (jp->delim == '\n') /* handle new-line delimiter specially */ 319 { 320 field->beg = cp; 321 cp += fp->reclen; 322 field->end = cp - 1; 323 field++; 324 } 325 else 326 do /* separate into fields */ 327 { 328 if (field >= fieldmax) 329 { 330 n = 2 * fp->maxfields; 331 fp->fields = newof(fp->fields, Field_t, n + 1, 0); 332 field = fp->fields + fp->maxfields; 333 fp->maxfields = n; 334 fieldmax = fp->fields + n; 335 } 336 field->beg = cp; 337 if (jp->delim == -1) 338 { 339 switch (sp[*(unsigned char*)cp]) 340 { 341 case S_SPACE: 342 cp++; 343 break; 344 case S_WIDE: 345 tp = cp; 346 if (iswspace(mbchar(tp))) 347 { 348 cp = tp; 349 break; 350 } 351 /*FALLTHROUGH*/ 352 default: 353 goto next; 354 } 355 fp->spaces = 1; 356 if (jp->mb) 357 for (;;) 358 { 359 switch (sp[*(unsigned char*)cp++]) 360 { 361 case S_SPACE: 362 continue; 363 case S_WIDE: 364 tp = cp - 1; 365 if (iswspace(mbchar(tp))) 366 { 367 cp = tp; 368 continue; 369 } 370 break; 371 } 372 break; 373 } 374 else 375 while (sp[*(unsigned char*)cp++]==S_SPACE); 376 cp--; 377 } 378 next: 379 if (jp->mb) 380 { 381 for (;;) 382 { 383 tp = cp; 384 switch (n = sp[*(unsigned char*)cp++]) 385 { 386 case 0: 387 continue; 388 case S_WIDE: 389 cp--; 390 n = mbchar(cp); 391 if (n == jp->delim) 392 { 393 n = S_DELIM; 394 break; 395 } 396 if (jp->delim == -1 && iswspace(n)) 397 { 398 n = S_SPACE; 399 break; 400 } 401 continue; 402 } 403 break; 404 } 405 field->end = tp; 406 } 407 else 408 { 409 while (!(n = sp[*(unsigned char*)cp++])); 410 field->end = cp - 1; 411 } 412 field++; 413 } while (n != S_NL); 414 fp->nfields = field - fp->fields; 415 if ((n = fp->field) < fp->nfields) 416 { 417 cp = fp->fields[n].beg; 418 /* eliminate leading spaces */ 419 if (fp->spaces) 420 { 421 if (jp->mb) 422 for (;;) 423 { 424 switch (sp[*(unsigned char*)cp++]) 425 { 426 case S_SPACE: 427 continue; 428 case S_WIDE: 429 tp = cp - 1; 430 if (iswspace(mbchar(tp))) 431 { 432 cp = tp; 433 continue; 434 } 435 break; 436 } 437 break; 438 } 439 else 440 while (sp[*(unsigned char*)cp++]==S_SPACE); 441 cp--; 442 } 443 fp->fieldlen = fp->fields[n].end - cp; 444 return (unsigned char*)cp; 445 } 446 fp->fieldlen = 0; 447 return (unsigned char*)""; 448} 449 450static unsigned char* 451_trace_getrec(Join_t* jp, int index, int discard) 452{ 453 unsigned char* r; 454 455 r = getrec(jp, index, discard); 456 return r; 457} 458#define getrec _trace_getrec 459 460#if DEBUG_TRACE 461static unsigned char* u1,u2,u3; 462#define getrec(p,n,d) (u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1) 463#endif 464 465/* 466 * print field <n> from file <index> 467 */ 468static int 469outfield(Join_t* jp, int index, register int n, int last) 470{ 471 register File_t* fp = &jp->file[index]; 472 register char* cp; 473 register char* cpmax; 474 register int size; 475 register Sfio_t* iop = jp->outfile; 476 char* tp; 477 478 if (n < fp->nfields) 479 { 480 cp = fp->fields[n].beg; 481 cpmax = fp->fields[n].end + 1; 482 } 483 else 484 cp = 0; 485 if ((n = jp->delim) == -1) 486 { 487 if (cp && fp->spaces) 488 { 489 register unsigned char* sp = jp->state; 490 491 /*eliminate leading spaces */ 492 if (jp->mb) 493 for (;;) 494 { 495 switch (sp[*(unsigned char*)cp++]) 496 { 497 case S_SPACE: 498 continue; 499 case S_WIDE: 500 tp = cp - 1; 501 if (iswspace(mbchar(tp))) 502 { 503 cp = tp; 504 continue; 505 } 506 break; 507 } 508 break; 509 } 510 else 511 while (sp[*(unsigned char*)cp++]==S_SPACE); 512 cp--; 513 } 514 n = ' '; 515 } 516 else if (jp->delimstr) 517 n = -1; 518 if (last) 519 n = '\n'; 520 if (cp) 521 size = cpmax - cp; 522 else 523 size = 0; 524 if (n == -1) 525 { 526 if (size<=1) 527 { 528 if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0) 529 return -1; 530 } 531 else if (sfwrite(iop, cp, size) < 0) 532 return -1; 533 if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0) 534 return -1; 535 } 536 else if (size <= 1) 537 { 538 if (!jp->nullfield) 539 sfputc(iop, n); 540 else if (sfputr(iop, jp->nullfield, n) < 0) 541 return -1; 542 } 543 else 544 { 545 last = cp[size-1]; 546 cp[size-1] = n; 547 if (sfwrite(iop, cp, size) < 0) 548 return -1; 549 cp[size-1] = last; 550 } 551 return 0; 552} 553 554#if DEBUG_TRACE 555static int i1,i2,i3; 556#define outfield(p,i,n,f) (sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3)) 557#endif 558 559static int 560outrec(register Join_t* jp, int mode) 561{ 562 register File_t* fp; 563 register int i; 564 register int j; 565 register int k; 566 register int n; 567 int* out; 568 569 if (mode < 0 && jp->file[0].hit++) 570 return 0; 571 if (mode > 0 && jp->file[1].hit++) 572 return 0; 573 if (out = jp->outlist) 574 { 575 while ((n = *out++) >= 0) 576 { 577 if (n == JOINFIELD) 578 { 579 i = mode >= 0; 580 j = jp->file[i].field; 581 } 582 else 583 { 584 i = n & 1; 585 j = (mode<0 && i || mode>0 && !i) ? 586 jp->file[i].nfields : 587 n >> 2; 588 } 589 if (outfield(jp, i, j, *out < 0) < 0) 590 return -1; 591 } 592 return 0; 593 } 594 k = jp->file[0].nfields; 595 if (mode >= 0) 596 k += jp->file[1].nfields - 1; 597 for (i=0; i<2; i++) 598 { 599 fp = &jp->file[i]; 600 if (mode>0 && i==0) 601 { 602 k -= (fp->nfields - 1); 603 continue; 604 } 605 n = fp->field; 606 if (mode||i==0) 607 { 608 /* output join field first */ 609 if (outfield(jp,i,n,!--k) < 0) 610 return -1; 611 if (!k) 612 return 0; 613 for (j=0; j<n; j++) 614 { 615 if (outfield(jp,i,j,!--k) < 0) 616 return -1; 617 if (!k) 618 return 0; 619 } 620 j = n + 1; 621 } 622 else 623 j = 0; 624 for (;j<fp->nfields; j++) 625 { 626 if (j!=n && outfield(jp,i,j,!--k) < 0) 627 return -1; 628 if (!k) 629 return 0; 630 } 631 } 632 return 0; 633} 634 635#if DEBUG_TRACE 636#define outrec(p,n) (sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1)) 637#endif 638 639static int 640join(Join_t* jp) 641{ 642 register unsigned char* cp1; 643 register unsigned char* cp2; 644 register int n1; 645 register int n2; 646 register int n; 647 register int cmp; 648 register int same; 649 int o2; 650 Sfoff_t lo = -1; 651 Sfoff_t hi = -1; 652 653 if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0)) 654 { 655 n1 = jp->file[0].fieldlen; 656 n2 = jp->file[1].fieldlen; 657 same = 0; 658 for (;;) 659 { 660 n = n1 < n2 ? n1 : n2; 661#if DEBUG_TRACE 662 if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n))) 663 cmp = n1 - n2; 664sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 665 if (!cmp) 666#else 667 if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2)) 668#endif 669 { 670 if (!(jp->outmode & C_COMMON)) 671 { 672 if (cp1 = getrec(jp, 0, 1)) 673 { 674 n1 = jp->file[0].fieldlen; 675 same = 1; 676 continue; 677 } 678 if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2) 679 break; 680 if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0))) 681 { 682 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name); 683 return -1; 684 } 685 } 686 else if (outrec(jp, 0) < 0) 687 return -1; 688 else if (lo < 0 && (jp->outmode & C_COMMON)) 689 { 690 if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0) 691 { 692 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 693 return -1; 694 } 695 lo -= jp->file[1].reclen; 696 } 697 if (cp2 = getrec(jp, 1, lo < 0)) 698 { 699 n2 = jp->file[1].fieldlen; 700 continue; 701 } 702#if DEBUG_TRACE 703sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 704#endif 705 } 706 else if (cmp > 0) 707 { 708 if (same) 709 { 710 same = 0; 711 next: 712 if (n2 > jp->samesize) 713 { 714 jp->samesize = roundof(n2, 16); 715 if (!(jp->same = newof(jp->same, char, jp->samesize, 0))) 716 { 717 error(ERROR_SYSTEM|2, "out of space"); 718 return -1; 719 } 720 } 721 memcpy(jp->same, cp2, o2 = n2); 722 if (!(cp2 = getrec(jp, 1, 0))) 723 break; 724 n2 = jp->file[1].fieldlen; 725 if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2)) 726 goto next; 727 continue; 728 } 729 if (hi >= 0) 730 { 731 if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 732 { 733 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 734 return -1; 735 } 736 hi = -1; 737 } 738 else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0) 739 return -1; 740 lo = -1; 741 if (cp2 = getrec(jp, 1, 1)) 742 { 743 n2 = jp->file[1].fieldlen; 744 continue; 745 } 746#if DEBUG_TRACE 747sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 748#endif 749 } 750 else if (same) 751 { 752 same = 0; 753 if (!(cp1 = getrec(jp, 0, 0))) 754 break; 755 n1 = jp->file[0].fieldlen; 756 continue; 757 } 758 if (lo >= 0) 759 { 760 if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 || 761 (hi -= jp->file[1].reclen) < 0 || 762 sfseek(jp->file[1].iop, lo, SEEK_SET) != lo || 763 !(cp2 = getrec(jp, 1, 0))) 764 { 765 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 766 return -1; 767 } 768 n2 = jp->file[1].fieldlen; 769 lo = -1; 770 if (jp->file[1].discard) 771 sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET); 772 } 773 else if (!cp2) 774 break; 775 else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0) 776 return -1; 777 if (!(cp1 = getrec(jp, 0, 1))) 778 break; 779 n1 = jp->file[0].fieldlen; 780 } 781 } 782#if DEBUG_TRACE 783sfprintf(sfstdout, "[X#%d:?,%p,%p,%d%,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 784#endif 785 if (cp2) 786 { 787 if (hi >= 0 && 788 sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi && 789 sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 790 { 791 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 792 return -1; 793 } 794#if DEBUG_TRACE 795sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode); 796#endif 797 cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0); 798 cmp = 1; 799 n = 1; 800 } 801 else 802 { 803 cmp = -1; 804 n = 0; 805 } 806#if DEBUG_TRACE 807sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 808#endif 809 if (!cp1 || !(jp->outmode & (1<<n))) 810 { 811 if (cp1 && jp->file[n].iop == sfstdin) 812 sfseek(sfstdin, (Sfoff_t)0, SEEK_END); 813 return 0; 814 } 815 if (outrec(jp, cmp) < 0) 816 return -1; 817 do 818 { 819 if (!getrec(jp, n, 1)) 820 return 0; 821 } while (outrec(jp, cmp) >= 0); 822 return -1; 823} 824 825int 826b_join(int argc, char** argv, void* context) 827{ 828 register int n; 829 register char* cp; 830 register Join_t* jp; 831 char* e; 832 833#if !DEBUG_TRACE 834 cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY); 835#endif 836 if (!(jp = init())) 837 error(ERROR_system(1),"out of space"); 838 jp->context = context; 839 for (;;) 840 { 841 switch (n = optget(argv, usage)) 842 { 843 case 'j': 844 /* 845 * check for obsolete "-j1 field" and "-j2 field" 846 */ 847 848 if (opt_info.offset == 0) 849 { 850 cp = argv[opt_info.index - 1]; 851 for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--); 852 n = cp[n] == 'j'; 853 } 854 else 855 n = 0; 856 if (n) 857 { 858 if (opt_info.num!=1 && opt_info.num!=2) 859 error(2,"-jfileno field: fileno must be 1 or 2"); 860 n = '0' + opt_info.num; 861 if (!(cp = argv[opt_info.index])) 862 { 863 argc = 0; 864 break; 865 } 866 opt_info.num = strtol(cp, &e, 10); 867 if (*e) 868 { 869 argc = 0; 870 break; 871 } 872 opt_info.index++; 873 } 874 else 875 { 876 jp->file[0].field = (int)(opt_info.num-1); 877 n = '2'; 878 } 879 /*FALLTHROUGH*/ 880 case '1': 881 case '2': 882 if (opt_info.num <=0) 883 error(2,"field number must positive"); 884 jp->file[n-'1'].field = (int)(opt_info.num-1); 885 continue; 886 case 'v': 887 jp->outmode &= ~C_COMMON; 888 /*FALLTHROUGH*/ 889 case 'a': 890 if (opt_info.num!=1 && opt_info.num!=2) 891 error(2,"%s: file number must be 1 or 2", opt_info.name); 892 jp->outmode |= 1<<(opt_info.num-1); 893 continue; 894 case 'e': 895 jp->nullfield = opt_info.arg; 896 continue; 897 case 'o': 898 /* need to accept obsolescent command syntax */ 899 n = getolist(jp, opt_info.arg, argv+opt_info.index); 900 opt_info.index += n; 901 continue; 902 case 't': 903 jp->state[' '] = jp->state['\t'] = 0; 904 if (jp->mb) 905 { 906 cp = opt_info.arg; 907 jp->delim = mbchar(cp); 908 if ((n = cp - opt_info.arg) > 1) 909 { 910 jp->delimlen = n; 911 jp->delimstr = opt_info.arg; 912 continue; 913 } 914 } 915 n = *(unsigned char*)opt_info.arg; 916 jp->state[n] = S_DELIM; 917 jp->delim = n; 918 continue; 919 case 'i': 920 jp->ignorecase = !opt_info.num; 921 continue; 922 case 'B': 923 jp->buffered = !opt_info.num; 924 continue; 925 case ':': 926 error(2, "%s", opt_info.arg); 927 break; 928 case '?': 929 done(jp); 930 error(ERROR_usage(2), "%s", opt_info.arg); 931 break; 932 } 933 break; 934 } 935 argv += opt_info.index; 936 argc -= opt_info.index; 937 if (error_info.errors || argc!=2) 938 { 939 done(jp); 940 error(ERROR_usage(2),"%s", optusage(NiL)); 941 } 942 jp->ooutmode = jp->outmode; 943 jp->file[0].name = cp = *argv++; 944 if (streq(cp,"-")) 945 { 946 if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 947 { 948 if (sfdcseekable(sfstdin)) 949 error(ERROR_warn(0),"%s: seek may fail",cp); 950 else 951 jp->file[0].discard = 1; 952 } 953 jp->file[0].iop = sfstdin; 954 } 955 else if (!(jp->file[0].iop = sfopen(NiL, cp, "r"))) 956 { 957 done(jp); 958 error(ERROR_system(1),"%s: cannot open",cp); 959 } 960 jp->file[1].name = cp = *argv; 961 if (streq(cp,"-")) 962 { 963 if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 964 { 965 if (sfdcseekable(sfstdin)) 966 error(ERROR_warn(0),"%s: seek may fail",cp); 967 else 968 jp->file[1].discard = 1; 969 } 970 jp->file[1].iop = sfstdin; 971 } 972 else if (!(jp->file[1].iop = sfopen(NiL, cp, "r"))) 973 { 974 done(jp); 975 error(ERROR_system(1),"%s: cannot open",cp); 976 } 977 if (jp->buffered) 978 { 979 sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND); 980 sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND); 981 } 982 jp->outfile = sfstdout; 983 if (!jp->outlist) 984 jp->nullfield = 0; 985 if (join(jp) < 0) 986 { 987 done(jp); 988 error(ERROR_system(1),"write error"); 989 } 990 else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin) 991 sfseek(sfstdin,(Sfoff_t)0,SEEK_END); 992 done(jp); 993 return error_info.errors; 994} 995