tr.c revision 144840
11590Srgrimes/* 21590Srgrimes * Copyright (c) 1988, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * Redistribution and use in source and binary forms, with or without 61590Srgrimes * modification, are permitted provided that the following conditions 71590Srgrimes * are met: 81590Srgrimes * 1. Redistributions of source code must retain the above copyright 91590Srgrimes * notice, this list of conditions and the following disclaimer. 101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111590Srgrimes * notice, this list of conditions and the following disclaimer in the 121590Srgrimes * documentation and/or other materials provided with the distribution. 131590Srgrimes * 3. All advertising materials mentioning features or use of this software 141590Srgrimes * must display the following acknowledgement: 151590Srgrimes * This product includes software developed by the University of 161590Srgrimes * California, Berkeley and its contributors. 171590Srgrimes * 4. Neither the name of the University nor the names of its contributors 181590Srgrimes * may be used to endorse or promote products derived from this software 191590Srgrimes * without specific prior written permission. 201590Srgrimes * 211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311590Srgrimes * SUCH DAMAGE. 321590Srgrimes */ 331590Srgrimes 3487705Smarkm#include <sys/cdefs.h> 3587705Smarkm 3687705Smarkm__FBSDID("$FreeBSD: head/usr.bin/tr/tr.c 144840 2005-04-09 14:31:41Z stefanf $"); 3787705Smarkm 381590Srgrimes#ifndef lint 3928368Scharnierstatic const char copyright[] = 401590Srgrimes"@(#) Copyright (c) 1988, 1993\n\ 411590Srgrimes The Regents of the University of California. All rights reserved.\n"; 4287705Smarkm#endif 431590Srgrimes 441590Srgrimes#ifndef lint 4587705Smarkmstatic const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; 4628368Scharnier#endif 471590Srgrimes 481590Srgrimes#include <sys/types.h> 4923693Speter 5098214Stjr#include <ctype.h> 5128368Scharnier#include <err.h> 52131846Stjr#include <limits.h> 5387705Smarkm#include <locale.h> 541590Srgrimes#include <stdio.h> 551590Srgrimes#include <stdlib.h> 561590Srgrimes#include <string.h> 5723693Speter#include <unistd.h> 58131846Stjr#include <wchar.h> 59131846Stjr#include <wctype.h> 6023693Speter 61131846Stjr#include "cmap.h" 62131846Stjr#include "cset.h" 631590Srgrimes#include "extern.h" 641590Srgrimes 65131846StjrSTR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 66131846StjrSTR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 6798214Stjr 68131846Stjrstatic struct cset *setup(char *, STR *, int, int); 6992922Simpstatic void usage(void); 701590Srgrimes 711590Srgrimesint 72102944Sdwmalonemain(int argc, char **argv) 731590Srgrimes{ 74131846Stjr static int carray[NCHARS_SB]; 75131846Stjr struct cmap *map; 76131846Stjr struct cset *delete, *squeeze; 77131846Stjr int n, *p; 7898214Stjr int Cflag, cflag, dflag, sflag, isstring2; 79144840Sstefanf wint_t ch, cnt, lastch; 801590Srgrimes 8198210Stjr (void)setlocale(LC_ALL, ""); 8211895Sache 8398214Stjr Cflag = cflag = dflag = sflag = 0; 8498214Stjr while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 851590Srgrimes switch((char)ch) { 8698214Stjr case 'C': 8798214Stjr Cflag = 1; 8898214Stjr cflag = 0; 8998214Stjr break; 901590Srgrimes case 'c': 911590Srgrimes cflag = 1; 9298214Stjr Cflag = 0; 931590Srgrimes break; 941590Srgrimes case 'd': 951590Srgrimes dflag = 1; 961590Srgrimes break; 971590Srgrimes case 's': 981590Srgrimes sflag = 1; 991590Srgrimes break; 10030322Shelbig case 'u': 10130322Shelbig setbuf(stdout, (char *)NULL); 10230322Shelbig break; 1031590Srgrimes case '?': 1041590Srgrimes default: 1051590Srgrimes usage(); 1061590Srgrimes } 1071590Srgrimes argc -= optind; 1081590Srgrimes argv += optind; 1091590Srgrimes 1101590Srgrimes switch(argc) { 1111590Srgrimes case 0: 1121590Srgrimes default: 1131590Srgrimes usage(); 1141590Srgrimes /* NOTREACHED */ 1151590Srgrimes case 1: 1161590Srgrimes isstring2 = 0; 1171590Srgrimes break; 1181590Srgrimes case 2: 1191590Srgrimes isstring2 = 1; 1201590Srgrimes break; 1211590Srgrimes } 1221590Srgrimes 1231590Srgrimes /* 12498214Stjr * tr -ds [-Cc] string1 string2 1251590Srgrimes * Delete all characters (or complemented characters) in string1. 1261590Srgrimes * Squeeze all characters in string2. 1271590Srgrimes */ 1281590Srgrimes if (dflag && sflag) { 1291590Srgrimes if (!isstring2) 1301590Srgrimes usage(); 1311590Srgrimes 132131846Stjr delete = setup(argv[0], &s1, cflag, Cflag); 133131846Stjr squeeze = setup(argv[1], &s2, 0, 0); 1348874Srgrimes 135131846Stjr for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 136131846Stjr if (!cset_in(delete, ch) && 137131846Stjr (lastch != ch || !cset_in(squeeze, ch))) { 1381590Srgrimes lastch = ch; 139131846Stjr (void)putwchar(ch); 1401590Srgrimes } 141131855Stjr if (ferror(stdin)) 142131855Stjr err(1, NULL); 1431590Srgrimes exit(0); 1441590Srgrimes } 1451590Srgrimes 1461590Srgrimes /* 14798214Stjr * tr -d [-Cc] string1 1481590Srgrimes * Delete all characters (or complemented characters) in string1. 1491590Srgrimes */ 1501590Srgrimes if (dflag) { 1511590Srgrimes if (isstring2) 1521590Srgrimes usage(); 1531590Srgrimes 154131846Stjr delete = setup(argv[0], &s1, cflag, Cflag); 1551590Srgrimes 156131846Stjr while ((ch = getwchar()) != WEOF) 157131846Stjr if (!cset_in(delete, ch)) 158131846Stjr (void)putwchar(ch); 159131855Stjr if (ferror(stdin)) 160131855Stjr err(1, NULL); 1611590Srgrimes exit(0); 1621590Srgrimes } 1631590Srgrimes 1641590Srgrimes /* 16598214Stjr * tr -s [-Cc] string1 1661590Srgrimes * Squeeze all characters (or complemented characters) in string1. 1671590Srgrimes */ 1681590Srgrimes if (sflag && !isstring2) { 169131846Stjr squeeze = setup(argv[0], &s1, cflag, Cflag); 1701590Srgrimes 171131846Stjr for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 172131846Stjr if (lastch != ch || !cset_in(squeeze, ch)) { 1731590Srgrimes lastch = ch; 174131846Stjr (void)putwchar(ch); 1751590Srgrimes } 176131855Stjr if (ferror(stdin)) 177131855Stjr err(1, NULL); 1781590Srgrimes exit(0); 1791590Srgrimes } 1801590Srgrimes 1811590Srgrimes /* 18298214Stjr * tr [-Ccs] string1 string2 1831590Srgrimes * Replace all characters (or complemented characters) in string1 with 1841590Srgrimes * the character in the same position in string2. If the -s option is 1851590Srgrimes * specified, squeeze all the characters in string2. 1861590Srgrimes */ 1871590Srgrimes if (!isstring2) 1881590Srgrimes usage(); 1891590Srgrimes 190131846Stjr map = cmap_alloc(); 191131846Stjr if (map == NULL) 192131846Stjr err(1, NULL); 193131846Stjr squeeze = cset_alloc(); 194131846Stjr if (squeeze == NULL) 195131846Stjr err(1, NULL); 196131846Stjr 1971590Srgrimes s1.str = argv[0]; 198131846Stjr 199131846Stjr if (Cflag || cflag) { 200131846Stjr cmap_default(map, OOBCH); 201118400Sache if ((s2.str = strdup(argv[1])) == NULL) 202118400Sache errx(1, "strdup(argv[1])"); 203118400Sache } else 204118400Sache s2.str = argv[1]; 2051590Srgrimes 2061590Srgrimes if (!next(&s2)) 20728368Scharnier errx(1, "empty string2"); 2081590Srgrimes 209118371Sache /* 210118371Sache * For -s result will contain only those characters defined 211118371Sache * as the second characters in each of the toupper or tolower 212118371Sache * pairs. 213118371Sache */ 214118371Sache 2151590Srgrimes /* If string2 runs out of characters, use the last one specified. */ 216118371Sache while (next(&s1)) { 217118371Sache again: 218131846Stjr if (s1.state == CCLASS_LOWER && 219131846Stjr s2.state == CCLASS_UPPER && 220118371Sache s1.cnt == 1 && s2.cnt == 1) { 221118371Sache do { 222131846Stjr ch = towupper(s1.lastch); 223131846Stjr cmap_add(map, s1.lastch, ch); 224131846Stjr if (sflag && iswupper(ch)) 225131846Stjr cset_add(squeeze, ch); 226118371Sache if (!next(&s1)) 227118371Sache goto endloop; 228131846Stjr } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 229118371Sache /* skip upper set */ 230118371Sache do { 231118371Sache if (!next(&s2)) 232118371Sache break; 233131846Stjr } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 234118371Sache goto again; 235131846Stjr } else if (s1.state == CCLASS_UPPER && 236131846Stjr s2.state == CCLASS_LOWER && 237118371Sache s1.cnt == 1 && s2.cnt == 1) { 238118371Sache do { 239131846Stjr ch = towlower(s1.lastch); 240131846Stjr cmap_add(map, s1.lastch, ch); 241131846Stjr if (sflag && iswlower(ch)) 242131846Stjr cset_add(squeeze, ch); 243118371Sache if (!next(&s1)) 244118371Sache goto endloop; 245131846Stjr } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 246118371Sache /* skip lower set */ 247118371Sache do { 248118371Sache if (!next(&s2)) 249118371Sache break; 250131846Stjr } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 251118371Sache goto again; 252118371Sache } else { 253131846Stjr cmap_add(map, s1.lastch, s2.lastch); 254118371Sache if (sflag) 255131846Stjr cset_add(squeeze, s2.lastch); 2561590Srgrimes } 257118371Sache (void)next(&s2); 258118371Sache } 259118371Sacheendloop: 260131846Stjr if (cflag || (Cflag && MB_CUR_MAX > 1)) { 261131846Stjr /* 262131846Stjr * This is somewhat tricky: since the character set is 263131846Stjr * potentially huge, we need to avoid allocating a map 264131846Stjr * entry for every character. Our strategy is to set the 265131846Stjr * default mapping to the last character of string #2 266131846Stjr * (= the one that gets automatically repeated), then to 267131846Stjr * add back identity mappings for characters that should 268131846Stjr * remain unchanged. We don't waste space on identity mappings 269131846Stjr * for non-characters with the -C option; those are simulated 270131846Stjr * in the I/O loop. 271131846Stjr */ 272131846Stjr s2.str = argv[1]; 273131846Stjr s2.state = NORMAL; 274131846Stjr for (cnt = 0; cnt < WCHAR_MAX; cnt++) { 275131846Stjr if (Cflag && !iswrune(cnt)) 276131846Stjr continue; 277131846Stjr if (cmap_lookup(map, cnt) == OOBCH) { 278131846Stjr if (next(&s2)) 279131846Stjr cmap_add(map, cnt, s2.lastch); 280131846Stjr if (sflag) 281131846Stjr cset_add(squeeze, s2.lastch); 282131846Stjr } else 283131846Stjr cmap_add(map, cnt, cnt); 284131846Stjr if ((s2.state == EOS || s2.state == INFINITE) && 285131846Stjr cnt >= cmap_max(map)) 286131846Stjr break; 287131846Stjr } 288131846Stjr cmap_default(map, s2.lastch); 289131846Stjr } else if (Cflag) { 290131846Stjr for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 291131846Stjr if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 292118399Sache *p++ = cnt; 293118399Sache else 294131846Stjr cmap_add(map, cnt, cnt); 295118399Sache } 296118399Sache n = p - carray; 297118399Sache if (Cflag && n > 1) 298118399Sache (void)mergesort(carray, n, sizeof(*carray), charcoll); 299118399Sache 300100874Stjr s2.str = argv[1]; 301100874Stjr s2.state = NORMAL; 302118399Sache for (cnt = 0; cnt < n; cnt++) { 303118399Sache (void)next(&s2); 304131846Stjr cmap_add(map, carray[cnt], s2.lastch); 305118409Sache /* 306118409Sache * Chars taken from s2 can be different this time 307118409Sache * due to lack of complex upper/lower processing, 308118409Sache * so fill string2 again to not miss some. 309118409Sache */ 310118409Sache if (sflag) 311131846Stjr cset_add(squeeze, s2.lastch); 312100874Stjr } 313100874Stjr } 3141590Srgrimes 315131846Stjr cset_cache(squeeze); 316131846Stjr cmap_cache(map); 317131846Stjr 3181590Srgrimes if (sflag) 319131846Stjr for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 320131846Stjr if (!Cflag || iswrune(ch)) 321131846Stjr ch = cmap_lookup(map, ch); 322131846Stjr if (lastch != ch || !cset_in(squeeze, ch)) { 3231590Srgrimes lastch = ch; 324131846Stjr (void)putwchar(ch); 3251590Srgrimes } 3261590Srgrimes } 3271590Srgrimes else 328131846Stjr while ((ch = getwchar()) != WEOF) { 329131846Stjr if (!Cflag || iswrune(ch)) 330131846Stjr ch = cmap_lookup(map, ch); 331131846Stjr (void)putwchar(ch); 332131846Stjr } 333131855Stjr if (ferror(stdin)) 334131855Stjr err(1, NULL); 3351590Srgrimes exit (0); 3361590Srgrimes} 3371590Srgrimes 338131846Stjrstatic struct cset * 339131846Stjrsetup(char *arg, STR *str, int cflag, int Cflag) 3401590Srgrimes{ 341131846Stjr struct cset *cs; 3421590Srgrimes 343131846Stjr cs = cset_alloc(); 344131846Stjr if (cs == NULL) 345131846Stjr err(1, NULL); 3461590Srgrimes str->str = arg; 3471590Srgrimes while (next(str)) 348131846Stjr cset_add(cs, str->lastch); 349131846Stjr if (Cflag) 350131846Stjr cset_addclass(cs, wctype("rune"), true); 351131846Stjr if (cflag || Cflag) 352131846Stjr cset_invert(cs); 353131846Stjr cset_cache(cs); 354131846Stjr return (cs); 3551590Srgrimes} 3561590Srgrimes 357118371Sacheint 358100891Stjrcharcoll(const void *a, const void *b) 359100891Stjr{ 360118371Sache static char sa[2], sb[2]; 361100891Stjr 362100891Stjr sa[0] = *(const int *)a; 363100891Stjr sb[0] = *(const int *)b; 364118373Sache return (strcoll(sa, sb)); 365100891Stjr} 366100891Stjr 3671590Srgrimesstatic void 368102944Sdwmaloneusage(void) 3691590Srgrimes{ 37028368Scharnier (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 37198214Stjr "usage: tr [-Ccsu] string1 string2", 37298214Stjr " tr [-Ccu] -d string1", 37398214Stjr " tr [-Ccu] -s string1", 37498214Stjr " tr [-Ccu] -ds string1 string2"); 3751590Srgrimes exit(1); 3761590Srgrimes} 377