1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (C) 2002-2006 IBM, Inc.   All Rights Reserved.
4 *
5 ********************************************************************/
6
/**
 * This program demonstrates collation-based string searching: it looks for
 * a pattern string inside a source string using an ICU collator.
 */
10
/**
 * Usage text printed by main() when -help / -? is given or when the
 * command line cannot be parsed. Every line ends with a newline so that
 * each option is shown on its own line.
 */
const char gHelpString[] =
    "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
    "-help            Display this message.\n"
    "-locale name     ICU locale to use.  Default is en_US\n"
    "-rules rule      Collation rules string (overrides locale)\n"
    "-french          French accent ordering\n"
    "-norm            Normalizing mode on\n"
    "-shifted         Shifted mode\n"
    "-lower           Lower case first\n"
    "-upper           Upper case first\n"
    "-case            Enable separate case level\n"
    "-level n         Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-source string   Source string\n"
    "-pattern string  Pattern string to look for in source\n"
    "-overlap         Enable searching to be done on overlapping patterns\n"
    "-canonical       Enable searching to be done matching canonical equivalent patterns\n"
    "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
    "The format \\uXXXX is supported for the rules and comparison strings\n"
    ;
30
31#include <stdio.h>
32#include <string.h>
33#include <stdlib.h>
34
35#include <unicode/utypes.h>
36#include <unicode/ucol.h>
37#include <unicode/usearch.h>
38#include <unicode/ustring.h>
39
40/**
41 * Command line option variables
42 *    These global variables are set according to the options specified
43 *    on the command line by the user.
44 */
45char * opt_locale      = "en_US";
46char * opt_rules       = 0;
47UBool  opt_help        = FALSE;
48UBool  opt_norm        = FALSE;
49UBool  opt_french      = FALSE;
50UBool  opt_shifted     = FALSE;
51UBool  opt_lower       = FALSE;
52UBool  opt_upper       = FALSE;
53UBool  opt_case        = FALSE;
54UBool  opt_overlap     = FALSE;
55UBool  opt_canonical   = FALSE;
56int    opt_level       = 0;
57char * opt_source      = "International Components for Unicode";
58char * opt_pattern     = "Unicode";
59UCollator * collator   = 0;
60UStringSearch * search = 0;
61UChar rules[100];
62UChar source[100];
63UChar pattern[100];
64
65/**
66 * Definitions for the command line options
67 */
68struct OptSpec {
69    const char *name;
70    enum {FLAG, NUM, STRING} type;
71    void *pVar;
72};
73
74OptSpec opts[] = {
75    {"-locale",      OptSpec::STRING, &opt_locale},
76    {"-rules",       OptSpec::STRING, &opt_rules},
77	{"-source",      OptSpec::STRING, &opt_source},
78    {"-pattern",     OptSpec::STRING, &opt_pattern},
79    {"-norm",        OptSpec::FLAG,   &opt_norm},
80    {"-french",      OptSpec::FLAG,   &opt_french},
81    {"-shifted",     OptSpec::FLAG,   &opt_shifted},
82    {"-lower",       OptSpec::FLAG,   &opt_lower},
83    {"-upper",       OptSpec::FLAG,   &opt_upper},
84    {"-case",        OptSpec::FLAG,   &opt_case},
85    {"-level",       OptSpec::NUM,    &opt_level},
86	{"-overlap",     OptSpec::FLAG,   &opt_overlap},
87	{"-canonical",   OptSpec::FLAG,   &opt_canonical},
88    {"-help",        OptSpec::FLAG,   &opt_help},
89    {"-?",           OptSpec::FLAG,   &opt_help},
90    {0, OptSpec::FLAG, 0}
91};
92
93/**
94 * processOptions()  Function to read the command line options.
95 */
96UBool processOptions(int argc, const char **argv, OptSpec opts[])
97{
98    for (int argNum = 1; argNum < argc; argNum ++) {
99        const char *pArgName = argv[argNum];
100        OptSpec *pOpt;
101        for (pOpt = opts;  pOpt->name != 0; pOpt ++) {
102            if (strcmp(pOpt->name, pArgName) == 0) {
103                switch (pOpt->type) {
104                case OptSpec::FLAG:
105                    *(UBool *)(pOpt->pVar) = TRUE;
106                    break;
107                case OptSpec::STRING:
108                    argNum ++;
109                    if (argNum >= argc) {
110                        fprintf(stderr, "value expected for \"%s\" option.\n",
111							    pOpt->name);
112                        return FALSE;
113                    }
114                    *(const char **)(pOpt->pVar) = argv[argNum];
115                    break;
116                case OptSpec::NUM:
117                    argNum ++;
118                    if (argNum >= argc) {
119                        fprintf(stderr, "value expected for \"%s\" option.\n",
120							    pOpt->name);
121                        return FALSE;
122                    }
123                    char *endp;
124                    int i = strtol(argv[argNum], &endp, 0);
125                    if (endp == argv[argNum]) {
126                        fprintf(stderr,
127							    "integer value expected for \"%s\" option.\n",
128								pOpt->name);
129                        return FALSE;
130                    }
131                    *(int *)(pOpt->pVar) = i;
132                }
133                break;
134            }
135        }
136        if (pOpt->name == 0)
137        {
138            fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
139            return FALSE;
140        }
141    }
142	return TRUE;
143}
144
145/**
146 * Creates a collator
147 */
148UBool processCollator()
149{
150	// Set up an ICU collator
151    UErrorCode status = U_ZERO_ERROR;
152
153    if (opt_rules != 0) {
154		u_unescape(opt_rules, rules, 100);
155        collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY,
156			                  NULL, &status);
157    }
158    else {
159        collator = ucol_open(opt_locale, &status);
160    }
161	if (U_FAILURE(status)) {
162        fprintf(stderr, "Collator creation failed.: %d\n", status);
163        return FALSE;
164    }
165    if (status == U_USING_DEFAULT_WARNING) {
166        fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n",
167			    opt_locale);
168    }
169    if (status == U_USING_FALLBACK_WARNING) {
170        fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n",
171			    opt_locale);
172    }
173    if (opt_norm) {
174        ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
175    }
176    if (opt_french) {
177        ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
178    }
179    if (opt_lower) {
180        ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST,
181			              &status);
182    }
183    if (opt_upper) {
184        ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST,
185			              &status);
186    }
187    if (opt_case) {
188        ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status);
189    }
190    if (opt_shifted) {
191        ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
192			              &status);
193    }
194    if (opt_level != 0) {
195        switch (opt_level) {
196        case 1:
197            ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
198            break;
199        case 2:
200            ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY,
201				              &status);
202            break;
203        case 3:
204            ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status);
205            break;
206        case 4:
207            ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY,
208				              &status);
209            break;
210        case 5:
211            ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL,
212				              &status);
213            break;
214        default:
215            fprintf(stderr, "-level param must be between 1 and 5\n");
216            return FALSE;
217        }
218    }
219    if (U_FAILURE(status)) {
220        fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
221        return FALSE;
222    }
223	return TRUE;
224}
225
226/**
227 * Creates a string search
228 */
229UBool processStringSearch()
230{
231	u_unescape(opt_source, source, 100);
232	u_unescape(opt_pattern, pattern, 100);
233	UErrorCode status = U_ZERO_ERROR;
234	search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL,
235		                              &status);
236	if (U_FAILURE(status)) {
237		return FALSE;
238	}
239	if (opt_overlap == TRUE) {
240		usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status);
241	}
242	if (opt_canonical == TRUE) {
243		usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON,
244			                 &status);
245	}
246	if (U_FAILURE(status)) {
247		fprintf(stderr, "Error setting search attributes\n");
248		return FALSE;
249	}
250	return TRUE;
251}
252
253UBool findPattern()
254{
255	UErrorCode status = U_ZERO_ERROR;
256	int32_t offset = usearch_next(search, &status);
257	if (offset == USEARCH_DONE) {
258		fprintf(stdout, "Pattern not found in source\n");
259	}
260	while (offset != USEARCH_DONE) {
261		fprintf(stdout, "Pattern found at offset %d size %d\n", offset,
262				usearch_getMatchedLength(search));
263		offset = usearch_next(search, &status);
264	}
265	if (U_FAILURE(status)) {
266		fprintf(stderr, "Error in searching for pattern %d\n", status);
267		return FALSE;
268	}
269	fprintf(stdout, "End of search\n");
270	return TRUE;
271}
272
273/**
274 * Main   --  process command line, read in and pre-process the test file,
275 *            call other functions to do the actual tests.
276 */
277int main(int argc, const char** argv)
278{
279    if (processOptions(argc, argv, opts) != TRUE || opt_help) {
280        printf(gHelpString);
281        return -1;
282    }
283
284    if (processCollator() != TRUE) {
285		fprintf(stderr, "Error creating collator\n");
286		return -1;
287	}
288
289	if (processStringSearch() != TRUE) {
290		fprintf(stderr, "Error creating string search\n");
291		return -1;
292	}
293
294	fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern,
295		    opt_source);
296
297	findPattern();
298	ucol_close(collator);
299	usearch_close(search);
300	return 0;
301}
302