1/*
2*******************************************************************************
3*   Copyright (C) 2001-2009, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*
7* File ucoleitr.h
8*
9* Modification History:
10*
11* Date        Name        Description
12* 02/15/2001  synwee      Modified all methods to process its own function
13*                         instead of calling the equivalent c++ api (coleitr.h)
14*******************************************************************************/
15
16#ifndef UCOLEITR_H
17#define UCOLEITR_H
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_COLLATION
22
23/**
24 * This indicates an error has occurred during processing or that there are no
25 * more CEs to be returned by ucol_next() or ucol_previous().
26 * @stable ICU 2.0
27 */
28#define UCOL_NULLORDER        ((int32_t)0xFFFFFFFF)
29
30/**
31 * This indicates an error has occurred during processing or there are no more CEs
32 * to be returned by ucol_nextProcessed() or ucol_previousProcessed().
33 *
34 * @internal
35 */
36#define UCOL_PROCESSED_NULLORDER        ((int64_t)U_INT64_MAX)
37
38#include "unicode/ucol.h"
39
40/**
41 * The UCollationElements struct, i.e. the collation element iterator.
42 * For usage in C programs.
43 * @stable ICU 2.0
44 */
45typedef struct UCollationElements UCollationElements;
46
47/**
48 * \file
49 * \brief C API: UCollationElements
50 *
51 * The UCollationElements API is used as an iterator to walk through each
52 * character of an international string. Use the iterator to return the
53 * ordering priority of the positioned character. The ordering priority of a
54 * character, which we refer to as a key, defines how a character is collated
55 * in the given collation object.
56 * For example, consider the following in Spanish:
57 * <pre>
58 * .       "ca" -> the first key is key('c') and second key is key('a').
59 * .       "cha" -> the first key is key('ch') and second key is key('a').
60 * </pre>
61 * And in German,
62 * <pre>
63 * .       "<ae ligature>b"-> the first key is key('a'), the second key is key('e'), and
64 * .       the third key is key('b').
65 * </pre>
66 * <p>Example of the iterator usage: (without error checking)
67 * <pre>
68 * .  void CollationElementIterator_Example()
69 * .  {
70 * .      UChar *s;
71 * .      int32_t order, primaryOrder;
72 * .      UCollationElements *c;
73 * .      UCollator *coll;
74 * .      UErrorCode success = U_ZERO_ERROR;
75 * .      s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) );
76 * .      u_uastrcpy(s, "This is a test");
77 * .      coll = ucol_open(NULL, &success);
78 * .      c = ucol_openElements(coll, s, u_strlen(s), &success);
79 * .      order = ucol_next(c, &success);
80 * .      ucol_reset(c);
81 * .      order = ucol_previous(c, &success);
82 * .      free(s);
83 * .      ucol_close(coll);
84 * .      ucol_closeElements(c);
85 * .  }
86 * </pre>
87 * <p>
88 * ucol_next() returns the collation order of the next character.
89 * ucol_previous() returns the collation order of the previous character.
90 * The Collation Element Iterator moves only in one direction between calls to
91 * ucol_reset. That is, ucol_next() and ucol_previous() can not be inter-used.
92 * Whenever ucol_previous is to be called after ucol_next() or vice versa,
93 * ucol_reset has to be called first to reset the status, shifting pointers to
94 * either the end or the start of the string. Hence at the next call of
95 * ucol_previous or ucol_next, the first or last collation order will be returned.
96 * If a change of direction is done without a ucol_reset, the result is
97 * undefined.
98 * The result of a forward iterate (ucol_next) and reversed result of the
99 * backward iterate (ucol_previous) on the same string are equivalent, if
100 * collation orders with the value UCOL_IGNORABLE are ignored.
101 * Character based on the comparison level of the collator.  A collation order
102 * consists of primary order, secondary order and tertiary order.  The data
103 * type of the collation order is <strong>int32_t</strong>.
104 *
105 * @see UCollator
106 */
107
108/**
109 * Open a collation element iterator for a string.
110 *
111 * @param coll The collator containing the desired collation rules.
112 * @param text The text to iterate over.
113 * @param textLength The number of characters in text, or -1 if null-terminated.
114 * @param status A pointer to a UErrorCode to receive any errors.
115 * @return a pointer to the UCollationElements; close with ucol_closeElements().
116 * @stable ICU 2.0
117 */
118U_STABLE UCollationElements* U_EXPORT2
119ucol_openElements(const UCollator  *coll,
120                  const UChar      *text,
121                        int32_t    textLength,
122                        UErrorCode *status);
123
124
125/**
126 * Get a hash code for a key. Not very useful!
127 * @param key    the given key, as an array of bytes.
128 * @param length the size of the key array.
129 * @return       the hash code.
130 * @stable ICU 2.0
131 */
132U_STABLE int32_t U_EXPORT2
133ucol_keyHashCode(const uint8_t* key, int32_t length);
134
135/**
136 * Close a UCollationElements, e.g. one opened with ucol_openElements().
137 * Once closed, a UCollationElements may no longer be used.
138 * @param elems The UCollationElements to close.
139 * @stable ICU 2.0
140 */
141U_STABLE void U_EXPORT2
142ucol_closeElements(UCollationElements *elems);
143
144/**
145 * Reset the collation elements to their initial state.
146 * This will move the 'cursor' to the beginning of the text.
147 * Property settings for collation will be reset to the current status.
148 * @param elems The UCollationElements to reset; its text is left unchanged.
149 * @see ucol_next
150 * @see ucol_previous
151 * @stable ICU 2.0
152 */
153U_STABLE void U_EXPORT2
154ucol_reset(UCollationElements *elems);
155
156/**
157 * Set the collation elements to use implicit ordering for Han
158 * even if they've been tailored. This will also force Hangul
159 * syllables to be ordered by decomposing them to their component
160 * Jamo.
161 *
162 * @param elems The UCollationElements containing the text.
163 * @param status A pointer to a UErrorCode to receive any errors.
164 *
165 * @internal
166 */
167U_INTERNAL void U_EXPORT2
168ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status);
169
170/**
171 * Get the ordering priority of the next collation element in the text.
172 * A single character may contain more than one collation element.
173 * @param elems The UCollationElements containing the text.
174 * @param status A pointer to a UErrorCode to receive any errors.
175 * @return The next collation elements ordering, otherwise returns UCOL_NULLORDER
176 *         if an error has occurred or if the end of string has been reached
177 * @stable ICU 2.0
178 */
179U_STABLE int32_t U_EXPORT2
180ucol_next(UCollationElements *elems, UErrorCode *status);
181
182/**
183 * Get the ordering priority of the previous collation element in the text.
184 * A single character may contain more than one collation element.
185 * Note that internally a stack is used to store buffered collation elements.
186 * It is very rare that the stack will overflow, however if such a case is
187 * encountered, the problem can be solved by increasing the size
188 * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
189 * @param elems The UCollationElements containing the text.
190 * @param status A pointer to a UErrorCode to receive any errors. Notably
191 *               a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
192 *               buffer has been exhausted.
193 * @return The previous collation elements ordering, otherwise returns
194 *         UCOL_NULLORDER if an error has occurred or if the start of string
195 *         has been reached.
196 * @stable ICU 2.0
197 */
198U_STABLE int32_t U_EXPORT2
199ucol_previous(UCollationElements *elems, UErrorCode *status);
200
201/**
202 * Get the processed ordering priority of the next collation element in the text.
203 * A single character may contain more than one collation element.
204 *
205 * @param elems The UCollationElements containing the text.
206 * @param ixLow A pointer to an int32_t to receive the iterator index before fetching the CE.
207 * @param ixHigh A pointer to an int32_t to receive the iterator index after fetching the CE.
208 * @param status A pointer to a UErrorCode to receive any errors.
209 * @return The next collation elements ordering, otherwise returns UCOL_PROCESSED_NULLORDER
210 *         if an error has occurred or if the end of string has been reached
211 *
212 * @internal
213 */
214U_INTERNAL int64_t U_EXPORT2
215ucol_nextProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
216
217/**
218 * Get the processed ordering priority of the previous collation element in the text.
219 * A single character may contain more than one collation element.
220 * Note that internally a stack is used to store buffered collation elements.
221 * It is very rare that the stack will overflow, however if such a case is
222 * encountered, the problem can be solved by increasing the size
223 * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
224 *
225 * @param elems The UCollationElements containing the text.
226 * @param ixLow A pointer to an int32_t to receive the iterator index after fetching the CE.
227 * @param ixHigh A pointer to an int32_t to receive the iterator index before fetching the CE.
228 * @param status A pointer to a UErrorCode to receive any errors. Notably
229 *               a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
230 *               buffer has been exhausted.
231 * @return The previous collation elements ordering, otherwise returns
232 *         UCOL_PROCESSED_NULLORDER if an error has occurred or if the start of
233 *         string has been reached.
234 *
235 * @internal
236 */
237U_INTERNAL int64_t U_EXPORT2
238ucol_previousProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
239
240/**
241 * Get the maximum length of any expansion sequences that end with the
242 * specified comparison order.
243 * NOTE(review): the intended use case is not documented here.
244 * @param elems The UCollationElements containing the text.
245 * @param order A collation order returned by ucol_previous() or ucol_next().
246 * @return maximum size of the expansion sequences ending with the collation
247 *         element or 1 if collation element does not occur at the end of any
248 *         expansion sequence
249 * @stable ICU 2.0
250 */
251U_STABLE int32_t U_EXPORT2
252ucol_getMaxExpansion(const UCollationElements *elems, int32_t order);
253
254/**
255 * Set the text containing the collation elements.
256 * Property settings for collation will remain the same.
257 * In order to reset the iterator to the current collation property settings,
258 * ucol_reset() has to be called.
259 * @param elems The UCollationElements to set.
260 * @param text The source text containing the collation elements.
261 * @param textLength The length of text, or -1 if null-terminated.
262 * @param status A pointer to a UErrorCode to receive any errors.
263 * @see ucol_getText
264 * @stable ICU 2.0
265 */
266U_STABLE void U_EXPORT2
267ucol_setText(      UCollationElements *elems,
268             const UChar              *text,
269                   int32_t            textLength,
270                   UErrorCode         *status);
271
272/**
273 * Get the offset of the current source character.
274 * This is an offset into the text of the character containing the current
275 * collation elements.
276 * @param elems The UCollationElements to query.
277 * @return The offset of the current source character, as an index into the text.
278 * @see ucol_setOffset
279 * @stable ICU 2.0
280 */
281U_STABLE int32_t U_EXPORT2
282ucol_getOffset(const UCollationElements *elems);
283
284/**
285 * Set the offset of the current source character.
286 * This is an offset into the text of the character to be processed.
287 * Property settings for collation will remain the same.
288 * In order to reset the iterator to the current collation property settings,
289 * ucol_reset() has to be called.
290 * @param elems The UCollationElements to set.
291 * @param offset The desired character offset.
292 * @param status A pointer to a UErrorCode to receive any errors.
293 * @see ucol_getOffset
294 * @stable ICU 2.0
295 */
296U_STABLE void U_EXPORT2
297ucol_setOffset(UCollationElements *elems,
298               int32_t        offset,
299               UErrorCode         *status);
300
301/**
302 * Get the primary order of a collation order.
303 * @param order the collation order, e.g. as returned by ucol_next() or ucol_previous()
304 * @return the primary order of a collation order.
305 * @stable ICU 2.6
306 */
307U_STABLE int32_t U_EXPORT2
308ucol_primaryOrder (int32_t order);
309
310/**
311 * Get the secondary order of a collation order.
312 * @param order the collation order, e.g. as returned by ucol_next() or ucol_previous()
313 * @return the secondary order of a collation order.
314 * @stable ICU 2.6
315 */
316U_STABLE int32_t U_EXPORT2
317ucol_secondaryOrder (int32_t order);
318
319/**
320 * Get the tertiary order of a collation order.
321 * @param order the collation order, e.g. as returned by ucol_next() or ucol_previous()
322 * @return the tertiary order of a collation order.
323 * @stable ICU 2.6
324 */
325U_STABLE int32_t U_EXPORT2
326ucol_tertiaryOrder (int32_t order);
327
328#endif /* #if !UCONFIG_NO_COLLATION */
329
330#endif
331