1/*
2*******************************************************************************
3*   Copyright (C) 2010-2012, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  bytestriebuilder.h
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2010sep25
12*   created by: Markus W. Scherer
13*/
14
15/**
16 * \file
17 * \brief C++ API: Builder for icu::BytesTrie
18 */
19
20#ifndef __BYTESTRIEBUILDER_H__
21#define __BYTESTRIEBUILDER_H__
22
23#include "unicode/utypes.h"
24#include "unicode/bytestrie.h"
25#include "unicode/stringpiece.h"
26#include "unicode/stringtriebuilder.h"
27
28U_NAMESPACE_BEGIN
29
30class BytesTrieElement;
31class CharString;
32
33/**
34 * Builder class for BytesTrie.
35 *
36 * This class is not intended for public subclassing.
37 * @stable ICU 4.8
38 */
39class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder {
40public:
41    /**
42     * Constructs an empty builder.
43     * @param errorCode Standard ICU error code.
44     * @stable ICU 4.8
45     */
46    BytesTrieBuilder(UErrorCode &errorCode);
47
48    /**
49     * Destructor.
50     * @stable ICU 4.8
51     */
52    virtual ~BytesTrieBuilder();
53
54    /**
55     * Adds a (byte sequence, value) pair.
56     * The byte sequence must be unique.
57     * The bytes will be copied; the builder does not keep
58     * a reference to the input StringPiece or its data().
59     * @param s The input byte sequence.
60     * @param value The value associated with this byte sequence.
61     * @param errorCode Standard ICU error code. Its input value must
62     *                  pass the U_SUCCESS() test, or else the function returns
63     *                  immediately. Check for U_FAILURE() on output or use with
64     *                  function chaining. (See User Guide for details.)
65     * @return *this
66     * @stable ICU 4.8
67     */
68    BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode);
69
70    /**
71     * Builds a BytesTrie for the add()ed data.
72     * Once built, no further data can be add()ed until clear() is called.
73     *
74     * This method passes ownership of the builder's internal result array to the new trie object.
75     * Another call to any build() variant will re-serialize the trie.
76     * After clear() has been called, a new array will be used as well.
77     * @param buildOption Build option, see UStringTrieBuildOption.
78     * @param errorCode Standard ICU error code. Its input value must
79     *                  pass the U_SUCCESS() test, or else the function returns
80     *                  immediately. Check for U_FAILURE() on output or use with
81     *                  function chaining. (See User Guide for details.)
82     * @return A new BytesTrie for the add()ed data.
83     * @stable ICU 4.8
84     */
85    BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
86
87    /**
88     * Builds a BytesTrie for the add()ed data and byte-serializes it.
89     * Once built, no further data can be add()ed until clear() is called.
90     *
91     * Multiple calls to buildStringPiece() return StringPieces referring to the
92     * builder's same byte array, without rebuilding.
93     * If buildStringPiece() is called after build(), the trie will be
94     * re-serialized into a new array.
95     * If build() is called after buildStringPiece(), the trie object will become
96     * the owner of the previously returned array.
97     * After clear() has been called, a new array will be used as well.
98     * @param buildOption Build option, see UStringTrieBuildOption.
99     * @param errorCode Standard ICU error code. Its input value must
100     *                  pass the U_SUCCESS() test, or else the function returns
101     *                  immediately. Check for U_FAILURE() on output or use with
102     *                  function chaining. (See User Guide for details.)
103     * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data.
104     * @stable ICU 4.8
105     */
106    StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
107
108    /**
109     * Removes all (byte sequence, value) pairs.
110     * New data can then be add()ed and a new trie can be built.
111     * @return *this
112     * @stable ICU 4.8
113     */
114    BytesTrieBuilder &clear();
115
116private:
117    BytesTrieBuilder(const BytesTrieBuilder &other);  // no copy constructor
118    BytesTrieBuilder &operator=(const BytesTrieBuilder &other);  // no assignment operator
119
120    void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
121
122    virtual int32_t getElementStringLength(int32_t i) const;
123    virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const;
124    virtual int32_t getElementValue(int32_t i) const;
125
126    virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const;
127
128    virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const;
129    virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const;
130    virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const;
131
132    virtual UBool matchNodesCanHaveValues() const { return FALSE; }
133
134    virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; }
135    virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; }
136    virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; }
137
138#ifndef U_HIDE_INTERNAL_API
139    /**
140     * @internal
141     */
142    class BTLinearMatchNode : public LinearMatchNode {
143    public:
144        BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);
145        virtual UBool operator==(const Node &other) const;
146        virtual void write(StringTrieBuilder &builder);
147    private:
148        const char *s;
149    };
150#endif  /* U_HIDE_INTERNAL_API */
151
152    virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
153                                        Node *nextNode) const;
154
155    UBool ensureCapacity(int32_t length);
156    virtual int32_t write(int32_t byte);
157    int32_t write(const char *b, int32_t length);
158    virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length);
159    virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
160    virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
161    virtual int32_t writeDeltaTo(int32_t jumpTarget);
162
163    CharString *strings;  // Pointer not object so we need not #include internal charstr.h.
164    BytesTrieElement *elements;
165    int32_t elementsCapacity;
166    int32_t elementsLength;
167
168    // Byte serialization of the trie.
169    // Grows from the back: bytesLength measures from the end of the buffer!
170    char *bytes;
171    int32_t bytesCapacity;
172    int32_t bytesLength;
173};
174
175U_NAMESPACE_END
176
177#endif  // __BYTESTRIEBUILDER_H__
178