Lines Matching defs:u8c

2985  *   u8c    : pointer to cursor.
2990 int utf8ncursor(struct utf8cursor *u8c, struct tree *tree, const char *s,
2997 u8c->tree = tree;
2998 u8c->s = s;
2999 u8c->p = NULL;
3000 u8c->ss = NULL;
3001 u8c->sp = NULL;
3002 u8c->len = len;
3003 u8c->slen = 0;
3004 u8c->ccc = STOPPER;
3005 u8c->nccc = STOPPER;
3006 u8c->unichar = 0;
3008 if (u8c->len != len)
3020 * u8c : pointer to cursor.
3025 int utf8cursor(struct utf8cursor *u8c, struct tree *tree, const char *s)
3027 return utf8ncursor(u8c, tree, s, (unsigned int)-1);
3031 * Get one byte from the normalized form of the string described by u8c.
3035 * The cursor keeps track of the location in the string in u8c->s.
3037 * u8c->p, and u8c->s is set to the start of the decomposition. Note
3038 * that bytes from a decomposition do not count against u8c->len.
3040 * Characters are emitted if they match the current CCC in u8c->ccc.
3041 * Hitting end-of-string while u8c->ccc == STOPPER means we're done,
3045 * values of u8c->s and u8c->p are stored in u8c->ss and u8c->sp at
3047 * emitted and stores it in u8c->nccc, the second pass emits the
3053 * u8c->p != NULL -> a decomposition is being scanned.
3054 * u8c->ss != NULL -> this is a repeating scan.
3055 * u8c->ccc == -1 -> this is the first scan of a repeating scan.
3057 int utf8byte(struct utf8cursor *u8c)
3064 if (u8c->p && *u8c->s == '\0') {
3065 u8c->s = u8c->p;
3066 u8c->p = NULL;
3070 if (!u8c->p && (u8c->len == 0 || *u8c->s == '\0')) {
3072 if (u8c->ccc == STOPPER)
3077 } else if ((*u8c->s & 0xC0) == 0x80) {
3079 if (!u8c->p)
3080 u8c->len--;
3081 return (unsigned char)*u8c->s++;
3085 if (u8c->p) {
3086 leaf = utf8lookup(u8c->tree, u8c->hangul, u8c->s);
3088 leaf = utf8nlookup(u8c->tree, u8c->hangul,
3089 u8c->s, u8c->len);
3097 if (ages[LEAF_GEN(leaf)] > u8c->tree->maxage) {
3100 u8c->len -= utf8clen(u8c->s);
3101 u8c->p = u8c->s + utf8clen(u8c->s);
3102 u8c->s = LEAF_STR(leaf);
3104 if (*u8c->s == '\0') {
3105 if (u8c->ccc == STOPPER)
3110 leaf = utf8lookup(u8c->tree, u8c->hangul, u8c->s);
3113 u8c->unichar = utf8decode(u8c->s);
3119 if (ccc != STOPPER && u8c->ccc < ccc && ccc < u8c->nccc)
3120 u8c->nccc = ccc;
3126 if (ccc == u8c->ccc) {
3127 if (!u8c->p)
3128 u8c->len--;
3129 return (unsigned char)*u8c->s++;
3134 if (u8c->nccc == STOPPER) {
3140 assert(u8c->ccc == STOPPER);
3141 u8c->ccc = MINCCC - 1;
3142 u8c->nccc = ccc;
3143 u8c->sp = u8c->p;
3144 u8c->ss = u8c->s;
3145 u8c->slen = u8c->len;
3146 if (!u8c->p)
3147 u8c->len -= utf8clen(u8c->s);
3148 u8c->s += utf8clen(u8c->s);
3151 if (!u8c->p)
3152 u8c->len -= utf8clen(u8c->s);
3153 u8c->s += utf8clen(u8c->s);
3154 } else if (u8c->nccc != MAXCCC + 1) {
3156 u8c->ccc = u8c->nccc;
3157 u8c->nccc = MAXCCC + 1;
3158 u8c->s = u8c->ss;
3159 u8c->p = u8c->sp;
3160 u8c->len = u8c->slen;
3163 u8c->ccc = STOPPER;
3164 u8c->nccc = STOPPER;
3165 u8c->sp = NULL;
3166 u8c->ss = NULL;
3167 u8c->slen = 0;
3179 struct utf8cursor u8c;
3184 if (utf8cursor(&u8c, tree, s))
3186 while ((c = utf8byte(&u8c)) > 0)
3199 if (utf8cursor(&u8c, tree, s))
3201 while ((c = utf8byte(&u8c)) > 0)