1/*
2 * tkTextIndex.c --
3 *
4 *	This module provides procedures that manipulate indices for
5 *	text widgets.
6 *
7 * Copyright (c) 1992-1994 The Regents of the University of California.
8 * Copyright (c) 1994-1997 Sun Microsystems, Inc.
9 *
10 * See the file "license.terms" for information on usage and redistribution
11 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
12 *
13 * RCS: @(#) $Id: tkTextIndex.c,v 1.6 2002/08/05 04:30:40 dgp Exp $
14 */
15
16#include "default.h"
17#include "tkPort.h"
18#include "tkInt.h"
19#include "tkText.h"
20
21/*
22 * Index to use to select last character in line (very large integer):
23 */
24
25#define LAST_CHAR 1000000
26
27/*
28 * Forward declarations for procedures defined later in this file:
29 */
30
31static CONST char *	ForwBack _ANSI_ARGS_((CONST char *string,
32			    TkTextIndex *indexPtr));
33static CONST char *	StartEnd _ANSI_ARGS_((CONST char *string,
34			    TkTextIndex *indexPtr));
35
36/*
37 *---------------------------------------------------------------------------
38 *
39 * TkTextMakeByteIndex --
40 *
41 *	Given a line index and a byte index, look things up in the B-tree
42 *	and fill in a TkTextIndex structure.
43 *
44 * Results:
45 *	The structure at *indexPtr is filled in with information about the
46 *	character at lineIndex and byteIndex (or the closest existing
47 *	character, if the specified one doesn't exist), and indexPtr is
48 *	returned as result.
49 *
50 * Side effects:
51 *	None.
52 *
53 *---------------------------------------------------------------------------
54 */
55
56TkTextIndex *
57TkTextMakeByteIndex(tree, lineIndex, byteIndex, indexPtr)
58    TkTextBTree tree;		/* Tree that lineIndex and charIndex refer
59				 * to. */
60    int lineIndex;		/* Index of desired line (0 means first
61				 * line of text). */
62    int byteIndex;		/* Byte index of desired character. */
63    TkTextIndex *indexPtr;	/* Structure to fill in. */
64{
65    TkTextSegment *segPtr;
66    int index;
67    CONST char *p, *start;
68    Tcl_UniChar ch;
69
70    indexPtr->tree = tree;
71    if (lineIndex < 0) {
72	lineIndex = 0;
73	byteIndex = 0;
74    }
75    if (byteIndex < 0) {
76	byteIndex = 0;
77    }
78    indexPtr->linePtr = TkBTreeFindLine(tree, lineIndex);
79    if (indexPtr->linePtr == NULL) {
80	indexPtr->linePtr = TkBTreeFindLine(tree, TkBTreeNumLines(tree));
81	byteIndex = 0;
82    }
83    if (byteIndex == 0) {
84	indexPtr->byteIndex = byteIndex;
85	return indexPtr;
86    }
87
88    /*
89     * Verify that the index is within the range of the line and points
90     * to a valid character boundary.
91     */
92
93    index = 0;
94    for (segPtr = indexPtr->linePtr->segPtr; ; segPtr = segPtr->nextPtr) {
95	if (segPtr == NULL) {
96	    /*
97	     * Use the index of the last character in the line.  Since
98	     * the last character on the line is guaranteed to be a '\n',
99	     * we can back up a constant sizeof(char) bytes.
100	     */
101
102	    indexPtr->byteIndex = index - sizeof(char);
103	    break;
104	}
105	if (index + segPtr->size > byteIndex) {
106	    indexPtr->byteIndex = byteIndex;
107	    if ((byteIndex > index) && (segPtr->typePtr == &tkTextCharType)) {
108		/*
109		 * Prevent UTF-8 character from being split up by ensuring
110		 * that byteIndex falls on a character boundary.  If index
111		 * falls in the middle of a UTF-8 character, it will be
112		 * adjusted to the end of that UTF-8 character.
113		 */
114
115		start = segPtr->body.chars + (byteIndex - index);
116		p = Tcl_UtfPrev(start, segPtr->body.chars);
117		p += Tcl_UtfToUniChar(p, &ch);
118		indexPtr->byteIndex += p - start;
119	    }
120	    break;
121	}
122	index += segPtr->size;
123    }
124    return indexPtr;
125}
126
127/*
128 *---------------------------------------------------------------------------
129 *
130 * TkTextMakeCharIndex --
131 *
132 *	Given a line index and a character index, look things up in the
133 *	B-tree and fill in a TkTextIndex structure.
134 *
135 * Results:
136 *	The structure at *indexPtr is filled in with information about the
137 *	character at lineIndex and charIndex (or the closest existing
138 *	character, if the specified one doesn't exist), and indexPtr is
139 *	returned as result.
140 *
141 * Side effects:
142 *	None.
143 *
144 *---------------------------------------------------------------------------
145 */
146
147TkTextIndex *
148TkTextMakeCharIndex(tree, lineIndex, charIndex, indexPtr)
149    TkTextBTree tree;		/* Tree that lineIndex and charIndex refer
150				 * to. */
151    int lineIndex;		/* Index of desired line (0 means first
152				 * line of text). */
153    int charIndex;		/* Index of desired character. */
154    TkTextIndex *indexPtr;	/* Structure to fill in. */
155{
156    register TkTextSegment *segPtr;
157    char *p, *start, *end;
158    int index, offset;
159    Tcl_UniChar ch;
160
161    indexPtr->tree = tree;
162    if (lineIndex < 0) {
163	lineIndex = 0;
164	charIndex = 0;
165    }
166    if (charIndex < 0) {
167	charIndex = 0;
168    }
169    indexPtr->linePtr = TkBTreeFindLine(tree, lineIndex);
170    if (indexPtr->linePtr == NULL) {
171	indexPtr->linePtr = TkBTreeFindLine(tree, TkBTreeNumLines(tree));
172	charIndex = 0;
173    }
174
175    /*
176     * Verify that the index is within the range of the line.
177     * If not, just use the index of the last character in the line.
178     */
179
180    index = 0;
181    for (segPtr = indexPtr->linePtr->segPtr; ; segPtr = segPtr->nextPtr) {
182	if (segPtr == NULL) {
183	    /*
184	     * Use the index of the last character in the line.  Since
185	     * the last character on the line is guaranteed to be a '\n',
186	     * we can back up a constant sizeof(char) bytes.
187	     */
188
189	    indexPtr->byteIndex = index - sizeof(char);
190	    break;
191	}
192	if (segPtr->typePtr == &tkTextCharType) {
193	    /*
194	     * Turn character offset into a byte offset.
195	     */
196
197	    start = segPtr->body.chars;
198	    end = start + segPtr->size;
199	    for (p = start; p < end; p += offset) {
200		if (charIndex == 0) {
201		    indexPtr->byteIndex = index;
202		    return indexPtr;
203		}
204		charIndex--;
205		offset = Tcl_UtfToUniChar(p, &ch);
206		index += offset;
207	    }
208	} else {
209	    if (charIndex < segPtr->size) {
210		indexPtr->byteIndex = index;
211		break;
212	    }
213	    charIndex -= segPtr->size;
214	    index += segPtr->size;
215	}
216    }
217    return indexPtr;
218}
219
220/*
221 *---------------------------------------------------------------------------
222 *
223 * TkTextIndexToSeg --
224 *
225 *	Given an index, this procedure returns the segment and offset
226 *	within segment for the index.
227 *
228 * Results:
229 *	The return value is a pointer to the segment referred to by
230 *	indexPtr; this will always be a segment with non-zero size.  The
231 *	variable at *offsetPtr is set to hold the integer offset within
232 *	the segment of the character given by indexPtr.
233 *
234 * Side effects:
235 *	None.
236 *
237 *---------------------------------------------------------------------------
238 */
239
240TkTextSegment *
241TkTextIndexToSeg(indexPtr, offsetPtr)
242    CONST TkTextIndex *indexPtr;/* Text index. */
243    int *offsetPtr;		/* Where to store offset within segment, or
244				 * NULL if offset isn't wanted. */
245{
246    TkTextSegment *segPtr;
247    int offset;
248
249    for (offset = indexPtr->byteIndex, segPtr = indexPtr->linePtr->segPtr;
250	    offset >= segPtr->size;
251	    offset -= segPtr->size, segPtr = segPtr->nextPtr) {
252	/* Empty loop body. */
253    }
254    if (offsetPtr != NULL) {
255	*offsetPtr = offset;
256    }
257    return segPtr;
258}
259
260/*
261 *---------------------------------------------------------------------------
262 *
263 * TkTextSegToOffset --
264 *
265 *	Given a segment pointer and the line containing it, this procedure
266 *	returns the offset of the segment within its line.
267 *
268 * Results:
269 *	The return value is the offset (within its line) of the first
270 *	character in segPtr.
271 *
272 * Side effects:
273 *	None.
274 *
275 *---------------------------------------------------------------------------
276 */
277
278int
279TkTextSegToOffset(segPtr, linePtr)
280    CONST TkTextSegment *segPtr;/* Segment whose offset is desired. */
281    CONST TkTextLine *linePtr;	/* Line containing segPtr. */
282{
283    CONST TkTextSegment *segPtr2;
284    int offset;
285
286    offset = 0;
287    for (segPtr2 = linePtr->segPtr; segPtr2 != segPtr;
288	    segPtr2 = segPtr2->nextPtr) {
289	offset += segPtr2->size;
290    }
291    return offset;
292}
293
294/*
295 *---------------------------------------------------------------------------
296 *
297 * TkTextGetIndex --
298 *
299 *	Given a string, return the index that is described.
300 *
301 * Results:
302 *	The return value is a standard Tcl return result.  If TCL_OK is
303 *	returned, then everything went well and the index at *indexPtr is
304 *	filled in; otherwise TCL_ERROR is returned and an error message
305 *	is left in the interp's result.
306 *
307 * Side effects:
308 *	None.
309 *
310 *---------------------------------------------------------------------------
311 */
312
313int
314TkTextGetIndex(interp, textPtr, string, indexPtr)
315    Tcl_Interp *interp;		/* Use this for error reporting. */
316    TkText *textPtr;		/* Information about text widget. */
317    CONST char *string;		/* Textual description of position. */
318    TkTextIndex *indexPtr;	/* Index structure to fill in. */
319{
320    char *p, *end, *endOfBase;
321    Tcl_HashEntry *hPtr;
322    TkTextTag *tagPtr;
323    TkTextSearch search;
324    TkTextIndex first, last;
325    int wantLast, result;
326    char c;
327    CONST char *cp;
328    Tcl_DString copy;
329
330    /*
331     *---------------------------------------------------------------------
332     * Stage 1: check to see if the index consists of nothing but a mark
333     * name.  We do this check now even though it's also done later, in
334     * order to allow mark names that include funny characters such as
335     * spaces or "+1c".
336     *---------------------------------------------------------------------
337     */
338
339    if (TkTextMarkNameToIndex(textPtr, string, indexPtr) == TCL_OK) {
340	return TCL_OK;
341    }
342
343    /*
344     *------------------------------------------------
345     * Stage 2: start again by parsing the base index.
346     *------------------------------------------------
347     */
348
349    indexPtr->tree = textPtr->tree;
350
351    /*
352     * First look for the form "tag.first" or "tag.last" where "tag"
353     * is the name of a valid tag.  Try to use up as much as possible
354     * of the string in this check (strrchr instead of strchr below).
355     * Doing the check now, and in this way, allows tag names to include
356     * funny characters like "@" or "+1c".
357     */
358
359    Tcl_DStringInit(&copy);
360    p = strrchr(Tcl_DStringAppend(&copy, string, -1), '.');
361    if (p != NULL) {
362	if ((p[1] == 'f') && (strncmp(p+1, "first", 5) == 0)) {
363	    wantLast = 0;
364	    endOfBase = p+6;
365	} else if ((p[1] == 'l') && (strncmp(p+1, "last", 4) == 0)) {
366	    wantLast = 1;
367	    endOfBase = p+5;
368	} else {
369	    goto tryxy;
370	}
371	*p = 0;
372	hPtr = Tcl_FindHashEntry(&textPtr->tagTable, Tcl_DStringValue(&copy));
373	*p = '.';
374	if (hPtr == NULL) {
375	    goto tryxy;
376	}
377	tagPtr = (TkTextTag *) Tcl_GetHashValue(hPtr);
378	TkTextMakeByteIndex(textPtr->tree, 0, 0, &first);
379	TkTextMakeByteIndex(textPtr->tree, TkBTreeNumLines(textPtr->tree), 0,
380		&last);
381	TkBTreeStartSearch(&first, &last, tagPtr, &search);
382	if (!TkBTreeCharTagged(&first, tagPtr) && !TkBTreeNextTag(&search)) {
383	    Tcl_ResetResult(interp);
384	    Tcl_AppendResult(interp,
385		    "text doesn't contain any characters tagged with \"",
386		    Tcl_GetHashKey(&textPtr->tagTable, hPtr), "\"",
387			    (char *) NULL);
388	    Tcl_DStringFree(&copy);
389	    return TCL_ERROR;
390	}
391	*indexPtr = search.curIndex;
392	if (wantLast) {
393	    while (TkBTreeNextTag(&search)) {
394		*indexPtr = search.curIndex;
395	    }
396	}
397	goto gotBase;
398    }
399
400    tryxy:
401    if (string[0] == '@') {
402	/*
403	 * Find character at a given x,y location in the window.
404	 */
405
406	int x, y;
407
408	cp = string+1;
409	x = strtol(cp, &end, 0);
410	if ((end == cp) || (*end != ',')) {
411	    goto error;
412	}
413	cp = end+1;
414	y = strtol(cp, &end, 0);
415	if (end == cp) {
416	    goto error;
417	}
418	TkTextPixelIndex(textPtr, x, y, indexPtr);
419	endOfBase = end;
420	goto gotBase;
421    }
422
423    if (isdigit(UCHAR(string[0])) || (string[0] == '-')) {
424	int lineIndex, charIndex;
425
426	/*
427	 * Base is identified with line and character indices.
428	 */
429
430	lineIndex = strtol(string, &end, 0) - 1;
431	if ((end == string) || (*end != '.')) {
432	    goto error;
433	}
434	p = end+1;
435	if ((*p == 'e') && (strncmp(p, "end", 3) == 0)) {
436	    charIndex = LAST_CHAR;
437	    endOfBase = p+3;
438	} else {
439	    charIndex = strtol(p, &end, 0);
440	    if (end == p) {
441		goto error;
442	    }
443	    endOfBase = end;
444	}
445	TkTextMakeCharIndex(textPtr->tree, lineIndex, charIndex, indexPtr);
446	goto gotBase;
447    }
448
449    for (p = Tcl_DStringValue(&copy); *p != 0; p++) {
450	if (isspace(UCHAR(*p)) || (*p == '+') || (*p == '-')) {
451	    break;
452	}
453    }
454    endOfBase = p;
455    if (string[0] == '.') {
456	/*
457	 * See if the base position is the name of an embedded window.
458	 */
459
460	c = *endOfBase;
461	*endOfBase = 0;
462	result = TkTextWindowIndex(textPtr, Tcl_DStringValue(&copy), indexPtr);
463	*endOfBase = c;
464	if (result != 0) {
465	    goto gotBase;
466	}
467    }
468    if ((string[0] == 'e')
469	    && (strncmp(string, "end",
470	    (size_t) (endOfBase-Tcl_DStringValue(&copy))) == 0)) {
471	/*
472	 * Base position is end of text.
473	 */
474
475	TkTextMakeByteIndex(textPtr->tree, TkBTreeNumLines(textPtr->tree),
476		0, indexPtr);
477	goto gotBase;
478    } else {
479	/*
480	 * See if the base position is the name of a mark.
481	 */
482
483	c = *endOfBase;
484	*endOfBase = 0;
485	result = TkTextMarkNameToIndex(textPtr, Tcl_DStringValue(&copy),
486		indexPtr);
487	*endOfBase = c;
488	if (result == TCL_OK) {
489	    goto gotBase;
490	}
491
492	/*
493	 * See if the base position is the name of an embedded image
494	 */
495
496	c = *endOfBase;
497	*endOfBase = 0;
498	result = TkTextImageIndex(textPtr, Tcl_DStringValue(&copy), indexPtr);
499	*endOfBase = c;
500	if (result != 0) {
501	    goto gotBase;
502	}
503    }
504    goto error;
505
506    /*
507     *-------------------------------------------------------------------
508     * Stage 3: process zero or more modifiers.  Each modifier is either
509     * a keyword like "wordend" or "linestart", or it has the form
510     * "op count units" where op is + or -, count is a number, and units
511     * is "chars" or "lines".
512     *-------------------------------------------------------------------
513     */
514
515    gotBase:
516    cp = endOfBase;
517    while (1) {
518	while (isspace(UCHAR(*cp))) {
519	    cp++;
520	}
521	if (*cp == 0) {
522	    break;
523	}
524
525	if ((*cp == '+') || (*cp == '-')) {
526	    cp = ForwBack(cp, indexPtr);
527	} else {
528	    cp = StartEnd(cp, indexPtr);
529	}
530	if (cp == NULL) {
531	    goto error;
532	}
533    }
534    Tcl_DStringFree(&copy);
535    return TCL_OK;
536
537    error:
538    Tcl_DStringFree(&copy);
539    Tcl_ResetResult(interp);
540    Tcl_AppendResult(interp, "bad text index \"", string, "\"",
541	    (char *) NULL);
542    return TCL_ERROR;
543}
544
545/*
546 *---------------------------------------------------------------------------
547 *
548 * TkTextPrintIndex --
549 *
550 *	This procedure generates a string description of an index, suitable
551 *	for reading in again later.
552 *
553 * Results:
554 *	The characters pointed to by string are modified.
555 *
556 * Side effects:
557 *	None.
558 *
559 *---------------------------------------------------------------------------
560 */
561
562void
563TkTextPrintIndex(indexPtr, string)
564    CONST TkTextIndex *indexPtr;/* Pointer to index. */
565    char *string;		/* Place to store the position.  Must have
566				 * at least TK_POS_CHARS characters. */
567{
568    TkTextSegment *segPtr;
569    int numBytes, charIndex;
570
571    numBytes = indexPtr->byteIndex;
572    charIndex = 0;
573    for (segPtr = indexPtr->linePtr->segPtr; ; segPtr = segPtr->nextPtr) {
574	if (numBytes <= segPtr->size) {
575	    break;
576	}
577	if (segPtr->typePtr == &tkTextCharType) {
578	    charIndex += Tcl_NumUtfChars(segPtr->body.chars, segPtr->size);
579	} else {
580	    charIndex += segPtr->size;
581	}
582	numBytes -= segPtr->size;
583    }
584    if (segPtr->typePtr == &tkTextCharType) {
585	charIndex += Tcl_NumUtfChars(segPtr->body.chars, numBytes);
586    } else {
587	charIndex += numBytes;
588    }
589    sprintf(string, "%d.%d", TkBTreeLineIndex(indexPtr->linePtr) + 1,
590	    charIndex);
591}
592
593/*
594 *---------------------------------------------------------------------------
595 *
596 * TkTextIndexCmp --
597 *
598 *	Compare two indices to see which one is earlier in the text.
599 *
600 * Results:
601 *	The return value is 0 if index1Ptr and index2Ptr refer to the same
602 *	position in the file, -1 if index1Ptr refers to an earlier position
603 *	than index2Ptr, and 1 otherwise.
604 *
605 * Side effects:
606 *	None.
607 *
608 *---------------------------------------------------------------------------
609 */
610
611int
612TkTextIndexCmp(index1Ptr, index2Ptr)
613    CONST TkTextIndex *index1Ptr;		/* First index. */
614    CONST TkTextIndex *index2Ptr;		/* Second index. */
615{
616    int line1, line2;
617
618    if (index1Ptr->linePtr == index2Ptr->linePtr) {
619	if (index1Ptr->byteIndex < index2Ptr->byteIndex) {
620	    return -1;
621	} else if (index1Ptr->byteIndex > index2Ptr->byteIndex) {
622	    return 1;
623	} else {
624	    return 0;
625	}
626    }
627    line1 = TkBTreeLineIndex(index1Ptr->linePtr);
628    line2 = TkBTreeLineIndex(index2Ptr->linePtr);
629    if (line1 < line2) {
630	return -1;
631    }
632    if (line1 > line2) {
633	return 1;
634    }
635    return 0;
636}
637
638/*
639 *---------------------------------------------------------------------------
640 *
641 * ForwBack --
642 *
643 *	This procedure handles +/- modifiers for indices to adjust the
644 *	index forwards or backwards.
645 *
646 * Results:
647 *	If the modifier in string is successfully parsed then the return
648 *	value is the address of the first character after the modifier,
649 *	and *indexPtr is updated to reflect the modifier.  If there is a
650 *	syntax error in the modifier then NULL is returned.
651 *
652 * Side effects:
653 *	None.
654 *
655 *---------------------------------------------------------------------------
656 */
657
658static CONST char *
659ForwBack(string, indexPtr)
660    CONST char *string;		/* String to parse for additional info
661				 * about modifier (count and units).
662				 * Points to "+" or "-" that starts
663				 * modifier. */
664    TkTextIndex *indexPtr;	/* Index to update as specified in string. */
665{
666    register CONST char *p, *units;
667    char *end;
668    int count, lineIndex;
669    size_t length;
670
671    /*
672     * Get the count (how many units forward or backward).
673     */
674
675    p = string+1;
676    while (isspace(UCHAR(*p))) {
677	p++;
678    }
679    count = strtol(p, &end, 0);
680    if (end == p) {
681	return NULL;
682    }
683    p = end;
684    while (isspace(UCHAR(*p))) {
685	p++;
686    }
687
688    /*
689     * Find the end of this modifier (next space or + or - character),
690     * then parse the unit specifier and update the position
691     * accordingly.
692     */
693
694    units = p;
695    while ((*p != '\0') && !isspace(UCHAR(*p)) && (*p != '+') && (*p != '-')) {
696	p++;
697    }
698    length = p - units;
699    if ((*units == 'c') && (strncmp(units, "chars", length) == 0)) {
700	if (*string == '+') {
701	    TkTextIndexForwChars(indexPtr, count, indexPtr);
702	} else {
703	    TkTextIndexBackChars(indexPtr, count, indexPtr);
704	}
705    } else if ((*units == 'l') && (strncmp(units, "lines", length) == 0)) {
706	lineIndex = TkBTreeLineIndex(indexPtr->linePtr);
707	if (*string == '+') {
708	    lineIndex += count;
709	} else {
710	    lineIndex -= count;
711
712	    /*
713	     * The check below retains the character position, even
714	     * if the line runs off the start of the file.  Without
715	     * it, the character position will get reset to 0 by
716	     * TkTextMakeIndex.
717	     */
718
719	    if (lineIndex < 0) {
720		lineIndex = 0;
721	    }
722	}
723	/*
724	 * This doesn't work quite right if using a proportional font or
725	 * UTF-8 characters with varying numbers of bytes.  The cursor will
726	 * bop around, keeping a constant number of bytes (not characters)
727	 * from the left edge (but making sure not to split any UTF-8
728	 * characters), regardless of the x-position the index corresponds
729	 * to.  The proper way to do this is to get the x-position of the
730	 * index and then pick the character at the same x-position in the
731	 * new line.
732	 */
733
734	TkTextMakeByteIndex(indexPtr->tree, lineIndex, indexPtr->byteIndex,
735		indexPtr);
736    } else {
737	return NULL;
738    }
739    return p;
740}
741
742/*
743 *---------------------------------------------------------------------------
744 *
745 * TkTextIndexForwBytes --
746 *
747 *	Given an index for a text widget, this procedure creates a new
748 *	index that points "count" bytes ahead of the source index.
749 *
750 * Results:
751 *	*dstPtr is modified to refer to the character "count" bytes after
752 *	srcPtr, or to the last character in the TkText if there aren't
753 *	"count" bytes left.
754 *
755 * Side effects:
756 *	None.
757 *
758 *---------------------------------------------------------------------------
759 */
760
761void
762TkTextIndexForwBytes(srcPtr, byteCount, dstPtr)
763    CONST TkTextIndex *srcPtr;	/* Source index. */
764    int byteCount;		/* How many bytes forward to move.  May be
765				 * negative. */
766    TkTextIndex *dstPtr;	/* Destination index: gets modified. */
767{
768    TkTextLine *linePtr;
769    TkTextSegment *segPtr;
770    int lineLength;
771
772    if (byteCount < 0) {
773	TkTextIndexBackBytes(srcPtr, -byteCount, dstPtr);
774	return;
775    }
776
777    *dstPtr = *srcPtr;
778    dstPtr->byteIndex += byteCount;
779    while (1) {
780	/*
781	 * Compute the length of the current line.
782	 */
783
784	lineLength = 0;
785	for (segPtr = dstPtr->linePtr->segPtr; segPtr != NULL;
786		segPtr = segPtr->nextPtr) {
787	    lineLength += segPtr->size;
788	}
789
790	/*
791	 * If the new index is in the same line then we're done.
792	 * Otherwise go on to the next line.
793	 */
794
795	if (dstPtr->byteIndex < lineLength) {
796	    return;
797	}
798	dstPtr->byteIndex -= lineLength;
799	linePtr = TkBTreeNextLine(dstPtr->linePtr);
800	if (linePtr == NULL) {
801	    dstPtr->byteIndex = lineLength - 1;
802	    return;
803	}
804	dstPtr->linePtr = linePtr;
805    }
806}
807
808/*
809 *---------------------------------------------------------------------------
810 *
811 * TkTextIndexForwChars --
812 *
813 *	Given an index for a text widget, this procedure creates a new
814 *	index that points "count" characters ahead of the source index.
815 *
816 * Results:
817 *	*dstPtr is modified to refer to the character "count" characters
818 *	after srcPtr, or to the last character in the TkText if there
819 *	aren't "count" characters left in the file.
820 *
821 * Side effects:
822 *	None.
823 *
824 *---------------------------------------------------------------------------
825 */
826
827void
828TkTextIndexForwChars(srcPtr, charCount, dstPtr)
829    CONST TkTextIndex *srcPtr;	/* Source index. */
830    int charCount;		/* How many characters forward to move.
831				 * May be negative. */
832    TkTextIndex *dstPtr;	/* Destination index: gets modified. */
833{
834    TkTextLine *linePtr;
835    TkTextSegment *segPtr;
836    int byteOffset;
837    char *start, *end, *p;
838    Tcl_UniChar ch;
839
840    if (charCount < 0) {
841	TkTextIndexBackChars(srcPtr, -charCount, dstPtr);
842	return;
843    }
844
845    *dstPtr = *srcPtr;
846
847    /*
848     * Find seg that contains src byteIndex.
849     * Move forward specified number of chars.
850     */
851
852    segPtr = TkTextIndexToSeg(dstPtr, &byteOffset);
853    while (1) {
854	/*
855	 * Go through each segment in line looking for specified character
856	 * index.
857	 */
858
859	for ( ; segPtr != NULL; segPtr = segPtr->nextPtr) {
860	    if (segPtr->typePtr == &tkTextCharType) {
861		start = segPtr->body.chars + byteOffset;
862		end = segPtr->body.chars + segPtr->size;
863		for (p = start; p < end; p += Tcl_UtfToUniChar(p, &ch)) {
864		    if (charCount == 0) {
865			dstPtr->byteIndex += (p - start);
866			return;
867		    }
868		    charCount--;
869		}
870	    } else {
871		if (charCount < segPtr->size - byteOffset) {
872		    dstPtr->byteIndex += charCount;
873		    return;
874		}
875		charCount -= segPtr->size - byteOffset;
876	    }
877	    dstPtr->byteIndex += segPtr->size - byteOffset;
878	    byteOffset = 0;
879	}
880
881	/*
882	 * Go to the next line.  If we are at the end of the text item,
883	 * back up one byte (for the terminal '\n' character) and return
884	 * that index.
885	 */
886
887	linePtr = TkBTreeNextLine(dstPtr->linePtr);
888	if (linePtr == NULL) {
889	    dstPtr->byteIndex -= sizeof(char);
890	    return;
891	}
892	dstPtr->linePtr = linePtr;
893	dstPtr->byteIndex = 0;
894	segPtr = dstPtr->linePtr->segPtr;
895    }
896}
897
898/*
899 *---------------------------------------------------------------------------
900 *
901 * TkTextIndexBackBytes --
902 *
903 *	Given an index for a text widget, this procedure creates a new
904 *	index that points "count" bytes earlier than the source index.
905 *
906 * Results:
907 *	*dstPtr is modified to refer to the character "count" bytes before
908 *	srcPtr, or to the first character in the TkText if there aren't
909 *	"count" bytes earlier than srcPtr.
910 *
911 * Side effects:
912 *	None.
913 *
914 *---------------------------------------------------------------------------
915 */
916
917void
918TkTextIndexBackBytes(srcPtr, byteCount, dstPtr)
919    CONST TkTextIndex *srcPtr;	/* Source index. */
920    int byteCount;		/* How many bytes backward to move.  May be
921				 * negative. */
922    TkTextIndex *dstPtr;	/* Destination index: gets modified. */
923{
924    TkTextSegment *segPtr;
925    int lineIndex;
926
927    if (byteCount < 0) {
928	TkTextIndexForwBytes(srcPtr, -byteCount, dstPtr);
929	return;
930    }
931
932    *dstPtr = *srcPtr;
933    dstPtr->byteIndex -= byteCount;
934    lineIndex = -1;
935    while (dstPtr->byteIndex < 0) {
936	/*
937	 * Move back one line in the text.  If we run off the beginning
938	 * of the file then just return the first character in the text.
939	 */
940
941	if (lineIndex < 0) {
942	    lineIndex = TkBTreeLineIndex(dstPtr->linePtr);
943	}
944	if (lineIndex == 0) {
945	    dstPtr->byteIndex = 0;
946	    return;
947	}
948	lineIndex--;
949	dstPtr->linePtr = TkBTreeFindLine(dstPtr->tree, lineIndex);
950
951	/*
952	 * Compute the length of the line and add that to dstPtr->charIndex.
953	 */
954
955	for (segPtr = dstPtr->linePtr->segPtr; segPtr != NULL;
956		segPtr = segPtr->nextPtr) {
957	    dstPtr->byteIndex += segPtr->size;
958	}
959    }
960}
961
962/*
963 *---------------------------------------------------------------------------
964 *
965 * TkTextIndexBackChars --
966 *
967 *	Given an index for a text widget, this procedure creates a new
968 *	index that points "count" characters earlier than the source index.
969 *
970 * Results:
971 *	*dstPtr is modified to refer to the character "count" characters
972 *	before srcPtr, or to the first character in the file if there
973 *	aren't "count" characters earlier than srcPtr.
974 *
975 * Side effects:
976 *	None.
977 *
978 *---------------------------------------------------------------------------
979 */
980
981void
982TkTextIndexBackChars(srcPtr, charCount, dstPtr)
983    CONST TkTextIndex *srcPtr;	/* Source index. */
984    int charCount;		/* How many characters backward to move.
985				 * May be negative. */
986    TkTextIndex *dstPtr;	/* Destination index: gets modified. */
987{
988    TkTextSegment *segPtr, *oldPtr;
989    int lineIndex, segSize;
990    CONST char *p, *start, *end;
991
992    if (charCount <= 0) {
993	TkTextIndexForwChars(srcPtr, -charCount, dstPtr);
994	return;
995    }
996
997    *dstPtr = *srcPtr;
998
999    /*
1000     * Find offset within seg that contains byteIndex.
1001     * Move backward specified number of chars.
1002     */
1003
1004    lineIndex = -1;
1005
1006    segSize = dstPtr->byteIndex;
1007    for (segPtr = dstPtr->linePtr->segPtr; ; segPtr = segPtr->nextPtr) {
1008	if (segSize <= segPtr->size) {
1009	    break;
1010	}
1011	segSize -= segPtr->size;
1012    }
1013    while (1) {
1014	if (segPtr->typePtr == &tkTextCharType) {
1015	    start = segPtr->body.chars;
1016	    end = segPtr->body.chars + segSize;
1017	    for (p = end; ; p = Tcl_UtfPrev(p, start)) {
1018		if (charCount == 0) {
1019		    dstPtr->byteIndex -= (end - p);
1020		    return;
1021		}
1022		if (p == start) {
1023		    break;
1024		}
1025		charCount--;
1026	    }
1027	} else {
1028	    if (charCount <= segSize) {
1029		dstPtr->byteIndex -= charCount;
1030		return;
1031	    }
1032	    charCount -= segSize;
1033	}
1034	dstPtr->byteIndex -= segSize;
1035
1036	/*
1037	 * Move back into previous segment.
1038	 */
1039
1040	oldPtr = segPtr;
1041	segPtr = dstPtr->linePtr->segPtr;
1042	if (segPtr != oldPtr) {
1043	    for ( ; segPtr->nextPtr != oldPtr; segPtr = segPtr->nextPtr) {
1044		/* Empty body. */
1045	    }
1046	    segSize = segPtr->size;
1047	    continue;
1048	}
1049
1050	/*
1051	 * Move back to previous line.
1052	 */
1053
1054	if (lineIndex < 0) {
1055	    lineIndex = TkBTreeLineIndex(dstPtr->linePtr);
1056	}
1057	if (lineIndex == 0) {
1058	    dstPtr->byteIndex = 0;
1059	    return;
1060	}
1061	lineIndex--;
1062	dstPtr->linePtr = TkBTreeFindLine(dstPtr->tree, lineIndex);
1063
1064	/*
1065	 * Compute the length of the line and add that to dstPtr->byteIndex.
1066	 */
1067
1068	oldPtr = dstPtr->linePtr->segPtr;
1069	for (segPtr = oldPtr; segPtr != NULL; segPtr = segPtr->nextPtr) {
1070	    dstPtr->byteIndex += segPtr->size;
1071	    oldPtr = segPtr;
1072	}
1073	segPtr = oldPtr;
1074	segSize = segPtr->size;
1075    }
1076}
1077
1078/*
1079 *----------------------------------------------------------------------
1080 *
1081 * StartEnd --
1082 *
1083 *	This procedure handles modifiers like "wordstart" and "lineend"
1084 *	to adjust indices forwards or backwards.
1085 *
1086 * Results:
1087 *	If the modifier is successfully parsed then the return value
1088 *	is the address of the first character after the modifier, and
1089 *	*indexPtr is updated to reflect the modifier. If there is a
1090 *	syntax error in the modifier then NULL is returned.
1091 *
1092 * Side effects:
1093 *	None.
1094 *
1095 *----------------------------------------------------------------------
1096 */
1097
1098static CONST char *
1099StartEnd(string, indexPtr)
1100    CONST char *string;		/* String to parse for additional info
1101				 * about modifier (count and units).
1102				 * Points to first character of modifer
1103				 * word. */
1104    TkTextIndex *indexPtr;	/* Index to mdoify based on string. */
1105{
1106    CONST char *p;
1107    int c, offset;
1108    size_t length;
1109    register TkTextSegment *segPtr;
1110
1111    /*
1112     * Find the end of the modifier word.
1113     */
1114
1115    for (p = string; isalnum(UCHAR(*p)); p++) {
1116	/* Empty loop body. */
1117    }
1118    length = p-string;
1119    if ((*string == 'l') && (strncmp(string, "lineend", length) == 0)
1120	    && (length >= 5)) {
1121	indexPtr->byteIndex = 0;
1122	for (segPtr = indexPtr->linePtr->segPtr; segPtr != NULL;
1123		segPtr = segPtr->nextPtr) {
1124	    indexPtr->byteIndex += segPtr->size;
1125	}
1126	indexPtr->byteIndex -= sizeof(char);
1127    } else if ((*string == 'l') && (strncmp(string, "linestart", length) == 0)
1128	    && (length >= 5)) {
1129	indexPtr->byteIndex = 0;
1130    } else if ((*string == 'w') && (strncmp(string, "wordend", length) == 0)
1131	    && (length >= 5)) {
1132	int firstChar = 1;
1133
1134	/*
1135	 * If the current character isn't part of a word then just move
1136	 * forward one character.  Otherwise move forward until finding
1137	 * a character that isn't part of a word and stop there.
1138	 */
1139
1140	segPtr = TkTextIndexToSeg(indexPtr, &offset);
1141	while (1) {
1142	    if (segPtr->typePtr == &tkTextCharType) {
1143		c = segPtr->body.chars[offset];
1144		if (!isalnum(UCHAR(c)) && (c != '_')) {
1145		    break;
1146		}
1147		firstChar = 0;
1148	    }
1149	    offset += 1;
1150	    indexPtr->byteIndex += sizeof(char);
1151	    if (offset >= segPtr->size) {
1152		segPtr = TkTextIndexToSeg(indexPtr, &offset);
1153	    }
1154	}
1155	if (firstChar) {
1156	    TkTextIndexForwChars(indexPtr, 1, indexPtr);
1157	}
1158    } else if ((*string == 'w') && (strncmp(string, "wordstart", length) == 0)
1159	    && (length >= 5)) {
1160	int firstChar = 1;
1161
1162	/*
1163	 * Starting with the current character, look for one that's not
1164	 * part of a word and keep moving backward until you find one.
1165	 * Then if the character found wasn't the first one, move forward
1166	 * again one position.
1167	 */
1168
1169	segPtr = TkTextIndexToSeg(indexPtr, &offset);
1170	while (1) {
1171	    if (segPtr->typePtr == &tkTextCharType) {
1172		c = segPtr->body.chars[offset];
1173		if (!isalnum(UCHAR(c)) && (c != '_')) {
1174		    break;
1175		}
1176		firstChar = 0;
1177	    }
1178	    offset -= 1;
1179	    indexPtr->byteIndex -= sizeof(char);
1180	    if (offset < 0) {
1181		if (indexPtr->byteIndex < 0) {
1182		    indexPtr->byteIndex = 0;
1183		    goto done;
1184		}
1185		segPtr = TkTextIndexToSeg(indexPtr, &offset);
1186	    }
1187	}
1188	if (!firstChar) {
1189	    TkTextIndexForwChars(indexPtr, 1, indexPtr);
1190	}
1191    } else {
1192	return NULL;
1193    }
1194    done:
1195    return p;
1196}
1197