cvt.c revision 225736
1/*
2 * Copyright (C) 1984-2011  Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information about less, or for information on how to
8 * contact the author, see the README file.
9 */
10
11/*
12 * Routines to convert text in various ways.  Used by search.
13 */
14
15#include "less.h"
16#include "charset.h"
17
18extern int utf_mode;
19
20/*
21 * Get the length of a buffer needed to convert a string.
22 */
23	public int
24cvt_length(len, ops)
25	int len;
26	int ops;
27{
28	if (utf_mode)
29		/*
30		 * Just copying a string in UTF-8 mode can cause it to grow
31		 * in length.
32		 * Four output bytes for one input byte is the worst case.
33		 */
34		len *= 4;
35	return (len + 1);
36}
37
38/*
39 * Allocate a chpos array for use by cvt_text.
40 */
41	public int *
42cvt_alloc_chpos(len)
43	int len;
44{
45	int i;
46	int *chpos = (int *) ecalloc(sizeof(int), len);
47	/* Initialize all entries to an invalid position. */
48	for (i = 0;  i < len;  i++)
49		chpos[i] = -1;
50	return (chpos);
51}
52
53/*
54 * Convert text.  Perform the transformations specified by ops.
55 * Returns converted text in odst.  The original offset of each
56 * odst character (when it was in osrc) is returned in the chpos array.
57 */
58	public void
59cvt_text(odst, osrc, chpos, lenp, ops)
60	char *odst;
61	char *osrc;
62	int *chpos;
63	int *lenp;
64	int ops;
65{
66	char *dst;
67	char *src;
68	register char *src_end;
69	LWCHAR ch;
70
71	if (lenp != NULL)
72		src_end = osrc + *lenp;
73	else
74		src_end = osrc + strlen(osrc);
75
76	for (src = osrc, dst = odst;  src < src_end;  )
77	{
78		int src_pos = src - osrc;
79		int dst_pos = dst - odst;
80		ch = step_char(&src, +1, src_end);
81		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
82		{
83			/* Delete backspace and preceding char. */
84			do {
85				dst--;
86			} while (dst > odst &&
87				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
88		} else if ((ops & CVT_ANSI) && IS_CSI_START(ch))
89		{
90			/* Skip to end of ANSI escape sequence. */
91			src++;  /* skip the CSI start char */
92			while (src < src_end)
93				if (!is_ansi_middle(*src++))
94					break;
95		} else
96		{
97			/* Just copy the char to the destination buffer. */
98			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
99				ch = TO_LOWER(ch);
100			put_wchar(&dst, ch);
101			/*
102			 * Record the original position of the char.
103			 * But if we've already recorded a position
104			 * for this char (due to a backspace), leave
105			 * it alone; if multiple source chars map to
106			 * one destination char, we want the position
107			 * of the first one.
108			 */
109			if (chpos != NULL && chpos[dst_pos] < 0)
110				chpos[dst_pos] = src_pos;
111		}
112	}
113	if ((ops & CVT_CRLF) && dst > odst && dst[-1] == '\r')
114		dst--;
115	*dst = '\0';
116	if (lenp != NULL)
117		*lenp = dst - odst;
118	if (chpos != NULL)
119		chpos[dst - odst] = src - osrc;
120}
121