cvt.c revision 294286
1189251Ssam/*
2214734Srpaulo * Copyright (C) 1984-2015  Mark Nudelman
3214734Srpaulo *
4189251Ssam * You may distribute under the terms of either the GNU General Public
5252726Srpaulo * License or the Less License, as specified in the README file.
6252726Srpaulo *
7189251Ssam * For more information, see the README file.
8189251Ssam */
9189251Ssam
10189251Ssam/*
11189251Ssam * Routines to convert text in various ways.  Used by search.
12214734Srpaulo */
13214734Srpaulo
14189251Ssam#include "less.h"
15252726Srpaulo#include "charset.h"
16189251Ssam
17189251Ssamextern int utf_mode;
18189251Ssam
19189251Ssam/*
20189251Ssam * Get the length of a buffer needed to convert a string.
21189251Ssam */
22189251Ssam	public int
23189251Ssamcvt_length(len, ops)
24189251Ssam	int len;
25189251Ssam	int ops;
26189251Ssam{
27189251Ssam	if (utf_mode)
28189251Ssam		/*
29189251Ssam		 * Just copying a string in UTF-8 mode can cause it to grow
30189251Ssam		 * in length.
31189251Ssam		 * Four output bytes for one input byte is the worst case.
32189251Ssam		 */
33189251Ssam		len *= 4;
34189251Ssam	return (len + 1);
35189251Ssam}
36189251Ssam
37214734Srpaulo/*
38189251Ssam * Allocate a chpos array for use by cvt_text.
39189251Ssam */
40189251Ssam	public int *
41252726Srpaulocvt_alloc_chpos(len)
42189251Ssam	int len;
43189251Ssam{
44189251Ssam	int i;
45189251Ssam	int *chpos = (int *) ecalloc(sizeof(int), len);
46189251Ssam	/* Initialize all entries to an invalid position. */
47189251Ssam	for (i = 0;  i < len;  i++)
48189251Ssam		chpos[i] = -1;
49189251Ssam	return (chpos);
50189251Ssam}
51189251Ssam
52189251Ssam/*
53189251Ssam * Convert text.  Perform the transformations specified by ops.
54189251Ssam * Returns converted text in odst.  The original offset of each
55189251Ssam * odst character (when it was in osrc) is returned in the chpos array.
56189251Ssam */
57189251Ssam	public void
58189251Ssamcvt_text(odst, osrc, chpos, lenp, ops)
59189251Ssam	char *odst;
60189251Ssam	char *osrc;
61189251Ssam	int *chpos;
62189251Ssam	int *lenp;
63189251Ssam	int ops;
64189251Ssam{
65189251Ssam	char *dst;
66189251Ssam	char *edst = odst;
67189251Ssam	char *src;
68189251Ssam	register char *src_end;
69189251Ssam	LWCHAR ch;
70189251Ssam
71189251Ssam	if (lenp != NULL)
72189251Ssam		src_end = osrc + *lenp;
73189251Ssam	else
74189251Ssam		src_end = osrc + strlen(osrc);
75189251Ssam
76189251Ssam	for (src = osrc, dst = odst;  src < src_end;  )
77189251Ssam	{
78189251Ssam		int src_pos = (int) (src - osrc);
79189251Ssam		int dst_pos = (int) (dst - odst);
80189251Ssam		ch = step_char(&src, +1, src_end);
81214734Srpaulo		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
82189251Ssam		{
83189251Ssam			/* Delete backspace and preceding char. */
84189251Ssam			do {
85189251Ssam				dst--;
86189251Ssam			} while (dst > odst &&
87189251Ssam				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
88189251Ssam		} else if ((ops & CVT_ANSI) && IS_CSI_START(ch))
89189251Ssam		{
90189251Ssam			/* Skip to end of ANSI escape sequence. */
91252726Srpaulo			src++;  /* skip the CSI start char */
92252726Srpaulo			while (src < src_end)
93252726Srpaulo				if (!is_ansi_middle(*src++))
94252726Srpaulo					break;
95189251Ssam		} else
96252726Srpaulo		{
97252726Srpaulo			/* Just copy the char to the destination buffer. */
98252726Srpaulo			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
99252726Srpaulo				ch = TO_LOWER(ch);
100252726Srpaulo			put_wchar(&dst, ch);
101252726Srpaulo			/* Record the original position of the char. */
102252726Srpaulo			if (chpos != NULL)
103252726Srpaulo				chpos[dst_pos] = src_pos;
104252726Srpaulo		}
105252726Srpaulo		if (dst > edst)
106189251Ssam			edst = dst;
107189251Ssam	}
108189251Ssam	if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
109189251Ssam		edst--;
110189251Ssam	*edst = '\0';
111189251Ssam	if (lenp != NULL)
112189251Ssam		*lenp = (int) (edst - odst);
113189251Ssam	/* FIXME: why was this here?  if (chpos != NULL) chpos[dst - odst] = src - osrc; */
114189251Ssam}
115189251Ssam