1//=========================================================================
2// FILENAME	: textutils.c
3// DESCRIPTION	: Misc. text utilities
4//=========================================================================
5// Copyright (c) 2008- NETGEAR, Inc. All Rights Reserved.
6//=========================================================================
7
8/* This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21 */
22
23
#include <ctype.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "misc.h"
#include "textutils.h"
#include "../log.h"
32
/*
 * Convert a single hexadecimal digit character to its value (0..15).
 * Any character that is not a hex digit yields 0.
 */
static unsigned int
_char_htoi(char h)
{
  if (h>='0' && h<='9')
    return h-'0';
  if (h>='A' && h<='F')
    return h-'A'+10;
  if (h>='a' && h<='f')
    return h-'a'+10;
  return 0;
}

/*
 * Decode percent-escapes in src, in place.
 *
 *   "%%"  becomes a single '%'
 *   "%XY" becomes the byte whose value is the hex number XY
 *         (a non-hex digit counts as 0)
 *
 * A '%' that is not followed by enough characters to form an escape
 * (i.e. the string ends right after it, or after one more character)
 * is copied through unchanged.  Scanning never goes beyond the
 * terminating NUL of src.  The result is never longer than the input
 * and is always NUL-terminated.
 */
void
urldecode(char *src)
{
  char *s, *d;

  for (d=s=src; *s; s++, d++) {
    if (*s!='%') {
      *d = *s;
    }
    else if (s[1]=='%') {
      *d = '%';
      s++;
    }
    else if (s[1]!='\0' && s[2]!='\0') {
      *d = (char)(_char_htoi(s[1])<<4 | _char_htoi(s[2]));
      s += 2;
    }
    else {
      /* truncated escape at the end of the string: keep the '%' as is
       * instead of reading past the terminator */
      *d = '%';
    }
  }
  *d = '\0';
}
73
#if 0
/*
 * NOTE: this function is compiled out (#if 0) and kept for reference only.
 *
 * Check whether str starts with one of the words in prefs.ignoredwords.
 * Each entry is compared case-insensitively over its first n characters,
 * and only counts as a match when the character following it in str is
 * not alphanumeric.  The list ends at the first entry whose n is 0.
 *
 * Returns the n of the first matching entry, or 0 when the list is
 * missing or nothing matches.
 */
static int
is_ignoredword(const char *str)
{
  int i;

  if (!prefs.ignoredwords)
    return 0;

  for (i=0; prefs.ignoredwords[i].n; i++) {
    if (!(strncasecmp(prefs.ignoredwords[i].word, str, prefs.ignoredwords[i].n))) {
      /* <ctype.h> functions require a value representable as
       * unsigned char; a plain char may be negative */
      unsigned char next_char = str[prefs.ignoredwords[i].n];
      if (isalnum(next_char))
        continue;
      return prefs.ignoredwords[i].n;
    }
  }
  return 0;
}
#endif
94
/*
 * Return a pointer to the first character of str that is not
 * whitespace (as classified by isspace()).  If str consists only of
 * whitespace, the returned pointer addresses its terminating NUL.
 * The result points into str itself; nothing is copied.
 */
char *
skipspaces(const char *str)
{
  /* cast: passing a negative plain char (e.g. a UTF-8 lead byte) to
   * isspace() is undefined behaviour */
  while (isspace((unsigned char)*str))
    str++;
  return (char *)str;
}
101
/*
U+0040 (40): @ A B C  D E F G  H I J K  L M N O
U+0050 (50): P Q R S  T U V W  X Y Z [  \ ] ^ _
U+0060 (60): ` a b c  d e f g  h i j k  l m n o
U+0070 (70): p q r s  t u v w  x y z {  | } ~

U+00c0 (c3 80):  À Á Â Ã  Ä Å Æ Ç  È É Ê Ë  Ì Í Î Ï
U+00d0 (c3 90):  Ð Ñ Ò Ó  Ô Õ Ö ×  Ø Ù Ú Û  Ü Ý Þ ß
U+00e0 (c3 a0):  à á â ã  ä å æ ç  è é ê ë  ì í î ï
U+00f0 (c3 b0):  ð ñ ò ó  ô õ ö ÷  ø ù ú û  ü ý þ ÿ
U+0100 (c4 80):  Ā ā Ă ă  Ą ą Ć ć  Ĉ ĉ Ċ ċ  Č č Ď ď
U+0110 (c4 90):  Đ đ Ē ē  Ĕ ĕ Ė ė  Ę ę Ě ě  Ĝ ĝ Ğ ğ
U+0120 (c4 a0):  Ġ ġ Ģ ģ  Ĥ ĥ Ħ ħ  Ĩ ĩ Ī ī  Ĭ ĭ Į į
U+0130 (c4 b0):  İ ı Ĳ ĳ  Ĵ ĵ Ķ ķ  ĸ Ĺ ĺ Ļ  ļ Ľ ľ Ŀ
U+0140 (c5 80):  ŀ Ł ł Ń  ń Ņ ņ Ň  ň ŉ Ŋ ŋ  Ō ō Ŏ ŏ
U+0150 (c5 90):  Ő ő Œ œ  Ŕ ŕ Ŗ ŗ  Ř ř Ś ś  Ŝ ŝ Ş ş
U+0160 (c5 a0):  Š š Ţ ţ  Ť ť Ŧ ŧ  Ũ ũ Ū ū  Ŭ ŭ Ů ů
U+0170 (c5 b0):  Ű ű Ų ų  Ŵ ŵ Ŷ ŷ  Ÿ Ź ź Ż  ż Ž ž ſ
 */
121
// conversion table for latin diacritical char to ascii one char or two chars.
//
// Layout: 192 entries, one per code point starting at U+00C0 (so the
// table runs through U+017F); each "// U+xxxx" label below introduces
// two rows of 16 entries.
//
// Entry values as they appear in the data:
//   0x00XX  - a single ASCII character XX (always an upper-case letter,
//             also for the lower-case code points)
//   0xXXYY  - two ASCII characters, XX then YY (e.g. 0x4145 = "AE",
//             0x5353 = "SS", 0x494a = "IJ", 0x4f45 = "OE")
//   0xc397, 0xc398, 0xc5bf - these equal the UTF-8 byte pairs of
//             U+00D7, U+00D8 and U+017F; presumably "no ASCII
//             replacement, keep the character" - confirm against the
//             code that reads this table.
//
// NOTE(review): the U+00e0 rows are an exact copy of the U+00c0 rows.
// As a result U+00F7 maps to 0xc397 (the bytes of U+00D7), U+00F8 to
// 0xc398, and U+00FF to 0x5353 ("SS", the U+00DF entry).  Confirm
// whether these three are intentional.
unsigned short UtoAscii[] = {
  // U+00c0
  0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x4145,0x0043, 0x0045,0x0045,0x0045,0x0045, 0x0049,0x0049,0x0049,0x0049,
  0x0044,0x004e,0x004f,0x004f, 0x004f,0x004f,0x004f,0xc397, 0xc398,0x0055,0x0055,0x0055, 0x0055,0x0059,0x0050,0x5353,
  // U+00e0
  0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x4145,0x0043, 0x0045,0x0045,0x0045,0x0045, 0x0049,0x0049,0x0049,0x0049,
  0x0044,0x004e,0x004f,0x004f, 0x004f,0x004f,0x004f,0xc397, 0xc398,0x0055,0x0055,0x0055, 0x0055,0x0059,0x0050,0x5353,
  // U+0100
  0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x0043,0x0043, 0x0043,0x0043,0x0043,0x0043, 0x0043,0x0043,0x0044,0x0044,
  0x0044,0x0044,0x0045,0x0045, 0x0045,0x0045,0x0045,0x0045, 0x0045,0x0045,0x0045,0x0045, 0x0047,0x0047,0x0047,0x0047,
  // U+0120
  0x0047,0x0047,0x0047,0x0047, 0x0048,0x0048,0x0048,0x0048, 0x0049,0x0049,0x0049,0x0049, 0x0049,0x0049,0x0049,0x0049,
  0x0049,0x0049,0x494a,0x494a, 0x004a,0x004a,0x004b,0x004b, 0x004b,0x004c,0x004c,0x004c, 0x004c,0x004c,0x004c,0x004c,
  // U+0140
  0x004c,0x004c,0x004c,0x004e, 0x004e,0x004e,0x004e,0x004e, 0x004e,0x004e,0x004e,0x004e, 0x004f,0x004f,0x004f,0x004f,
  0x004f,0x004f,0x4f45,0x4f45, 0x0052,0x0052,0x0052,0x0052, 0x0052,0x0052,0x0053,0x0053, 0x0053,0x0053,0x0053,0x0053,
  // U+0160
  0x0053,0x0053,0x0054,0x0054, 0x0054,0x0054,0x0054,0x0054, 0x0055,0x0055,0x0055,0x0055, 0x0055,0x0055,0x0055,0x0055,
  0x0055,0x0055,0x0055,0x0055, 0x0057,0x0057,0x0059,0x0059, 0x0059,0x005a,0x005a,0x005a, 0x005a,0x005a,0x005a,0xc5bf
};
143
// conversion table for toupper() function for latin diacritical char
//
// Layout: 192 entries, one per code point starting at U+00C0 (through
// U+017F); each "// U+xxxx" label below introduces two rows of 16
// entries.
//
// Each entry holds the two UTF-8 bytes of the replacement character,
// first byte in the high 8 bits (e.g. 0xc380 = c3 80 = U+00C0).  The
// one exception is U+00DF, whose entry is 0x5353, i.e. the two ASCII
// characters "SS".
//
// NOTE(review): some entries do not follow the "lower-case maps to its
// upper-case partner" pattern used by the rest of the table; confirm
// whether they are intentional:
//   - U+00F7 maps to 0xc397 (bytes of U+00D7) and U+00FF maps to
//     0xc39f (bytes of U+00DF);
//   - the whole U+0150..U+015F row maps every code point to itself,
//     so the lower-case letters in it are left unchanged;
//   - in the U+0170..U+017F row, U+0171, U+0173, U+0175, U+0177 and
//     U+017C map to themselves.
unsigned short UtoUpper[] = {
  // U+00c0
  0xc380,0xc381,0xc382,0xc383, 0xc384,0xc385,0xc386,0xc387, 0xc388,0xc389,0xc38a,0xc38b, 0xc38c,0xc38d,0xc38e,0xc38f,
  0xc390,0xc391,0xc392,0xc393, 0xc394,0xc395,0xc396,0xc397, 0xc398,0xc399,0xc39a,0xc39b, 0xc39c,0xc39d,0xc39e,0x5353,
  // U+00e0
  0xc380,0xc381,0xc382,0xc383, 0xc384,0xc385,0xc386,0xc387, 0xc388,0xc389,0xc38a,0xc38b, 0xc38c,0xc38d,0xc38e,0xc38f,
  0xc390,0xc391,0xc392,0xc393, 0xc394,0xc395,0xc396,0xc397, 0xc398,0xc399,0xc39a,0xc39b, 0xc39c,0xc39d,0xc39e,0xc39f,
  // U+0100
  0xc480,0xc480,0xc482,0xc482, 0xc484,0xc484,0xc486,0xc486, 0xc488,0xc488,0xc48a,0xc48a, 0xc48c,0xc48c,0xc48e,0xc48e,
  0xc490,0xc490,0xc492,0xc492, 0xc494,0xc494,0xc496,0xc496, 0xc498,0xc498,0xc49a,0xc49a, 0xc49c,0xc49c,0xc49e,0xc49e,
  // U+0120
  0xc4a0,0xc4a0,0xc4a2,0xc4a2, 0xc4a4,0xc4a4,0xc4a6,0xc4a6, 0xc4a8,0xc4a8,0xc4aa,0xc4aa, 0xc4ac,0xc4ac,0xc4ae,0xc4ae,
  0xc4b0,0xc4b0,0xc4b2,0xc4b2, 0xc4b4,0xc4b4,0xc4b6,0xc4b6, 0xc4b8,0xc4b9,0xc4b9,0xc4bb, 0xc4bb,0xc4bd,0xc4bd,0xc4bf,
  // U+0140
  0xc4bf,0xc581,0xc581,0xc583, 0xc583,0xc585,0xc585,0xc587, 0xc587,0xc589,0xc58a,0xc58a, 0xc58c,0xc58c,0xc58e,0xc58e,
  0xc590,0xc591,0xc592,0xc593, 0xc594,0xc595,0xc596,0xc597, 0xc598,0xc599,0xc59a,0xc59b, 0xc59c,0xc59d,0xc59e,0xc59f,
  // U+0160
  0xc5a0,0xc5a0,0xc5a2,0xc5a2, 0xc5a4,0xc5a4,0xc5a6,0xc5a6, 0xc5a8,0xc5a8,0xc5aa,0xc5aa, 0xc5ac,0xc5ac,0xc5ae,0xc5ae,
  0xc5b0,0xc5b1,0xc5b2,0xc5b3, 0xc5b4,0xc5b5,0xc5b6,0xc5b7, 0xc5b8,0xc5b9,0xc5b9,0xc5bb, 0xc5bc,0xc5bd,0xc5bd,0xc5bf,
};
165
166
/*
 * Parse a leading decimal integer from s.
 *
 * Returns 0 when s is NULL or when its first character is not a digit,
 * '-' or '+' (leading whitespace is therefore rejected, not skipped).
 * Parsing stops at the first character that cannot be part of the
 * number.  Values outside the range of int are clamped to INT_MAX /
 * INT_MIN; atoi() would have undefined behaviour for such input.
 */
int
safe_atoi(char *s)
{
  long v;

  if (!s)
    return 0;
  if (!((s[0]>='0' && s[0]<='9') || s[0]=='-' || s[0]=='+'))
    return 0;

  /* strtol() saturates at LONG_MAX/LONG_MIN on overflow, so the two
   * comparisons below cover both "does not fit in long" and "fits in
   * long but not in int" */
  v = strtol(s, NULL, 10);
  if (v > INT_MAX)
    return INT_MAX;
  if (v < INT_MIN)
    return INT_MIN;
  return (int)v;
}
176
177// NOTE: support U+0000 ~ U+FFFF only.
178int
179utf16le_to_utf8(char *dst, int n, __u16 utf16le)
180{
181  __u16 wc = le16_to_cpu(utf16le);
182  if (wc < 0x80) {
183    if (n<1) return 0;
184    *dst++ = wc & 0xff;
185    return 1;
186  }
187  else if (wc < 0x800) {
188    if (n<2) return 0;
189    *dst++ = 0xc0 | (wc>>6);
190    *dst++ = 0x80 | (wc & 0x3f);
191    return 2;
192  }
193  else {
194    if (n<3) return 0;
195    *dst++ = 0xe0 | (wc>>12);
196    *dst++ = 0x80 | ((wc>>6) & 0x3f);
197    *dst++ = 0x80 | (wc & 0x3f);
198    return 3;
199  }
200}
201
202void
203fetch_string_txt(char *fname, char *lang, int n, ...)
204{
205  va_list args;
206  char **keys;
207  char ***strs;
208  char **defstr;
209  int i;
210  FILE *fp;
211  char buf[4096];
212  int state;
213  char *p;
214  char *langid;
215  const char *lang_en = "EN";
216
217  if (!(keys = malloc(sizeof(keys) * n))) {
218    DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n");
219  }
220  if (!(strs = malloc(sizeof(strs) * n))) {
221    DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n");
222  }
223  if (!(defstr = malloc(sizeof(defstr) * n))) {
224    DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n");
225  }
226
227  va_start(args, n);
228  for (i=0; i<n; i++) {
229    keys[i] = va_arg(args, char *);
230    strs[i] = va_arg(args, char **);
231    defstr[i] = va_arg(args, char *);
232  }
233  va_end(args);
234
235  if (!(fp = fopen(fname, "rb"))) {
236    DPRINTF(E_ERROR, L_SCANNER, "Cannot open <%s>\n", fname);
237    goto _exit;
238  }
239
240  state = -1;
241  while (fgets(buf, sizeof(buf), fp)) {
242    int len = strlen(buf);
243
244    if (buf[len-1]=='\n') buf[len-1] = '\0';
245
246    if (state<0) {
247      if (isalpha(buf[0])) {
248	for (i=0; i<n; i++) {
249	  if (!(strcmp(keys[i], buf))) {
250	    state = i;
251	    break;
252	  }
253	}
254      }
255    }
256    else {
257      int found = 0;
258
259      if (isalpha(buf[0]) || buf[0]=='\0') {
260	state = -1;
261	continue;
262      }
263
264      p = buf;
265      while (isspace(*p)) p++;
266      if (*p == '\0') {
267	state = -1;
268	continue;
269      }
270      langid = p;
271      while (!isspace(*p)) p++;
272      *p++ = '\0';
273
274      if (!strcmp(lang, langid))
275	found = 1;
276      else if (strcmp(lang_en, langid))
277	continue;
278
279      while (isspace(*p)) p++;
280      if (*strs[state])
281	free(*strs[state]);
282      *strs[state] = strdup(p);
283
284      if (found)
285	state = -1;
286    }
287  }
288
289  for (i=0; i<n; i++) {
290    if (!*strs[i])
291      *strs[i] = defstr[i];
292  }
293
294 _exit:
295  free(keys);
296  free(strs);
297  free(defstr);
298}
299