1//========================================================================= 2// FILENAME : textutils.c 3// DESCRIPTION : Misc. text utilities 4//========================================================================= 5// Copyright (c) 2008- NETGEAR, Inc. All Rights Reserved. 6//========================================================================= 7 8/* This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation; either version 2 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program; if not, write to the Free Software 20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 21 */ 22 23 24#include <stdlib.h> 25#include <stdio.h> 26#include <string.h> 27#include <ctype.h> 28 29#include "misc.h" 30#include "textutils.h" 31#include "../log.h" 32 33static unsigned int 34_char_htoi(char h) 35{ 36 if (h<'0') 37 return 0; 38 if (h<='9') 39 return h-'0'; 40 if (h<'A') 41 return 0; 42 if (h<='F') 43 return h-'A'+10; 44 if (h<'a') 45 return 0; 46 if (h<='f') 47 return h-'a'+10; 48 return 0; 49} 50 51void 52urldecode(char *src) 53{ 54 char c, *s, *d; 55 56 for (d=s=src; *s; s++, d++) { 57 c = *s; 58 if (c=='%') { 59 c = *++s; 60 if (c=='%') 61 c = '%'; 62 else { 63 c = _char_htoi(c)<<4 | _char_htoi(*++s); 64 } 65 *d = c; 66 } 67 else { 68 *d = c; 69 } 70 } 71 *d = '\0'; 72} 73 74#if 0 75static int 76is_ignoredword(const char *str) 77{ 78 int i; 79 80 if (!prefs.ignoredwords) 81 return 0; 82 83 for (i=0; prefs.ignoredwords[i].n; i++) { 84 if (!(strncasecmp(prefs.ignoredwords[i].word, str, prefs.ignoredwords[i].n))) { 85 char next_char = str[prefs.ignoredwords[i].n]; 86 if (isalnum(next_char)) 87 continue; 88 return prefs.ignoredwords[i].n; 89 } 90 } 91 return 0; 92} 93#endif 94 95char * 96skipspaces(const char *str) 97{ 98 while (isspace(*str)) str++; 99 return (char*) str; 100} 101 102/* 103U+0040 (40): @ A B C D E F G H I J K L M N O 104U+0050 (50): P Q R S T U V W X Y Z [ \ ] ^ _ 105U+0060 (60): ` a b c d e f g h i j k l m n o 106U+0070 (70): p q r s t u v w x y z { | } ~ 107 108U+00c0 (c3 80): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 109U+00d0 (c3 90): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 110U+00e0 (c3 a0): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 111U+00f0 (c3 b0): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 112U+0100 (c4 80): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 113U+0110 (c4 90): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 114U+0120 (c4 a0): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 115U+0130 (c4 b0): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 116U+0140 (c5 80): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 117U+0150 (c5 90): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 118U+0160 (c5 a0): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 119U+0170 (c5 b0): �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� �� 120 */ 121 122// conversion table for latin diacritical char to ascii one char or two chars. 123unsigned short UtoAscii[] = { 124 // U+00c0 125 0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x4145,0x0043, 0x0045,0x0045,0x0045,0x0045, 0x0049,0x0049,0x0049,0x0049, 126 0x0044,0x004e,0x004f,0x004f, 0x004f,0x004f,0x004f,0xc397, 0xc398,0x0055,0x0055,0x0055, 0x0055,0x0059,0x0050,0x5353, 127 // U+00e0 128 0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x4145,0x0043, 0x0045,0x0045,0x0045,0x0045, 0x0049,0x0049,0x0049,0x0049, 129 0x0044,0x004e,0x004f,0x004f, 0x004f,0x004f,0x004f,0xc397, 0xc398,0x0055,0x0055,0x0055, 0x0055,0x0059,0x0050,0x5353, 130 // U+0100 131 0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x0043,0x0043, 0x0043,0x0043,0x0043,0x0043, 0x0043,0x0043,0x0044,0x0044, 132 0x0044,0x0044,0x0045,0x0045, 0x0045,0x0045,0x0045,0x0045, 0x0045,0x0045,0x0045,0x0045, 0x0047,0x0047,0x0047,0x0047, 133 // U+0120 134 0x0047,0x0047,0x0047,0x0047, 0x0048,0x0048,0x0048,0x0048, 0x0049,0x0049,0x0049,0x0049, 0x0049,0x0049,0x0049,0x0049, 135 0x0049,0x0049,0x494a,0x494a, 0x004a,0x004a,0x004b,0x004b, 0x004b,0x004c,0x004c,0x004c, 0x004c,0x004c,0x004c,0x004c, 136 // U+0140 137 0x004c,0x004c,0x004c,0x004e, 0x004e,0x004e,0x004e,0x004e, 0x004e,0x004e,0x004e,0x004e, 0x004f,0x004f,0x004f,0x004f, 138 0x004f,0x004f,0x4f45,0x4f45, 0x0052,0x0052,0x0052,0x0052, 0x0052,0x0052,0x0053,0x0053, 0x0053,0x0053,0x0053,0x0053, 139 // U+0160 140 0x0053,0x0053,0x0054,0x0054, 0x0054,0x0054,0x0054,0x0054, 0x0055,0x0055,0x0055,0x0055, 0x0055,0x0055,0x0055,0x0055, 141 0x0055,0x0055,0x0055,0x0055, 0x0057,0x0057,0x0059,0x0059, 0x0059,0x005a,0x005a,0x005a, 0x005a,0x005a,0x005a,0xc5bf 142}; 143 144// conversion table for toupper() function for latin diacritical char 145unsigned short UtoUpper[] = { 146 // U+00c0 147 0xc380,0xc381,0xc382,0xc383, 0xc384,0xc385,0xc386,0xc387, 0xc388,0xc389,0xc38a,0xc38b, 0xc38c,0xc38d,0xc38e,0xc38f, 148 0xc390,0xc391,0xc392,0xc393, 0xc394,0xc395,0xc396,0xc397, 0xc398,0xc399,0xc39a,0xc39b, 0xc39c,0xc39d,0xc39e,0x5353, 149 // U+00e0 150 0xc380,0xc381,0xc382,0xc383, 0xc384,0xc385,0xc386,0xc387, 0xc388,0xc389,0xc38a,0xc38b, 0xc38c,0xc38d,0xc38e,0xc38f, 151 0xc390,0xc391,0xc392,0xc393, 0xc394,0xc395,0xc396,0xc397, 0xc398,0xc399,0xc39a,0xc39b, 0xc39c,0xc39d,0xc39e,0xc39f, 152 // U+0100 153 0xc480,0xc480,0xc482,0xc482, 0xc484,0xc484,0xc486,0xc486, 0xc488,0xc488,0xc48a,0xc48a, 0xc48c,0xc48c,0xc48e,0xc48e, 154 0xc490,0xc490,0xc492,0xc492, 0xc494,0xc494,0xc496,0xc496, 0xc498,0xc498,0xc49a,0xc49a, 0xc49c,0xc49c,0xc49e,0xc49e, 155 // U+0120 156 0xc4a0,0xc4a0,0xc4a2,0xc4a2, 0xc4a4,0xc4a4,0xc4a6,0xc4a6, 0xc4a8,0xc4a8,0xc4aa,0xc4aa, 0xc4ac,0xc4ac,0xc4ae,0xc4ae, 157 0xc4b0,0xc4b0,0xc4b2,0xc4b2, 0xc4b4,0xc4b4,0xc4b6,0xc4b6, 0xc4b8,0xc4b9,0xc4b9,0xc4bb, 0xc4bb,0xc4bd,0xc4bd,0xc4bf, 158 // U+0140 159 0xc4bf,0xc581,0xc581,0xc583, 0xc583,0xc585,0xc585,0xc587, 0xc587,0xc589,0xc58a,0xc58a, 0xc58c,0xc58c,0xc58e,0xc58e, 160 0xc590,0xc591,0xc592,0xc593, 0xc594,0xc595,0xc596,0xc597, 0xc598,0xc599,0xc59a,0xc59b, 0xc59c,0xc59d,0xc59e,0xc59f, 161 // U+0160 162 0xc5a0,0xc5a0,0xc5a2,0xc5a2, 0xc5a4,0xc5a4,0xc5a6,0xc5a6, 0xc5a8,0xc5a8,0xc5aa,0xc5aa, 0xc5ac,0xc5ac,0xc5ae,0xc5ae, 163 0xc5b0,0xc5b1,0xc5b2,0xc5b3, 0xc5b4,0xc5b5,0xc5b6,0xc5b7, 0xc5b8,0xc5b9,0xc5b9,0xc5bb, 0xc5bc,0xc5bd,0xc5bd,0xc5bf, 164}; 165 166 167int 168safe_atoi(char *s) 169{ 170 if (!s) 171 return 0; 172 if ((s[0]>='0' && s[0]<='9') || s[0]=='-' || s[0]=='+') 173 return atoi(s); 174 return 0; 175} 176 177// NOTE: support U+0000 ~ U+FFFF only. 178int 179utf16le_to_utf8(char *dst, int n, __u16 utf16le) 180{ 181 __u16 wc = le16_to_cpu(utf16le); 182 if (wc < 0x80) { 183 if (n<1) return 0; 184 *dst++ = wc & 0xff; 185 return 1; 186 } 187 else if (wc < 0x800) { 188 if (n<2) return 0; 189 *dst++ = 0xc0 | (wc>>6); 190 *dst++ = 0x80 | (wc & 0x3f); 191 return 2; 192 } 193 else { 194 if (n<3) return 0; 195 *dst++ = 0xe0 | (wc>>12); 196 *dst++ = 0x80 | ((wc>>6) & 0x3f); 197 *dst++ = 0x80 | (wc & 0x3f); 198 return 3; 199 } 200} 201 202void 203fetch_string_txt(char *fname, char *lang, int n, ...) 204{ 205 va_list args; 206 char **keys; 207 char ***strs; 208 char **defstr; 209 int i; 210 FILE *fp; 211 char buf[4096]; 212 int state; 213 char *p; 214 char *langid; 215 const char *lang_en = "EN"; 216 217 if (!(keys = malloc(sizeof(keys) * n))) { 218 DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n"); 219 } 220 if (!(strs = malloc(sizeof(strs) * n))) { 221 DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n"); 222 } 223 if (!(defstr = malloc(sizeof(defstr) * n))) { 224 DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n"); 225 } 226 227 va_start(args, n); 228 for (i=0; i<n; i++) { 229 keys[i] = va_arg(args, char *); 230 strs[i] = va_arg(args, char **); 231 defstr[i] = va_arg(args, char *); 232 } 233 va_end(args); 234 235 if (!(fp = fopen(fname, "rb"))) { 236 DPRINTF(E_ERROR, L_SCANNER, "Cannot open <%s>\n", fname); 237 goto _exit; 238 } 239 240 state = -1; 241 while (fgets(buf, sizeof(buf), fp)) { 242 int len = strlen(buf); 243 244 if (buf[len-1]=='\n') buf[len-1] = '\0'; 245 246 if (state<0) { 247 if (isalpha(buf[0])) { 248 for (i=0; i<n; i++) { 249 if (!(strcmp(keys[i], buf))) { 250 state = i; 251 break; 252 } 253 } 254 } 255 } 256 else { 257 int found = 0; 258 259 if (isalpha(buf[0]) || buf[0]=='\0') { 260 state = -1; 261 continue; 262 } 263 264 p = buf; 265 while (isspace(*p)) p++; 266 if (*p == '\0') { 267 state = -1; 268 continue; 269 } 270 langid = p; 271 while (!isspace(*p)) p++; 272 *p++ = '\0'; 273 274 if (!strcmp(lang, langid)) 275 found = 1; 276 else if (strcmp(lang_en, langid)) 277 continue; 278 279 while (isspace(*p)) p++; 280 if (*strs[state]) 281 free(*strs[state]); 282 *strs[state] = strdup(p); 283 284 if (found) 285 state = -1; 286 } 287 } 288 289 for (i=0; i<n; i++) { 290 if (!*strs[i]) 291 *strs[i] = defstr[i]; 292 } 293 294 _exit: 295 free(keys); 296 free(strs); 297 free(defstr); 298} 299