1;;; ogonek.el --- change the encoding of Polish diacritics 2 3;; Copyright (C) 1997, 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007 4;; Free Software Foundation, Inc. 5 6;; Author: W{\l}odek Bzyl, Ryszard Kubiak 7;; Maintainer: rysiek@ipipan.gda.pl (Ryszard Kubiak) 8;; Keywords: i18n 9 10;; This file is part of GNU Emacs. 11 12;; GNU Emacs is free software; you can redistribute it and/or modify 13;; it under the terms of the GNU General Public License as published by 14;; the Free Software Foundation; either version 2, or (at your option) 15;; any later version. 16 17;; GNU Emacs is distributed in the hope that it will be useful, 18;; but WITHOUT ANY WARRANTY; without even the implied warranty of 19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20;; GNU General Public License for more details. 21 22;; You should have received a copy of the GNU General Public License 23;; along with GNU Emacs; see the file COPYING. If not, write to the 24;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 25;; Boston, MA 02110-1301, USA. 26 27;;; Commentary: 28 29;; To use this library load it using 30;; M-x load-library [enter] ogonek 31;; Then, you may get a short info by calling one of 32;; M-x ogonek-jak -- in Polish 33;; M-x ogonek-how -- in English " 34 35;;; Code: 36 37(defgroup ogonek nil 38 "Change the encoding of Polish diacritic characters." 39 :prefix "ogonek-" 40 :group 'i18n) 41 42(defconst ogonek-name-encoding-alist 43 '(("ascii" . (?A ?C ?E ?L ?N ?O ?S ?Z ?Z 44 ?a ?c ?e ?l ?n ?o ?s ?z ?z)) 45 ("iso8859-2" . (161 198 202 163 209 211 166 172 175 46 177 230 234 179 241 243 182 188 191)) 47 ("mazovia" . (143 149 144 156 165 163 152 160 161 48 134 141 145 146 164 162 158 166 167)) 49 ("windows-EE" . (165 198 202 163 209 211 140 143 175 50 185 230 234 179 241 243 156 159 191)) 51 ("windows-PL" . (165 198 202 163 209 211 140 143 175 52 185 230 234 179 241 243 156 159 191)) 53 ("latin-2" . (164 143 168 157 227 224 151 141 189 54 165 134 169 136 228 162 152 171 190)) 55 ("CP852" . (164 143 168 157 227 224 151 141 189 56 165 134 169 136 228 162 152 171 190)) 57 ("MeX" . (129 130 134 138 139 211 145 153 155 58 161 162 166 170 171 243 177 185 187)) 59 ("CorelDraw" . (197 242 201 163 209 211 255 225 237 60 229 236 230 198 241 243 165 170 186)) 61 ("Amiga" . (194 202 203 206 207 211 212 218 219 62 226 234 235 238 239 243 244 250 251)) 63 ("Mac" . (132 140 162 252 193 238 229 143 251 64 136 141 171 184 196 151 230 144 253)) 65 ) 66 "The constant `ogonek-name-encoding-alist' is a list of (NAME.LIST) pairs. 67Each LIST contains codes for 18 Polish diacritic characters. The codes 68are given in the following order: 69 Aogonek Cacute Eogonek Lslash Nacute Oacute Sacute Zacute Zdotaccent 70 aogonek cacute eogonek lslash nacute oacute sacute zacute zdotaccent.") 71 72;; ------ A Little Info in Polish --------------- 73 74(defconst ogonek-informacja 75 " FUNKCJE INTERAKCYJNE UDOST/EPNIANE PRZEZ BIBLIOTEK/E `ogonek'. 76 77Je/sli czytasz ten tekst, to albo przegl/adasz plik /xr/od/lowy 78biblioteki `ogonek.el', albo wywo/la/le/s polecenie `ogonek-jak'. 79W drugim przypadku mo/zesz usun/a/c tekst z ekranu, stosuj/ac 80polecenie `M-x kill-buffer'. 81 82Niniejsza biblioteka dostarcza funkcji do zmiany kodowania polskich 83znak/ow diakrytycznych. Funkcje te mo/zna pogrupowa/c nast/epuj/aco. 84 85 1. `ogonek-recode-region' oraz `ogonek-recode-buffer' 86 przekodowuj/a zaznaczony fragment wzgl/ednie ca/ly buffor. 87 Po wywo/laniu interakcyjnym funkcji zadawane s/a 88 pytania o parametry przekodowania: nazw/e kodowania 89 w tek/scie /xr/od/lowym i nazw/e kodowania docelowego. 90 Poni/zsze przyk/lady powinny wyja/sni/c, jakich parametr/ow 91 oczekuj/a wymienione funkcje: 92 93 (ogonek-recode-region (poczatek-fragmentu) (koniec-fragmentu) 94 nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa) 95 (ogonek-recode-buffer 96 nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa) 97 98 2. `ogonek-prefixify-region' oraz `ogonek-prefixify-buffer' 99 s/lu/z/a do wprowadzania notacji prefiksowej. 100 101 (ogonek-prefixify-region (poczatek-fragmentu) (koniec-fragmentu) 102 nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu) 103 (ogonek-prefixify-buffer 104 nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu) 105 106 3. `ogonek-deprefixify-region' oraz `ogonek-deprefixify-buffer' 107 s/lu/z/a do usuwania notacji prefiksowej. 108 109 (ogonek-deprefixify-region (poczatek-fragmentu) (koniec-fragmentu) 110 znak-prefiksu nazwa-kodowania-docelowa) 111 (ogonek-prefixify-buffer 112 znak-prefiksu nazwa-kodowania-docelowa) 113 114 U/zycie klawisza TAB w trybie interakcyjnym powoduje wy/swietlenie 115 listy dopuszczalnych nazw kod/ow, pami/etanych w sta/lej 116 `ogonek-name-encoding-alist'. 117 118 Funkcje biblioteki odwo/luj/a si/e do pi/eciu zmiennych, kt/ore 119 przechowuj/a podpowiedzi do zadawanych pyta/n. Nazwy tych zmiennych 120 oraz ich warto/sci domy/slne s/a nast/epuj/ace: 121 122 ogonek-from-encoding iso8859-2 123 ogonek-to-encoding ascii 124 ogonek-prefix-char / 125 ogonek-prefix-from-encoding iso8859-2 126 ogonek-prefix-to-encoding iso8859-2 127 128 Powy/zsze warto/sci domy/slne mo/zna zmieni/c przez umieszczenie w pliku 129 konfiguracyjnym `~/.emacs' odpowiednich przypisa/n, na przyk/lad: 130 131 (setq ogonek-prefix-char ?/) 132 (setq ogonek-prefix-to-encoding \"iso8859-2\") 133 134 Zamiast wczytywania ca/lej biblioteki `ogonek.el' mo/zna w pliku 135 `~/.emacs' za/z/ada/c wczytania wybranych funkcji, na dodatek dopiero 136 w chwili ich rzeczywistego u/zycia: 137 138 (autoload 'ogonek-jak \"ogonek\") 139 (autoload 'ogonek-recode-region \"ogonek\") 140 (autoload 'ogonek-prefixify-region \"ogonek\") 141 (autoload 'ogonek-deprefixify-region \"ogonek\") 142 143 Cz/esto wyst/epuj/ace kombinacje wywo/la/n funkcji mo/zna dla wygody 144 skr/oci/c i przypisa/c klawiszom. Oto praktyczne przyk/lady: 145 146 (defun deprefixify-iso8859-2-region (start end) 147 (interactive \"*r\") 148 (ogonek-deprefixify-region start end ?/ \"iso8859-2\")) 149 (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d 150 151 (defun mazovia-to-iso8859-2 (start end) 152 (interactive \"*r\") 153 (ogonek-recode-region start end \"mazovia\" \"iso8859-2\")) 154 (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r 155 156 (defun prefixify-iso8859-2-region (start end) 157 (interactive \"*r\") 158 (ogonek-prefixify-region start end \"iso8859-2\" ?/)) 159 (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p 160 161 Ka/zd/a operacj/e przekodowania mo/zna w ca/lo/sci odwo/la/c 162 przez wykonanie polecenia `undo'.") 163 164(defun ogonek-jak () 165 "Display `ogonek-informacja' in an auxiliary *ogonek-jak* buffer." 166 (interactive) 167 (set-buffer (get-buffer-create " *ogonek-jak*")) 168 (insert ogonek-informacja) 169 (switch-to-buffer " *ogonek-jak*") 170 (goto-char (point-min))) 171 172;; ------ A Little Info in English -------- 173 174(defconst ogonek-information 175 " THE INTERACTIVE FUNCTIONS PROVIDED BY THE LIBRARY `ogonek'. 176 177If you read this text then you are either looking at the library's 178source text or you have called the `ogonek-how' command. In the 179latter case you may remove this text using `M-x kill-buffer'. 180 181The library provides functions for changing the encoding of Polish 182diacritic characters, the ones with an `ogonek' below or above them. 183The functions come in the following groups. 184 185 1. `ogonek-recode-region' and `ogonek-recode-buffer' to change 186 between one-character encodings, such as `iso-8859-2', `mazovia', 187 plain `ascii' or `TeX'. As the names suggest you may recode 188 either the entire current buffer or just a marked region 189 in it. You may use the functions interactively as commands. 190 Once you call a command you will be asked about the code 191 currently used in your text and the target encoding, the one 192 you want to get. The following example shows a non-interactive 193 use of the functions in a program. This also illustrates what 194 type of parameters the functions expect to be called with: 195 196 (ogonek-recode-region 197 (region-beginning) (region-end) from-code-name to-code-name) 198 (ogonek-recode-buffer from-code-name to-code-name) 199 200 2. `ogonek-prefixify-region' and `ogonek-prefixify-buffer' for 201 introducing prefix notation: 202 203 (ogonek-prefixify-region 204 (region-beginning) (region-end) from-code-name prefix-char) 205 (ogonek-prefixify-buffer from-code-name prefix-char) 206 207 3. `ogonek-deprefixify-region' and `ogonek-deprefixify-buffer' for 208 removing prefix notation: 209 210 (ogonek-deprefixify-region 211 (region-beginning) (region-end) prefix-char to-code-name) 212 (ogonek-prefixify-buffer prefix-char to-code-name) 213 214 The TAB character used in interactive mode makes `emacs' 215 display the list of encodings recognized by the library. The list 216 is stored in the constant `ogonek-name-encoding-alist'. 217 218 The `ogonek' functions refer to five variables in which the suggested 219 answers to dialogue questions are stored. The variables and their 220 default values are: 221 222 ogonek-from-encoding iso8859-2 223 ogonek-to-encoding ascii 224 ogonek-prefix-char / 225 ogonek-prefix-from-encoding iso8859-2 226 ogonek-prefix-to-encoding iso8859-2 227 228 The above default values can be changed by placing appropriate settings 229 in the '~/.emacs' file: 230 231 (setq ogonek-prefix-char ?/) 232 (setq ogonek-prefix-to-encoding \"iso8859-2\") 233 234 Instead of loading the whole library `ogonek' it may be better to 235 autoload the needed functions, for example by placing in `~/.emacs': 236 237 (autoload 'ogonek-how \"ogonek\") 238 (autoload 'ogonek-recode-region \"ogonek\") 239 (autoload 'ogonek-prefixify-region \"ogonek\") 240 (autoload 'ogonek-deprefixify-region \"ogonek\") 241 242 The most frequent function calls can be abbreviated and assigned to 243 keyboard keys. Here are a few practical examples: 244 245 (defun deprefixify-iso8859-2-region (start end) 246 (interactive \"*r\") 247 (ogonek-deprefixify-region start end ?/ \"iso8859-2\")) 248 (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d 249 250 (defun mazovia-to-iso8859-2 (start end) 251 (interactive \"*r\") 252 (ogonek-recode-region start end \"mazovia\" \"iso8859-2\")) 253 (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r 254 255 (defun prefixify-iso8859-2-region (start end) 256 (interactive \"*r\") 257 (ogonek-prefixify-region start end \"iso8859-2\" ?/)) 258 (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p 259 260 Each recoding operation can be called off using the `undo' command.") 261 262(defun ogonek-how () 263 "Display `ogonek-information' in an auxiliary *recode-how* buffer." 264 (interactive "*") 265 (set-buffer (get-buffer-create " *ogonek-how*")) 266 (insert ogonek-information) 267 (switch-to-buffer " *ogonek-how*") 268 (goto-char (point-min))) 269 270;; ---- Variables keeping the suggested answers to dialogue questions ----- 271(defvar ogonek-encoding-choices 272 (cons 'choice 273 (mapcar (lambda (x) (list 'const (car x))) 274 ogonek-name-encoding-alist)) 275 "List of ogonek encodings. Used only for customization.") 276(defcustom ogonek-from-encoding "iso8859-2" 277 "*Encoding in the source file of recoding." 278 :type ogonek-encoding-choices 279 :group 'ogonek) 280(defcustom ogonek-to-encoding "ascii" 281 "*Encoding in the target file of recoding." 282 :type ogonek-encoding-choices 283 :group 'ogonek) 284(defcustom ogonek-prefix-char ?/ 285 "*Prefix character for prefix encodings." 286 :type 'character 287 :group 'ogonek) 288(defcustom ogonek-prefix-from-encoding "iso8859-2" 289 "*Encoding in the source file subject to prefixifation." 290 :type ogonek-encoding-choices 291 :group 'ogonek) 292(defcustom ogonek-prefix-to-encoding "iso8859-2" 293 "*Encoding in the target file subject to deprefixifation." 294 :type ogonek-encoding-choices 295 :group 'ogonek) 296 297;; ---- Auxiliary functions for reading parameters in interactive mode ---- 298 299(defun ogonek-read-encoding (prompt default-name-var) 300 "Read encoding name with completion based on `ogonek-name-encoding-alist'. 301Store the name in the parameter-variable DEFAULT-NAME-VAR. 302PROMPT is a string to be shown when the user is asked for a name." 303 (let ((encoding 304 (completing-read 305 (format "%s (default %s): " prompt (eval default-name-var)) 306 ogonek-name-encoding-alist nil t))) 307 ;; change the default name to the one just read 308 (set default-name-var 309 (if (string= encoding "") (eval default-name-var) encoding)) 310 ;; return the new default as the name you read 311 (eval default-name-var))) 312 313(defun ogonek-read-prefix (prompt default-prefix-var) 314 "Read a prefix character for prefix notation. 315The result is stored in the variable DEFAULT-PREFIX-VAR. 316PROMPT is a string to be shown when the user is asked for a new prefix." 317 (let ((prefix-string 318 (read-string 319 (format "%s (default %s): " prompt 320 (char-to-string (eval default-prefix-var)))))) 321 (if (> (length prefix-string) 1) 322 (error "! Only one character expected") 323 ;; set the default prefix character to the one just read 324 (set default-prefix-var 325 (if (string= prefix-string "") 326 (eval default-prefix-var) 327 (string-to-char prefix-string))) 328 ;; the new default prefix is the function's result: 329 (eval default-prefix-var)))) 330 331(defun ogonek-lookup-encoding (encoding) 332 "Pick up an association for ENCODING in `ogonek-name-encoding-alist'. 333Before returning a result test whether the string ENCODING is in 334the list `ogonek-name-encoding-alist'" 335 (let ((code-list (assoc encoding ogonek-name-encoding-alist))) 336 (if (null code-list) 337 (error "! Name `%s' not known in `ogonek-name-encoding-alist'" 338 encoding) 339 (cdr code-list)))) 340 341;; ---- An auxiliary function for zipping two lists of equal length ---- 342 343(defun ogonek-zip-lists (xs ys) 344 "Build a list of pairs from lists XS and YS of the same length." 345 (let ((pairs nil)) 346 (while xs 347 (setq pairs (cons (cons (car xs) (car ys)) pairs)) 348 (setq xs (cdr xs)) 349 (setq ys (cdr ys))) 350 ;; `pairs' are the function's result 351 pairs)) 352 353;; ---- An auxiliary function building a one-to-one recoding table ----- 354 355(defun ogonek-build-table (recoding-pairs) 356 "Build a table required by Emacs's `translate-region' function. 357RECODING-PAIRS is a list of character pairs for which recoding 358is not an identity. 359By using the built-in `translate-region' function 360we gain better performance compared to converting characters 361by a hand-written routine as it is done for prefix encodings." 362 (let ((table (make-string 256 0)) 363 (i 0)) 364 (while (< i 256) 365 (aset table i i) 366 (setq i (1+ i))) 367 ;; make changes in `table' according to `recoding-pairs' 368 (while recoding-pairs 369 (aset table (car (car recoding-pairs)) (cdr (car recoding-pairs))) 370 (setq recoding-pairs (cdr recoding-pairs))) 371 ;; return the table just built 372 table)) 373 374;; ---- Commands for one-to-one recoding ------------------------------- 375 376(defun ogonek-recode-region (start end from-encoding to-encoding) 377 "Recode text in a marked region in one-to-one manner. 378When called interactively ask the user for the names of the FROM- 379and TO- encodings." 380 (interactive (progn (barf-if-buffer-read-only) 381 (list 382 (region-beginning) 383 (region-end) 384 (ogonek-read-encoding "From code" 'ogonek-from-encoding) 385 (ogonek-read-encoding "To code" 'ogonek-to-encoding)))) 386 (save-excursion 387 (translate-region 388 start end 389 (ogonek-build-table 390 (ogonek-zip-lists 391 (ogonek-lookup-encoding from-encoding) 392 (ogonek-lookup-encoding to-encoding)))))) 393 394(defun ogonek-recode-buffer (from-encoding to-encoding) 395 "Call `ogonek-recode-region' on the entire buffer. 396When called interactively ask the user for the names of the FROM- 397and TO- encodings." 398 (interactive (progn (barf-if-buffer-read-only) 399 (list 400 (ogonek-read-encoding "From code" 'ogonek-from-encoding) 401 (ogonek-read-encoding "To code" 'ogonek-to-encoding)))) 402 (ogonek-recode-region 403 (point-min) (point-max) from-encoding to-encoding)) 404 405;; ---- Recoding with prefix notation ------------------------------- 406 407(defconst ogonek-prefix-code '(?A ?C ?E ?L ?N ?O ?S ?X ?Z 408 ?a ?c ?e ?l ?n ?o ?s ?x ?z)) 409 410(defun ogonek-prefixify-region (start end from-encoding prefix-char) 411 "In a region, replace FROM-encoded Polish characters with PREFIX pairs. 412A PREFIX pair generated consists of PREFIX-CHAR and the respective 413character listed in the `ogonek-prefix-code' constant. 414PREFIX-CHAR itself gets doubled." 415 (interactive (progn (barf-if-buffer-read-only) 416 (list 417 (region-beginning) 418 (region-end) 419 (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding) 420 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char)))) 421 (let* 422 ((from-code (ogonek-lookup-encoding from-encoding)) 423 (to-code ogonek-prefix-code) 424 (recoding-pairs ; `ogonek-prefix-char' added for doubling 425 (ogonek-zip-lists 426 (cons prefix-char from-code) 427 (cons prefix-char to-code)))) 428 (save-excursion 429 (goto-char start) 430 (while (< (point) end) 431 (let ((pair (assoc (following-char) recoding-pairs))) 432 (if (null pair) 433 ;; not a Polish character -- skip it 434 (forward-char 1) 435 ;; Polish character -- insert a prefix pair instead 436 (delete-char 1) 437 (insert ogonek-prefix-char) 438 (insert (cdr pair)) 439 ;; the region is now one character longer 440 (setq end (1+ end)))))))) 441 442(defun ogonek-prefixify-buffer (from-encoding prefix-char) 443 "Call `ogonek-prefixify-region' on the entire buffer." 444 (interactive (progn (barf-if-buffer-read-only) 445 (list 446 (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding) 447 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char)))) 448 (ogonek-prefixify-region 449 (point-min) (point-max) from-encoding prefix-char)) 450 451(defun ogonek-deprefixify-region (start end prefix-char to-encoding) 452 "In a region, replace PREFIX pairs with their corresponding TO-encodings. 453PREFIX-CHAR followed by a Polish character from the `ogonek-prefix-code' 454list is replaced with the corresponding TO-encoded character. A doubled 455PREFIX-CHAR gets replaced with a single one. A combination of PREFIX-CHAR 456followed by a non-Polish character, that is one not listed in the 457`ogonek-prefix-code' constant, is left unchanged." 458 (interactive (progn (barf-if-buffer-read-only) 459 (list (region-beginning) 460 (region-end) 461 (ogonek-read-prefix 462 "Prefix character" 'ogonek-prefix-char) 463 (ogonek-read-encoding 464 "To code" 'ogonek-prefix-to-encoding)))) 465 (let* 466 ((from-code ogonek-prefix-code) 467 (to-code (ogonek-lookup-encoding to-encoding)) 468 (recoding-pairs 469 (ogonek-zip-lists 470 (cons prefix-char from-code) 471 (cons prefix-char to-code)))) 472 (save-excursion 473 (goto-char start) 474 (while (< (point) end) 475 (forward-char 1) 476 (if (or (not (= (preceding-char) prefix-char)) (= (point) end)) 477 ;; non-prefix character or the end-of-region -- do nothing 478 () 479 ;; now, we can check the next character 480 (let ((pair (assoc (following-char) recoding-pairs))) 481 (if (null pair) 482 ;; `following-char' is not a Polish character nor it is 483 ;; `prefix-char' since the one is among `recoding-pairs' 484 (forward-char 1) 485 ;; else prefix followed by a Polish character has been found 486 ;; replace it by the corresponding Polish character 487 (backward-char 1) 488 (delete-char 2) 489 (insert (cdr pair)) 490 ;; the region got shorter by one character 491 (setq end (1- end))))))))) 492 493(defun ogonek-deprefixify-buffer (prefix-char to-encoding) 494 "Call `ogonek-deprefixify-region' on the entire buffer." 495 (interactive (progn (barf-if-buffer-read-only) 496 (list 497 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char) 498 (ogonek-read-encoding "To code" 'ogonek-prefix-to-encoding)))) 499 (ogonek-deprefixify-region 500 (point-min) (point-max) prefix-char to-encoding)) 501 502(provide 'ogonek) 503 504;;; arch-tag: 672d7744-28ac-412b-965e-06a27e50d1d7 505;;; ogonek.el ends here 506