;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*-

;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
;; Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
;; 2005, 2006, 2007
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H14PRO021

;; Keywords: multilingual, European

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;; Commentary:

;; For European scripts, all the ISO Latin character sets are
;; supported, along with various others.

;; NOTE(review): per the coding cookie on the first line, this file is
;; encoded in iso-2022-7bit.  The sample texts and character literals
;; below are encoded sequences; edit this file only with a tool that
;; preserves them byte for byte.

;;; Code:

;; Latin-1 (ISO-8859-1)

;; Generic Latin-1 language environment, filed under the "European"
;; group.  No coding system is created here; `iso-latin-1' is only
;; referenced.
(set-language-info-alist
 "Latin-1" '((charset ascii latin-iso8859-1)
             (coding-system iso-latin-1)
             (coding-priority iso-latin-1 windows-1252)
             (nonascii-translation . latin-iso8859-1)
             (unibyte-syntax . "latin-1")
             (unibyte-display . iso-latin-1)
             (input-method . "latin-1-prefix")
             (sample-text
              . "Hello, Hej, Tere, Hei, Bonjour, Gr,A|_(B Gott, Ciao, ,A!(BHola!")
             (documentation . "\
This language environment is a generic one for the Latin-1 (ISO-8859-1)
character set which supports the following European languages:
 Albanian, Basque, Breton, Catalan, Danish, Dutch, English, Faeroese,
 Finnish, French (with restrictions -- see Latin-9), Frisian, Galician,
 German, Greenlandic, Icelandic, Irish Gaelic (new orthography),
 Italian, Latin, Luxemburgish, Norwegian, Portuguese, Rhaeto-Romanic,
 Scottish Gaelic, Spanish, and Swedish.
We also have specific language environments for the following languages:
 For Dutch, \"Dutch\".
 For German, \"German\".
 For French, \"French\".
 For Italian, \"Italian\".
 For Slovenian, \"Slovenian\".
 For Spanish, \"Spanish\".

Latin-1 also covers several written languages outside Europe, including
Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans."))
 '("European"))

;; Define the windows-1252 coding system with `cp-make-coding-system'
;; and store the form's value in `non-iso-charset-alist'.  The vector
;; lists the characters for the non-ASCII code points in order.
;; NOTE(review): the nil entries presumably mark code points that
;; windows-1252 leaves unassigned -- confirm against
;; `cp-make-coding-system'.
(eval-and-compile
  (setq
   non-iso-charset-alist
   (cp-make-coding-system
    windows-1252
    [?\$,1tL(B nil ?\$,1rz(B ?\$,1!R(B ?\$,1r~(B ?\$,1s&(B ?\$,1s (B ?\$,1s!(B ?\$,1$f(B ?\$,1s0(B ?\$,1! (B ?\$,1s9(B ?\$,1 r(B nil ?\$,1!=(B nil nil
     ?\$,1rx(B ?\$,1ry(B ?\$,1r|(B ?\$,1r}(B ?\$,1s"(B ?\$,1rs(B ?\$,1rt(B ?\$,1$|(B ?\$,1ub(B ?\$,1!!(B ?\$,1s:(B ?\$,1 s(B nil ?\$,1!>(B ?\$,1!8(B ?\,A (B ?\,A!(B
     ?\,A"(B ?\,A#(B ?\,A$(B ?\,A%(B ?\,A&(B ?\,A'(B ?\,A((B ?\,A)(B ?\,A*(B ?\,A+(B ?\,A,(B ?\,A-(B ?\,A.(B ?\,A/(B ?\,A0(B ?\,A1(B ?\,A2(B
     ?\,A3(B ?\,A4(B ?\,A5(B ?\,A6(B ?\,A7(B ?\,A8(B ?\,A9(B ?\,A:(B ?\,A;(B ?\,A<(B ?\,A=(B ?\,A>(B ?\,A?(B ?\,A@(B ?\,AA(B ?\,AB(B ?\,AC(B
     ?\,AD(B ?\,AE(B ?\,AF(B ?\,AG(B ?\,AH(B ?\,AI(B ?\,AJ(B ?\,AK(B ?\,AL(B ?\,AM(B ?\,AN(B ?\,AO(B ?\,AP(B ?\,AQ(B ?\,AR(B ?\,AS(B ?\,AT(B
     ?\,AU(B ?\,AV(B ?\,AW(B ?\,AX(B ?\,AY(B ?\,AZ(B ?\,A[(B ?\,A\(B ?\,A](B ?\,A^(B ?\,A_(B ?\,A`(B ?\,Aa(B ?\,Ab(B ?\,Ac(B ?\,Ad(B ?\,Ae(B
     ?\,Af(B ?\,Ag(B ?\,Ah(B ?\,Ai(B ?\,Aj(B ?\,Ak(B ?\,Al(B ?\,Am(B ?\,An(B ?\,Ao(B ?\,Ap(B ?\,Aq(B ?\,Ar(B ?\,As(B ?\,At(B ?\,Au(B ?\,Av(B
     ?\,Aw(B ?\,Ax(B ?\,Ay(B ?\,Az(B ?\,A{(B ?\,A|(B ?\,A}(B ?\,A~(B ?\,A(B])))

(define-coding-system-alias 'cp1252 'windows-1252)


;; Latin-2 (ISO-8859-2)

;; Each Latin-N section below follows the same shape: create the
;; `iso-latin-N' coding system, give it `iso-8859-N' and `latin-N'
;; aliases, then register a language environment that uses it.
(make-coding-system
 'iso-latin-2 2 ?2
 "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)."
 '(ascii latin-iso8859-2 nil nil
   nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii latin-iso8859-2)
   (mime-charset . iso-8859-2)))

(define-coding-system-alias 'iso-8859-2 'iso-latin-2)
(define-coding-system-alias 'latin-2 'iso-latin-2)

(set-language-info-alist
 "Latin-2" '((charset ascii latin-iso8859-2)
             (coding-system iso-latin-2)
             (coding-priority iso-latin-2)
             (nonascii-translation . latin-iso8859-2)
             (unibyte-syntax . "latin-2")
             (unibyte-display . iso-latin-2)
             (input-method . "latin-2-prefix")
             (documentation . "\
This language environment is a generic one for the Latin-2 (ISO-8859-2)
character set which supports the following languages:
 Albanian, Czech, English, German, Hungarian, Polish, Romanian,
 Serbo-Croatian or Croatian, Slovak, Slovene, Sorbian (upper and lower),
 and Swedish.
We also have specific language environments for the following languages:
 For Czech, \"Czech\".
 For Croatian, \"Croatian\".
 For Romanian, \"Romanian\".
 For Slovak, \"Slovak\"."))
 '("European"))


;; Latin-3 (ISO-8859-3)

(make-coding-system
 'iso-latin-3 2 ?3
 "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)."
 '(ascii latin-iso8859-3 nil nil
   nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii latin-iso8859-3)
   (mime-charset . iso-8859-3)))

(define-coding-system-alias 'iso-8859-3 'iso-latin-3)
(define-coding-system-alias 'latin-3 'iso-latin-3)

(set-language-info-alist
 "Latin-3" '((charset ascii latin-iso8859-3)
             (coding-system iso-latin-3)
             (coding-priority iso-latin-3)
             (nonascii-translation . latin-iso8859-3)
             (unibyte-syntax . "latin-3")
             (unibyte-display . iso-latin-3)
             (input-method . "latin-3-prefix")
             (documentation . "\
These languages are supported with the Latin-3 (ISO-8859-3) character set:
 Afrikaans, Catalan, Dutch, English, Esperanto, French, Galician,
 German, Italian, Maltese, Spanish, and Turkish."))
 '("European"))


;; Latin-4 (ISO-8859-4)

(make-coding-system
 'iso-latin-4 2 ?4
 "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)."
 '(ascii latin-iso8859-4 nil nil
   nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii latin-iso8859-4)
   (mime-charset . iso-8859-4)))

(define-coding-system-alias 'iso-8859-4 'iso-latin-4)
(define-coding-system-alias 'latin-4 'iso-latin-4)

;; Unlike the other Latin-N environments, this one names the coding
;; system by the `iso-8859-4' alias defined just above rather than by
;; `iso-latin-4'.
(set-language-info-alist
 "Latin-4" '((charset ascii latin-iso8859-4)
             (coding-system iso-8859-4)
             (coding-priority iso-8859-4)
             (nonascii-translation . latin-iso8859-4)
             (unibyte-syntax . "latin-4")
             (unibyte-display . iso-8859-4)
             (input-method . "latin-4-postfix")
             (documentation . "\
These languages are supported with the Latin-4 (ISO-8859-4) character set:
 Danish, English, Estonian, Finnish, German, Greenlandic, Lappish,
 Latvian, Lithuanian, and Norwegian."))
 '("European"))


;; Latin-5 (ISO-8859-9)

(make-coding-system
 'iso-latin-5 2 ?9
 "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)."
 '(ascii latin-iso8859-9 nil nil
   nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii latin-iso8859-9)
   (mime-charset . iso-8859-9)))

(define-coding-system-alias 'iso-8859-9 'iso-latin-5)
(define-coding-system-alias 'latin-5 'iso-latin-5)

(set-language-info-alist
 "Latin-5" '((charset ascii latin-iso8859-9)
             (coding-system iso-latin-5)
             (coding-priority iso-latin-5)
             (nonascii-translation . latin-iso8859-9)
             (unibyte-syntax . "latin-5")
             (unibyte-display . iso-latin-5)
             (input-method . "latin-5-postfix")
             (documentation . "Support for Turkish language."))
 '("European"))


;; Latin-8 (ISO-8859-14)

;; NOTE(review): the flag list for Latin-8 (and Latin-9 below) ends in
;; `t t', one element more than the Latin-2..Latin-5 definitions above.
;; The meaning of the extra flag is not visible in this file -- confirm
;; against `make-coding-system'.
(make-coding-system
 'iso-latin-8 2 ?W                      ; `W' for `Welsh', since `C'
                                        ; for `Celtic' is taken.
 "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)."
 '(ascii latin-iso8859-14 nil nil
   nil nil nil nil nil nil nil nil nil nil nil t t)
 '((safe-charsets ascii latin-iso8859-14)
   (mime-charset . iso-8859-14)))

(define-coding-system-alias 'iso-8859-14 'iso-latin-8)
(define-coding-system-alias 'latin-8 'iso-latin-8)

(set-language-info-alist
 "Latin-8" '((charset ascii latin-iso8859-14)
             (coding-system iso-latin-8)
             (coding-priority iso-latin-8)
             (nonascii-translation . latin-iso8859-14)
             (unibyte-syntax . "latin-8")
             (unibyte-display . iso-latin-8)
             (input-method . "latin-8-prefix")
             ;; Fixme: Welsh/Ga{e}lic greetings
             (sample-text . ",_"(B ,_p(B ,_^(B")
             (documentation . "\
This language environment is a generic one for the Latin-8 (ISO-8859-14)
character set which supports the Celtic languages, including those not
covered by other ISO-8859 character sets:
 Welsh, Manx Gaelic and Irish Gaelic (old orthography)."))
 '("European"))

;; Latin-9 (ISO-8859-15)

(make-coding-system
 'iso-latin-9 2 ?0                      ; `0' for `Latin-0'
 "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)."
 '(ascii latin-iso8859-15 nil nil
   nil nil nil nil nil nil nil nil nil nil nil t t)
 '((safe-charsets ascii latin-iso8859-15)
   (mime-charset . iso-8859-15)))

(define-coding-system-alias 'iso-8859-15 'iso-latin-9)
(define-coding-system-alias 'latin-9 'iso-latin-9)
(define-coding-system-alias 'latin-0 'iso-latin-9)

(set-language-info-alist
 "Latin-9" '((charset ascii latin-iso8859-15)
             (coding-system iso-latin-9)
             (coding-priority iso-latin-9)
             (nonascii-translation . latin-iso8859-15)
             (unibyte-syntax . "latin-9")
             (unibyte-display . iso-latin-9)
             (input-method . "latin-9-prefix")
             (sample-text
              . "AVE. ,b&(48<=>(B ,b$(B")
             (documentation . "\
This language environment is a generic one for the Latin-9 (ISO-8859-15)
character set which supports the same languages as Latin-1 with the
addition of the Euro sign and some additional French and Finnish letters.
Latin-9 is sometimes nicknamed `Latin-0'."))
 '("European"))

;; Language-specific environments.  Each one reuses the Latin-1 (or,
;; for Slovenian, Latin-2) charset and coding systems and differs in
;; its tutorial file, input method and sample text.

(set-language-info-alist
 "Dutch" '((tutorial . "TUTORIAL.nl")
           (charset ascii latin-iso8859-1)
           (coding-system iso-latin-1 iso-latin-9)
           (coding-priority iso-latin-1)
           (nonascii-translation . latin-iso8859-1)
           (unibyte-syntax . "latin-1")
           (unibyte-display . iso-latin-1)
           (input-method . "dutch")
           (sample-text . "Er is een aantal manieren waarop je dit kan doen")
           (documentation . "\
This language environment is almost the same as Latin-1,
but it selects the Dutch tutorial and input method."))
 '("European"))

(set-language-info-alist
 "German" '((tutorial . "TUTORIAL.de")
            (charset ascii latin-iso8859-1)
            (coding-system iso-latin-1 iso-latin-9)
            (coding-priority iso-latin-1 windows-1252)
            (input-method . "german-postfix")
            (nonascii-translation . latin-iso8859-1)
            (unibyte-syntax . "latin-1")
            (unibyte-display . iso-latin-1)
            (sample-text . "\
German (Deutsch Nord) Guten Tag
German (Deutsch S,A|(Bd) Gr,A|_(B Gott")
            (documentation . "\
This language environment is almost the same as Latin-1,
but sets the default input method to \"german-postfix\".
Additionally, it selects the German tutorial."))
 '("European"))

(set-language-info-alist
 "French" '((tutorial . "TUTORIAL.fr")
            (charset ascii latin-iso8859-1)
            (coding-system iso-latin-1 iso-latin-9)
            (coding-priority iso-latin-1)
            (nonascii-translation . latin-iso8859-1)
            (unibyte-syntax . "latin-1")
            (unibyte-display . iso-latin-1)
            (input-method . "latin-1-prefix")
            (sample-text . "French (Fran,Ag(Bais) Bonjour, Salut")
            (documentation . "\
This language environment is almost the same as Latin-1,
but it selects the French tutorial and input method."))
 '("European"))

(set-language-info-alist
 "Italian" '((tutorial . "TUTORIAL.it")
             (charset ascii latin-iso8859-1)
             (coding-system iso-latin-1 iso-latin-9)
             (coding-priority iso-latin-1)
             (nonascii-translation . latin-iso8859-1)
             (unibyte-syntax . "latin-1")
             (unibyte-display . iso-latin-1)
             (input-method . "italian-postfix")
             (sample-text . "Salve, ciao!")
             (documentation . "\
This language environment is almost the same as Latin-1,
but sets the default input method to \"italian-postfix\".
Additionally, it selects the Italian tutorial."))
 '("European"))

;; The dotted-pair spelling `(charset . (ascii ...))' used here reads
;; as the same list as the `(charset ascii ...)' spelling used above.
(set-language-info-alist
 "Slovenian" '((charset . (ascii latin-iso8859-2))
               (coding-system . (iso-8859-2))
               (coding-priority . (iso-8859-2))
               (nonascii-translation . latin-iso8859-2)
               (input-method . "slovenian")
               (unibyte-syntax . "latin-2")
               (unibyte-display . iso-8859-2)
               (tutorial . "TUTORIAL.sl")
               (sample-text . ",B.(Belimo vam uspe,B9(Ben dan!")
               (documentation . "\
This language environment is almost the same as Latin-2,
but it selects the Slovenian tutorial and input method."))
 '("European"))

(set-language-info-alist
 "Spanish" '((tutorial . "TUTORIAL.es")
             (charset ascii latin-iso8859-1)
             (coding-system iso-latin-1 iso-latin-9)
             (coding-priority iso-latin-1)
             (input-method . "spanish-postfix")
             (nonascii-translation . latin-iso8859-1)
             (unibyte-syntax . "latin-1")
             (unibyte-display . iso-latin-1)
             (sample-text . "Spanish (Espa,Aq(Bol) ,A!(BHola!")
             (documentation . "\
This language environment is almost the same as Latin-1,
but it sets the default input method to \"spanish-postfix\",
and it selects the Spanish tutorial."))
 '("European"))

;; For Turkish, the character set ISO-8859-9 (Latin-5) is used. But,
;; before the introduction of ISO-8859-9 in 1988, ISO-8859-3 (Latin-3)
;; was used for Turkish. Those who use Latin-3 for Turkish should use
;; "Latin-3" language environment.
;; The Turkish environment installs setup/exit hooks that switch the
;; case mapping of `i' and `I' on and off.  It is filed under the
;; "European" group like every other environment in this file.

(set-language-info-alist
 "Turkish" '((charset ascii latin-iso8859-9)
             (coding-system iso-latin-5 iso-latin-3)
             (coding-priority iso-latin-5)
             (nonascii-translation . latin-iso8859-9)
             (unibyte-syntax . "latin-5")
             (unibyte-display . iso-latin-5)
             (input-method . "turkish-postfix")
             (sample-text . "Turkish (T,M|(Brk,Mg(Be) Merhaba")
             (documentation . t)
             (setup-function . turkish-case-conversion-enable)
             (exit-function . turkish-case-conversion-disable))
 '("European"))

(defun turkish-case-conversion-enable ()
  "Set up Turkish case conversion of `i' and `I' into `$,1 P(B' and `$,1 Q(B'."
  ;; The change is made in the standard case table.
  (let ((table (standard-case-table)))
    (set-case-syntax-pair ?$,1 P(B ?i table)
    (set-case-syntax-pair ?I ?$,1 Q(B table)))

(defun turkish-case-conversion-disable ()
  "Set up normal (non-Turkish) case conversion of `i' into `I'."
  (let ((table (standard-case-table)))
    ;; Re-pair plain `I'/`i', then make the two Turkish letters
    ;; ordinary word constituents with no case partner.
    (set-case-syntax-pair ?I ?i table)
    (set-case-syntax ?$,1 P(B "w" table)
    (set-case-syntax ?$,1 Q(B "w" table)))

;; Polish ISO 8859-2 environment.
;; Maintainer: Wlodek Bzyl <matwb@univ.gda.pl>
;; Keywords: multilingual, Polish

(set-language-info-alist
 "Polish" '((charset . (ascii latin-iso8859-2))
            (coding-system . (iso-8859-2))
            (coding-priority . (iso-8859-2))
            (input-method . "polish-slash")
            (nonascii-translation . latin-iso8859-2)
            (unibyte-syntax . "latin-2")
            (unibyte-display . iso-8859-2)
            (tutorial . "TUTORIAL.pl")
            (sample-text . "P,Bs(Bjd,B<(B, ki,Bq(B-,B?(Be t,Bj(B chmurno,B6f(B w g,B31(Bb flaszy")
            (documentation . t))
 '("European"))

(set-language-info-alist
 "Welsh" `((coding-system utf-8 latin-8) ; the input method is Unicode-based
           (coding-priority utf-8 latin-8)
           (nonascii-translation . latin-iso8859-14)
           (input-method . "welsh")
           (documentation . "Support for Welsh, using Unicode."))
 '("European"))

;; The Latin-6/Latin-7 based environments below take their
;; `nonascii-translation' from the `translation-table' property of the
;; corresponding `decode-iso-latin-N' symbol at load time (hence the
;; backquote), and list `code-pages' under `features'.

(set-language-info-alist
 "Latin-6" `((coding-system latin-6)
             (coding-priority latin-6)
             (nonascii-translation . ,(get 'decode-iso-latin-6 'translation-table))
             (input-method . "latin-prefix")
             (features code-pages)
             (documentation . "Support for Latin-6."))
 '("European"))

(set-language-info-alist
 "Latin-7" `((coding-system latin-7)
             (coding-priority latin-7)
             (nonascii-translation . ,(get 'decode-iso-latin-7
                                           'translation-table))
             (input-method . "latin-prefix")
             (features code-pages)
             (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian."))
 '("European"))

(set-language-info-alist
 "Lithuanian" `((coding-system latin-7)
                (coding-priority latin-7)
                (input-method . "lithuanian-keyboard")
                (nonascii-translation . ,(get 'decode-iso-latin-7
                                              'translation-table))
                (features code-pages)
                (documentation . "Support for Lithuanian."))
 '("European"))

(set-language-info-alist
 "Latvian" `((coding-system latin-7)
             (coding-priority latin-7)
             (input-method . "latvian-keyboard")
             (nonascii-translation . ,(get 'decode-iso-latin-7
                                           'translation-table))
             (features code-pages)
             (documentation . "Support for Latvian."))
 '("European"))

(set-language-info-alist
 "Swedish" '((tutorial . "TUTORIAL.sv")
             (charset ascii latin-iso8859-1)
             (coding-system iso-latin-1)
             (coding-priority iso-latin-1)
             (nonascii-translation . latin-iso8859-1)
             (unibyte-syntax . "latin-1")
             (unibyte-display . iso-latin-1)
             (sample-text . "Goddag Hej")
             (documentation . "Support for Swedish"))
 '("European"))

(set-language-info-alist
 "Croatian" '((charset . (ascii latin-iso8859-2))
              (coding-system . (iso-8859-2))
              (coding-priority . (iso-8859-2))
              (input-method . "croatian")
              (nonascii-translation . latin-iso8859-2)
              (unibyte-syntax . "latin-2")
              (unibyte-display . iso-8859-2)
              (documentation . "Support for Croatian with Latin-2 encoding."))
 '("European"))

(set-language-info-alist
 "Brazilian Portuguese" '((tutorial . "TUTORIAL.pt_BR")
                          (charset ascii latin-iso8859-1)
                          (coding-system iso-latin-1 iso-latin-9)
                          (coding-priority iso-latin-1)
                          (nonascii-translation . latin-iso8859-1)
                          (unibyte-syntax . "latin-1")
                          (unibyte-display . iso-latin-1)
                          (input-method . "latin-1-prefix")
                          (sample-text . "Oi")
                          (documentation . "Support for Brazilian Portuguese."))
 '("European"))

(set-language-info-alist
 "Esperanto" '((tutorial . "TUTORIAL.eo")
               (charset ascii latin-iso8859-3)
               (coding-system iso-latin-3)
               (coding-priority iso-latin-3)
               (nonascii-translation . latin-iso8859-3)
               (unibyte-syntax . "latin-3")
               (unibyte-display . iso-latin-3)
               (input-method . "latin-3-prefix")
               (documentation . "Support for Esperanto with ISO-8859-3 character set."))
 '("European"))


;; Definitions for the Mac Roman character sets and coding system.
;; The Mac Roman encoding uses all 128 code points in the range 128 to
;; 255 for actual characters. Emacs decodes them to one of the
;; following character sets.
;; ascii, latin-iso8859-1, mule-unicode-0100-24ff,
;; mule-unicode-2500-33ff, mule-unicode-e000-ffff

;; Build the Mac Roman translation tables.  ENCODING-VECTOR maps each
;; byte value 0..255 to an Emacs character: 0..127 map to themselves
;; and 128..255 map to the character for the UCS code point listed in
;; VEC.  The resulting table is registered as `mac-roman-decoder', and
;; the table stored in its extra slot 0 as `mac-roman-encoder'.
(let
    ((encoding-vector (make-vector 256 nil))
     (i 0)
     (vec ;; mac-roman (128..255) -> UCS mapping
      [ #x00C4 ;; 128:LATIN CAPITAL LETTER A WITH DIAERESIS
        #x00C5 ;; 129:LATIN CAPITAL LETTER A WITH RING ABOVE
        #x00C7 ;; 130:LATIN CAPITAL LETTER C WITH CEDILLA
        #x00C9 ;; 131:LATIN CAPITAL LETTER E WITH ACUTE
        #x00D1 ;; 132:LATIN CAPITAL LETTER N WITH TILDE
        #x00D6 ;; 133:LATIN CAPITAL LETTER O WITH DIAERESIS
        #x00DC ;; 134:LATIN CAPITAL LETTER U WITH DIAERESIS
        #x00E1 ;; 135:LATIN SMALL LETTER A WITH ACUTE
        #x00E0 ;; 136:LATIN SMALL LETTER A WITH GRAVE
        #x00E2 ;; 137:LATIN SMALL LETTER A WITH CIRCUMFLEX
        #x00E4 ;; 138:LATIN SMALL LETTER A WITH DIAERESIS
        #x00E3 ;; 139:LATIN SMALL LETTER A WITH TILDE
        #x00E5 ;; 140:LATIN SMALL LETTER A WITH RING ABOVE
        #x00E7 ;; 141:LATIN SMALL LETTER C WITH CEDILLA
        #x00E9 ;; 142:LATIN SMALL LETTER E WITH ACUTE
        #x00E8 ;; 143:LATIN SMALL LETTER E WITH GRAVE
        #x00EA ;; 144:LATIN SMALL LETTER E WITH CIRCUMFLEX
        #x00EB ;; 145:LATIN SMALL LETTER E WITH DIAERESIS
        #x00ED ;; 146:LATIN SMALL LETTER I WITH ACUTE
        #x00EC ;; 147:LATIN SMALL LETTER I WITH GRAVE
        #x00EE ;; 148:LATIN SMALL LETTER I WITH CIRCUMFLEX
        #x00EF ;; 149:LATIN SMALL LETTER I WITH DIAERESIS
        #x00F1 ;; 150:LATIN SMALL LETTER N WITH TILDE
        #x00F3 ;; 151:LATIN SMALL LETTER O WITH ACUTE
        #x00F2 ;; 152:LATIN SMALL LETTER O WITH GRAVE
        #x00F4 ;; 153:LATIN SMALL LETTER O WITH CIRCUMFLEX
        #x00F6 ;; 154:LATIN SMALL LETTER O WITH DIAERESIS
        #x00F5 ;; 155:LATIN SMALL LETTER O WITH TILDE
        #x00FA ;; 156:LATIN SMALL LETTER U WITH ACUTE
        #x00F9 ;; 157:LATIN SMALL LETTER U WITH GRAVE
        #x00FB ;; 158:LATIN SMALL LETTER U WITH CIRCUMFLEX
        #x00FC ;; 159:LATIN SMALL LETTER U WITH DIAERESIS
        #x2020 ;; 160:DAGGER
        #x00B0 ;; 161:DEGREE SIGN
        #x00A2 ;; 162:CENT SIGN
        #x00A3 ;; 163:POUND SIGN
        #x00A7 ;; 164:SECTION SIGN
        #x2022 ;; 165:BULLET
        #x00B6 ;; 166:PILCROW SIGN
        #x00DF ;; 167:LATIN SMALL LETTER SHARP S
        #x00AE ;; 168:REGISTERED SIGN
        #x00A9 ;; 169:COPYRIGHT SIGN
        #x2122 ;; 170:TRADE MARK SIGN
        #x00B4 ;; 171:ACUTE ACCENT
        #x00A8 ;; 172:DIAERESIS
        #x2260 ;; 173:NOT EQUAL TO
        #x00C6 ;; 174:LATIN CAPITAL LETTER AE
        #x00D8 ;; 175:LATIN CAPITAL LETTER O WITH STROKE
        #x221E ;; 176:INFINITY
        #x00B1 ;; 177:PLUS-MINUS SIGN
        #x2264 ;; 178:LESS-THAN OR EQUAL TO
        #x2265 ;; 179:GREATER-THAN OR EQUAL TO
        #x00A5 ;; 180:YEN SIGN
        #x00B5 ;; 181:MICRO SIGN
        #x2202 ;; 182:PARTIAL DIFFERENTIAL
        #x2211 ;; 183:N-ARY SUMMATION
        #x220F ;; 184:N-ARY PRODUCT
        #x03C0 ;; 185:GREEK SMALL LETTER PI
        #x222B ;; 186:INTEGRAL
        #x00AA ;; 187:FEMININE ORDINAL INDICATOR
        #x00BA ;; 188:MASCULINE ORDINAL INDICATOR
        #x03A9 ;; 189:GREEK CAPITAL LETTER OMEGA
        #x00E6 ;; 190:LATIN SMALL LETTER AE
        #x00F8 ;; 191:LATIN SMALL LETTER O WITH STROKE
        #x00BF ;; 192:INVERTED QUESTION MARK
        #x00A1 ;; 193:INVERTED EXCLAMATION MARK
        #x00AC ;; 194:NOT SIGN
        #x221A ;; 195:SQUARE ROOT
        #x0192 ;; 196:LATIN SMALL LETTER F WITH HOOK
        #x2248 ;; 197:ALMOST EQUAL TO
        #x2206 ;; 198:INCREMENT
        #x00AB ;; 199:LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
        #x00BB ;; 200:RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
        #x2026 ;; 201:HORIZONTAL ELLIPSIS
        #x00A0 ;; 202:NO-BREAK SPACE
        #x00C0 ;; 203:LATIN CAPITAL LETTER A WITH GRAVE
        #x00C3 ;; 204:LATIN CAPITAL LETTER A WITH TILDE
        #x00D5 ;; 205:LATIN CAPITAL LETTER O WITH TILDE
        #x0152 ;; 206:LATIN CAPITAL LIGATURE OE
        #x0153 ;; 207:LATIN SMALL LIGATURE OE
        #x2013 ;; 208:EN DASH
        #x2014 ;; 209:EM DASH
        #x201C ;; 210:LEFT DOUBLE QUOTATION MARK
        #x201D ;; 211:RIGHT DOUBLE QUOTATION MARK
        #x2018 ;; 212:LEFT SINGLE QUOTATION MARK
        #x2019 ;; 213:RIGHT SINGLE QUOTATION MARK
        #x00F7 ;; 214:DIVISION SIGN
        #x25CA ;; 215:LOZENGE
        #x00FF ;; 216:LATIN SMALL LETTER Y WITH DIAERESIS
        #x0178 ;; 217:LATIN CAPITAL LETTER Y WITH DIAERESIS
        #x2044 ;; 218:FRACTION SLASH
        #x20AC ;; 219:EURO SIGN
        #x2039 ;; 220:SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        #x203A ;; 221:SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        #xFB01 ;; 222:LATIN SMALL LIGATURE FI
        #xFB02 ;; 223:LATIN SMALL LIGATURE FL
        #x2021 ;; 224:DOUBLE DAGGER
        #x00B7 ;; 225:MIDDLE DOT
        #x201A ;; 226:SINGLE LOW-9 QUOTATION MARK
        #x201E ;; 227:DOUBLE LOW-9 QUOTATION MARK
        #x2030 ;; 228:PER MILLE SIGN
        #x00C2 ;; 229:LATIN CAPITAL LETTER A WITH CIRCUMFLEX
        #x00CA ;; 230:LATIN CAPITAL LETTER E WITH CIRCUMFLEX
        #x00C1 ;; 231:LATIN CAPITAL LETTER A WITH ACUTE
        #x00CB ;; 232:LATIN CAPITAL LETTER E WITH DIAERESIS
        #x00C8 ;; 233:LATIN CAPITAL LETTER E WITH GRAVE
        #x00CD ;; 234:LATIN CAPITAL LETTER I WITH ACUTE
        #x00CE ;; 235:LATIN CAPITAL LETTER I WITH CIRCUMFLEX
        #x00CF ;; 236:LATIN CAPITAL LETTER I WITH DIAERESIS
        #x00CC ;; 237:LATIN CAPITAL LETTER I WITH GRAVE
        #x00D3 ;; 238:LATIN CAPITAL LETTER O WITH ACUTE
        #x00D4 ;; 239:LATIN CAPITAL LETTER O WITH CIRCUMFLEX
        #xF8FF ;; 240:Apple logo
        #x00D2 ;; 241:LATIN CAPITAL LETTER O WITH GRAVE
        #x00DA ;; 242:LATIN CAPITAL LETTER U WITH ACUTE
        #x00DB ;; 243:LATIN CAPITAL LETTER U WITH CIRCUMFLEX
        #x00D9 ;; 244:LATIN CAPITAL LETTER U WITH GRAVE
        #x0131 ;; 245:LATIN SMALL LETTER DOTLESS I
        #x02C6 ;; 246:MODIFIER LETTER CIRCUMFLEX ACCENT
        #x02DC ;; 247:SMALL TILDE
        #x00AF ;; 248:MACRON
        #x02D8 ;; 249:BREVE
        #x02D9 ;; 250:DOT ABOVE
        #x02DA ;; 251:RING ABOVE
        #x00B8 ;; 252:CEDILLA
        #x02DD ;; 253:DOUBLE ACUTE ACCENT
        #x02DB ;; 254:OGONEK
        #x02C7 ;; 255:CARON
        ])
     translation-table)
  ;; Byte values 0..127 translate to themselves.
  (while (< i 128)
    (aset encoding-vector i i)
    (setq i (1+ i)))
  ;; Byte values 128..255 translate to the character decoded from the
  ;; UCS code point at the matching position of VEC.
  (while (< i 256)
    (aset encoding-vector i
          (decode-char 'ucs (aref vec (- i 128))))
    (setq i (1+ i)))
  (setq translation-table
        (make-translation-table-from-vector encoding-vector))
  (define-translation-table 'mac-roman-decoder translation-table)
  ;; `make-translation-table-from-vector' is documented to keep the
  ;; reverse table in extra slot 0; that table drives encoding.
  (define-translation-table 'mac-roman-encoder
    (char-table-extra-slot translation-table 0)))

;; Decoder: classify each input byte r1 as ascii (< 128),
;; eight-bit-control (< 160) or eight-bit-graphic, translate the
;; (charset, code) pair through `mac-roman-decoder', and write the
;; resulting character.
(define-ccl-program decode-mac-roman
  `(4
    ((loop
      (read r1)
      (if (r1 < 128) ;; ASCII
          (r0 = ,(charset-id 'ascii))
        (if (r1 < 160)
            (r0 = ,(charset-id 'eight-bit-control))
          (r0 = ,(charset-id 'eight-bit-graphic))))
      (translate-character mac-roman-decoder r0 r1)
      (write-multibyte-character r0 r1)
      (repeat))))
  "CCL program to decode Mac Roman")

;; Encoder: translate each character through
;; `ucs-mule-to-mule-unicode' and then `mac-roman-encoder'.  If the
;; result is in none of ascii, eight-bit-graphic or eight-bit-control,
;; a `?' is written in its place.
(define-ccl-program encode-mac-roman
  `(1
    ((loop
      (read-multibyte-character r0 r1)
      (translate-character ucs-mule-to-mule-unicode r0 r1)
      (translate-character mac-roman-encoder r0 r1)
      (if (r0 != ,(charset-id 'ascii))
          (if (r0 != ,(charset-id 'eight-bit-graphic))
              (if (r0 != ,(charset-id 'eight-bit-control))
                  (r1 = ??))))
      (write-repeat r1))))
  "CCL program to encode Mac Roman")

;; The `mac-roman' coding system pairs the two CCL programs above.
(make-coding-system
 'mac-roman 4 ?M
 "Mac Roman Encoding (MIME:MACINTOSH)."
 '(decode-mac-roman . encode-mac-roman)
 (list (cons 'safe-chars (get 'mac-roman-encoder 'translation-table))
       '(valid-codes (0 . 255))
       '(mime-charset . macintosh)))    ; per IANA, rfc1345

(defconst diacritic-composition-pattern "\\C^\\c^+"
  "Regexp matching a sequence to compose.
It matches one character that is not in category `^' followed by one
or more characters that are in category `^'.")

(defun diacritic-compose-region (beg end)
  "Compose diacritic characters in the region.
When called from a program, expects two arguments,
positions (integers or markers) specifying the region."
  (interactive "r")
  (save-restriction
    (narrow-to-region beg end)
    (goto-char (point-min))
    ;; Compose each match of the pattern in turn.
    (while (re-search-forward diacritic-composition-pattern nil t)
      (compose-region (match-beginning 0) (match-end 0)))))

(defun diacritic-compose-string (string)
  "Compose diacritic characters in STRING and return the resulting string."
  (let ((idx 0))
    ;; Resume each search at the end of the previous match.
    (while (setq idx (string-match diacritic-composition-pattern string idx))
      (compose-string string idx (match-end 0))
      (setq idx (match-end 0))))
  string)

(defun diacritic-compose-buffer ()
  "Compose diacritic characters in the current buffer."
  (interactive)
  (diacritic-compose-region (point-min) (point-max)))

(defun diacritic-post-read-conversion (len)
  "Compose diacritic characters in the LEN characters following point.
Return LEN unchanged."
  (diacritic-compose-region (point) (+ (point) len))
  len)

(defun diacritic-composition-function (from to pattern &optional string)
  "Compose diacritic text in the region FROM and TO.
The text matches the regular expression PATTERN.
Optional 4th argument STRING, if non-nil, is a string containing text
to compose.

The return value is the number of composed characters, or nil if the
range FROM..TO holds fewer than two characters and nothing is composed."
  (when (< (1+ from) to)
    (if string
        (compose-string string from to)
      (compose-region from to))
    (- to from)))

;; Register a function to compose Unicode diacritics and marks.
;; The entry is installed for U+0300..U+0362 and U+20D0..U+20E3, and
;; uses `diacritic-composition-pattern' so that the table and the
;; explicit compose commands above always agree on the regexp.
(let ((patterns (list (cons diacritic-composition-pattern
                            'diacritic-composition-function))))
  (let ((c #x300))
    (while (<= c #x362)
      (aset composition-function-table (decode-char 'ucs c) patterns)
      (setq c (1+ c)))
    (setq c #x20d0)
    (while (<= c #x20e3)
      (aset composition-function-table (decode-char 'ucs c) patterns)
      (setq c (1+ c)))))

(provide 'european)

;;; arch-tag: 9e018b12-fb02-4120-907b-9adeaf84b5c2
;;; european.el ends here