;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*-

;; Copyright (C) 1997, 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007
;;   Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
;;   2005, 2006, 2007
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H14PRO021

;; Author: Kenichi Handa <handa@etl.go.jp>
;; Keywords: multilingual, Cyrillic, i18n

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;; Commentary:

;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ
;; are converted to Unicode internally. See
;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>. For more info
;; on Cyrillic charsets, see
;; <URL:http://czyborra.com/charsets/cyrillic.html>. The KOI and
;; Alternativnyj coding systems should live in code-pages.el, but
;; they've always been preloaded and the coding system autoload
;; mechanism didn't get accepted, so they have to stay here and
;; duplicate code-pages stuff.

;; Note that 8859-5 maps directly onto the Unicode Cyrillic block,
;; apart from codepoints 160 (NBSP, c.f. U+0400), 173 (soft hyphen,
;; c.f. U+040D) and 253 (section sign, c.f. U+045D). The KOI-8 and
;; Alternativnyj coding systems encode both 8859-5 and Unicode.
;; ucs-tables.el provides unification for cyrillic-iso-8bit.

;; Customizing `utf-fragment-on-decoding' allows decoding characters
;; from KOI and Alternativnyj into 8859-5 where that's possible.
;; cyrillic-iso8859-5 characters take half as much space in the buffer
;; as the mule-unicode-0100-24ff equivalents, though that's probably
;; not normally a big deal.

;;; Code:

;; Cyrillic (general)

;; ISO-8859-5 stuff

;; ISO 2022 based coding system (type 2, mnemonic ?5) whose safe
;; charsets are ascii and cyrillic-iso8859-5.
(make-coding-system
 'cyrillic-iso-8bit 2 ?5
 "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)."
 '(ascii cyrillic-iso8859-5 nil nil
   nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii cyrillic-iso8859-5)
   (mime-charset . iso-8859-5)))

(define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit)

;; NOTE(review): the sample-text string below looks like an
;; ISO-2022-7bit sequence whose ESC bytes are missing in this copy --
;; TODO confirm against the upstream file before loading.
(set-language-info-alist
 "Cyrillic-ISO" '((charset cyrillic-iso8859-5)
                  (coding-system cyrillic-iso-8bit)
                  (coding-priority cyrillic-iso-8bit)
                  (input-method . "cyrillic-yawerty") ; fixme
                  (nonascii-translation . cyrillic-iso8859-5)
                  (unibyte-display . cyrillic-iso-8bit)
                  (features cyril-util)
                  (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!")
                  (documentation . "Support for Cyrillic ISO-8859-5."))
 '("Cyrillic"))

;; KOI-8R stuff

;; The mule-unicode portion of this is from
;; http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT,
;; which references RFC 1489.
;; Byte -> character vector for KOI8-R.  The first 128 entries map
;; each byte to itself; the remaining 128 entries give the character
;; for bytes 128..255.
;; NOTE(review): the character literals below look like ISO-2022-7bit
;; sequences whose ESC bytes (and possibly some whitespace) are
;; missing in this copy -- TODO restore from the upstream file before
;; loading.
(defvar cyrillic-koi8-r-decode-table
  [
   0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
   32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
   48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
   64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
   80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
   96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
   112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
   ;; 8859-5 plus Unicode
   ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B
   ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1s"(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B
   ?$,2 p(B ?$,2 q(B ?$,2 r(B ?,Lq(B ?$,2 s(B ?$,2 t(B ?$,2 u(B ?$,2 v(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,2 |(B ?$,2 }(B ?$,2 ~(B
   ?$,2 (B ?$,2! (B ?$,2!!(B ?,L!(B ?$,2!"(B ?$,2!#(B ?$,2!$(B ?$,2!%(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,2!+(B ?$,2!,(B ?,A)(B
   ?,Ln(B ?,LP(B ?,LQ(B ?,Lf(B ?,LT(B ?,LU(B ?,Ld(B ?,LS(B ?,Le(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B
   ?,L_(B ?,Lo(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,LV(B ?,LR(B ?,Ll(B ?,Lk(B ?,LW(B ?,Lh(B ?,Lm(B ?,Li(B ?,Lg(B ?,Lj(B
   ?,LN(B ?,L0(B ?,L1(B ?,LF(B ?,L4(B ?,L5(B ?,LD(B ?,L3(B ?,LE(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B
   ?,L?(B ?,LO(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,L6(B ?,L2(B ?,LL(B ?,LK(B ?,L7(B ?,LH(B ?,LM(B ?,LI(B ?,LG(B ?,LJ(B
   ;; All Unicode:
;; ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B
;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1s"(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,A (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B
;; ?$,2 p(B ?$,2 q(B ?$,2 r(B ?$,1(q(B ?$,2 s(B ?$,2 t(B ?$,2 u(B ?$,2 v(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,2 |(B ?$,2 }(B ?$,2 ~(B
;; ?$,2 (B ?$,2! (B ?$,2!!(B ?$,1(!(B ?$,2!"(B ?$,2!#(B ?$,2!$(B ?$,2!%(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,2!+(B ?$,2!,(B ?,A)(B
;; ?$,1(n(B ?$,1(P(B ?$,1(Q(B ?$,1(f(B ?$,1(T(B ?$,1(U(B ?$,1(d(B ?$,1(S(B ?$,1(e(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B
;; ?$,1(_(B ?$,1(o(B ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(V(B ?$,1(R(B ?$,1(l(B ?$,1(k(B ?$,1(W(B ?$,1(h(B ?$,1(m(B ?$,1(i(B ?$,1(g(B ?$,1(j(B
;; ?$,1(N(B ?$,1(0(B ?$,1(1(B ?$,1(F(B ?$,1(4(B ?$,1(5(B ?$,1(D(B ?$,1(3(B ?$,1(E(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B
;; ?$,1(?(B ?$,1(O(B ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(6(B ?$,1(2(B ?$,1(L(B ?$,1(K(B ?$,1(7(B ?$,1(H(B ?$,1(M(B ?$,1(I(B ?$,1(G(B ?$,1(J(B
   ]
  "Cyrillic KOI8-R decoding table.")

;; Build the decode translation table from the vector, and register
;; what is stored in its extra slot 0 as the matching encode table.
(let ((table (make-translation-table-from-vector
              cyrillic-koi8-r-decode-table)))
  (define-translation-table 'cyrillic-koi8-r-nonascii-translation-table table)
  (define-translation-table 'cyrillic-koi8-r-encode-table
    (char-table-extra-slot table 0)))

;; No point in keeping it around. (It can't be let-bound, since it's
;; needed for macro expansion.)
(makunbound 'cyrillic-koi8-r-decode-table)

;; NOTE(review): throughout this section the character literals and
;; sample-text strings look like ISO-2022-7bit sequences whose ESC
;; bytes (and possibly some whitespace) are missing in this copy --
;; TODO restore from the upstream file before loading.

;; Decoder: bytes below 128 are copied through unchanged; any other
;; byte is mapped through the KOI8-R table, then through
;; `ucs-translation-table-for-decode', and written as a multibyte
;; character.
(define-ccl-program ccl-decode-koi8
  `(4
    ((loop
      (r0 = 0)
      (read r1)
      (if (r1 < 128)
          (write-repeat r1)
        ((translate-character cyrillic-koi8-r-nonascii-translation-table r0 r1)
         (translate-character ucs-translation-table-for-decode r0 r1)
         (write-multibyte-character r0 r1)
         (repeat))))))
  "CCL program to decode KOI8-R.")

;; Encoder: each character is mapped through the encode table; if the
;; resulting charset is none of ascii, eight-bit-graphic or
;; eight-bit-control, a `?' is written instead.
(define-ccl-program ccl-encode-koi8
  `(1
    ((loop
      (read-multibyte-character r0 r1)
      (translate-character cyrillic-koi8-r-encode-table r0 r1)
      (if (r0 != ,(charset-id 'ascii))
          (if (r0 != ,(charset-id 'eight-bit-graphic))
              (if (r0 != ,(charset-id 'eight-bit-control))
                  (r1 = ??))))
      (write-repeat r1))))
  "CCL program to encode KOI8-R.")

(defun cyrillic-unify-encoding (table)
  "Set up equivalent characters in the encoding TABLE.
This works whether or not the table is Unicode-based or
8859-5-based. (Only appropriate for Cyrillic.)"
  ;; TABLE is a symbol; the char-table is read from its
  ;; `translation-table' property.
  (let ((table (get table 'translation-table)))
    (dotimes (i 96)
      (let* ((c (make-char 'cyrillic-iso8859-5 (+ i 32)))
             (u                         ; equivalent Unicode char
              (cond ((eq c ?,L (B) ?,A (B)
                    ((eq c ?,L-(B) ?,A-(B)
                    ((eq c ?,L}(B) ?,A'(B)
                    (t (decode-char 'ucs (+ #x400 i)))))
             (ec (aref table c))        ; encoding of 8859-5
             (uc (aref table u)))       ; encoding of Unicode
        (unless (memq c '(?,L (B ?,L-(B ?,L}(B)) ; 8859-5 exceptions
          ;; Copy whichever encoding exists onto the counterpart that
          ;; has none, so both forms of the character encode alike.
          (unless uc
            (aset table u ec))
          (unless ec
            (aset table c uc)))))))

(cyrillic-unify-encoding 'cyrillic-koi8-r-encode-table)

(make-coding-system
 'cyrillic-koi8 4
 ;; We used to use ?K. It is true that ?K is more strictly correct,
 ;; but it is also used for Korean.
 ;; So people who use koi8 for languages other than Russian
 ;; will have to forgive us.
 ?R "KOI8-R 8-bit encoding for Cyrillic (MIME: KOI8-R)."
 '(ccl-decode-koi8 . ccl-encode-koi8)
 `((safe-chars . cyrillic-koi8-r-encode-table)
   (mime-charset . koi8-r)
   (valid-codes (0 . 255))
   (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode)))

(define-coding-system-alias 'koi8-r 'cyrillic-koi8)
(define-coding-system-alias 'koi8 'cyrillic-koi8)
(define-coding-system-alias 'cp878 'cyrillic-koi8)

;; Add the "koi8-r" entry to `ctext-non-standard-encodings-alist', or
;; overwrite the existing entry's data if one is already present.
(let ((elt `("koi8-r" koi8-r 1
             ,(get 'cyrillic-koi8-r-encode-table 'translation-table)))
      (slot (assoc "koi8-r" ctext-non-standard-encodings-alist)))
  (if slot
      (setcdr slot (cdr elt))
    (push elt ctext-non-standard-encodings-alist)))

;; Allow displaying some of KOI & al with an 8859-5-encoded font. We
;; won't bother about the exceptions when encoding the font, since
;; NBSP will fall through below and work anyhow, and we'll have
;; avoided setting the fontset for the other two to 8859-5 -- they're
;; not in KOI and Alternativnyj anyhow.
(define-ccl-program ccl-encode-8859-5-font
  `(0
    ((if (r0 == ,(charset-id 'cyrillic-iso8859-5))
         (r1 += 128)
       (if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
           (r1 = (r2 + 128))))))
  "Encode ISO 8859-5 and Cyrillic Unicode chars to 8859-5 font.")

(add-to-list 'font-ccl-encoder-alist '("iso8859-5" . ccl-encode-8859-5-font))

;; The table is set up later to encode both Unicode and 8859-5.
(define-ccl-program ccl-encode-koi8-font
  `(0
    ;; Both statements must live in ONE block.  A CCL program has the
    ;; shape (BUFFER_MAGNIFICATION CCL_MAIN_CODE [CCL_EOF_CODE]), so a
    ;; second top-level statement would be compiled as the EOF code
    ;; instead of running as part of the main code.
    ((if (r2 >= 0)
         ;; Two code bytes in r1/r2: fold them into a single code in
         ;; r1 before the table lookup.
         ((r1 <<= 7)
          (r1 += r2)))
     (translate-character cyrillic-koi8-r-encode-table r0 r1)))
  "CCL program to encode Cyrillic chars to KOI font.")

(add-to-list 'font-ccl-encoder-alist '("koi8" . ccl-encode-koi8-font))

(set-language-info-alist
 "Cyrillic-KOI8" `((charset cyrillic-iso8859-5)
                   (nonascii-translation
                    . ,(get 'cyrillic-koi8-r-nonascii-translation-table
                            'translation-table))
                   (coding-system cyrillic-koi8)
                   (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
                   (ctext-non-standard-encodings "koi8-r")
                   (input-method . "russian-typewriter")
                   (features cyril-util)
                   (unibyte-display . cyrillic-koi8)
                   (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!")
                   (documentation . "Support for Cyrillic KOI8-R."))
 '("Cyrillic"))

(set-language-info-alist
 "Russian" `((charset cyrillic-iso8859-5)
             (nonascii-translation
              . ,(get 'cyrillic-koi8-r-nonascii-translation-table
                      'translation-table))
             (coding-system cyrillic-koi8)
             (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
             (input-method . "russian-computer")
             (features cyril-util)
             (unibyte-display . cyrillic-koi8)
             (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!")
             (documentation . "\
Support for Russian using koi8-r and the russian-computer input method.")
             (tutorial . "TUTORIAL.ru"))
 '("Cyrillic"))


;; Byte -> character vector for KOI8-U, laid out like the KOI8-R one:
;; 128 identity entries followed by the entries for bytes 128..255.
(defvar cyrillic-koi8-u-decode-table
  [
   0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
   32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
   48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
   64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
   80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
   96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
   112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
   ;; All Unicode:
;; ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B
;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1x9(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B
;; ?$,2 p(B ?$,2 q(B ?$,2 r(B ?$,1(q(B ?$,1(t(B ?$,2 t(B ?$,1(v(B ?$,1(w(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,1)Q(B ?$,2 }(B ?$,2 ~(B
;; ?$,2 (B ?$,2! (B ?$,2!!(B ?$,1(!(B ?$,1($(B ?$,2!#(B ?$,1(&(B ?$,1('(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,1)P(B ?$,2!,(B ?,A)(B
;; ?$,1(n(B ?$,1(P(B ?$,1(Q(B ?$,1(f(B ?$,1(T(B ?$,1(U(B ?$,1(d(B ?$,1(S(B ?$,1(e(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B
;; ?$,1(_(B ?$,1(o(B ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(V(B ?$,1(R(B ?$,1(l(B ?$,1(k(B ?$,1(W(B ?$,1(h(B ?$,1(m(B ?$,1(i(B ?$,1(g(B ?$,1(j(B
;; ?$,1(N(B ?$,1(0(B ?$,1(1(B ?$,1(F(B ?$,1(4(B ?$,1(5(B ?$,1(D(B ?$,1(3(B ?$,1(E(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B
;; ?$,1(?(B ?$,1(O(B ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(6(B ?$,1(2(B ?$,1(L(B ?$,1(K(B ?$,1(7(B ?$,1(H(B ?$,1(M(B ?$,1(I(B ?$,1(G(B ?$,1(J(B
;; 8859-5 plus Unicode:
   ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B
   ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1x9(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B
   ?$,2 p(B ?$,2 q(B ?$,2 r(B ?,Lq(B ?,Lt(B ?$,2 t(B ?,Lv(B ?,Lw(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,1)Q(B ?$,2 }(B ?$,2 ~(B
   ?$,2 (B ?$,2! (B ?$,2!!(B ?,L!(B ?,L$(B ?$,2!#(B ?,L&(B ?,L'(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,1)P(B ?$,2!,(B ?,A)(B
   ?,Ln(B ?,LP(B ?,LQ(B ?,Lf(B ?,LT(B ?,LU(B ?,Ld(B ?,LS(B ?,Le(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B
   ?,L_(B ?,Lo(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,LV(B ?,LR(B ?,Ll(B ?,Lk(B ?,LW(B ?,Lh(B ?,Lm(B ?,Li(B ?,Lg(B ?,Lj(B
   ?,LN(B ?,L0(B ?,L1(B ?,LF(B ?,L4(B ?,L5(B ?,LD(B ?,L3(B ?,LE(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B
   ?,L?(B ?,LO(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,L6(B ?,L2(B ?,LL(B ?,LK(B ?,L7(B ?,LH(B ?,LM(B ?,LI(B ?,LG(B ?,LJ(B
   ]
  "Cyrillic KOI8-U decoding table.")

(let ((table (make-translation-table-from-vector
              cyrillic-koi8-u-decode-table)))
  (define-translation-table 'cyrillic-koi8-u-nonascii-translation-table table)
  (define-translation-table 'cyrillic-koi8-u-encode-table
    (char-table-extra-slot table 0)))

(makunbound 'cyrillic-koi8-u-decode-table)

;; Same shape as `ccl-decode-koi8', using the KOI8-U table.
(define-ccl-program ccl-decode-koi8-u
  `(4
    ((loop
      (r0 = 0)
      (read r1)
      (if (r1 < 128)
          (write-repeat r1)
        ((translate-character cyrillic-koi8-u-nonascii-translation-table r0 r1)
         (translate-character ucs-translation-table-for-decode r0 r1)
         (write-multibyte-character r0 r1)
         (repeat))))))
  "CCL program to decode KOI8-U.")

;; Same shape as `ccl-encode-koi8', using the KOI8-U table.
(define-ccl-program ccl-encode-koi8-u
  `(1
    ((loop
      (read-multibyte-character r0 r1)
      (translate-character cyrillic-koi8-u-encode-table r0 r1)
      (if (r0 != ,(charset-id 'ascii))
          (if (r0 != ,(charset-id 'eight-bit-graphic))
              (if (r0 != ,(charset-id 'eight-bit-control))
                  (r1 = ??))))
      (write-repeat r1))))
  "CCL program to encode KOI8-U.")

(cyrillic-unify-encoding 'cyrillic-koi8-u-encode-table)

(make-coding-system
 'koi8-u 4
 ?U "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
 '(ccl-decode-koi8-u . ccl-encode-koi8-u)
 `((safe-chars . cyrillic-koi8-u-encode-table)
   (mime-charset . koi8-u)
   (valid-codes (0 . 255))
   (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode)))

(define-ccl-program ccl-encode-koi8-u-font
  `(0
    ;; Fold r1/r2 into one code before the lookup, exactly as
    ;; `ccl-encode-koi8-font' does, so that characters carrying two
    ;; code bytes are looked up consistently across the font encoders.
    ;; NOTE(review): relies on r2 being negative when only r1 carries
    ;; the code, as the KOI8-R font encoder already assumes -- confirm.
    ((if (r2 >= 0)
         ((r1 <<= 7)
          (r1 += r2)))
     (translate-character cyrillic-koi8-u-encode-table r0 r1)))
  "CCL program to encode Cyrillic chars to KOI-U font.")

(add-to-list 'font-ccl-encoder-alist '("koi8-u" . ccl-encode-koi8-u-font))

(set-language-info-alist
 "Ukrainian" `((coding-system koi8-u)
               (coding-priority koi8-u)
               (nonascii-translation
                . ,(get 'cyrillic-koi8-u-nonascii-translation-table
                        'translation-table))
               (input-method . "ukrainian-computer")
               (documentation
                . "Support for Ukrainian with KOI8-U character set."))
 '("Cyrillic"))

;;; ALTERNATIVNYJ stuff

;; Fixme: It's unclear what's the correct table. I've found
;; statements both that it's the same as cp866 and somewhat different,
;; but nothing that looks really definitive.
(defvar cyrillic-alternativnyj-decode-table
  [
   0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
   32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
   48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
   64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
   80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
   96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
   112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
;; ?$,1(0(B ?$,1(1(B ?$,1(2(B ?$,1(3(B ?$,1(4(B ?$,1(5(B ?$,1(6(B ?$,1(7(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B ?$,1(?(B
;; ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(D(B ?$,1(E(B ?$,1(F(B ?$,1(G(B ?$,1(H(B ?$,1(I(B ?$,1(J(B ?$,1(K(B ?$,1(L(B ?$,1(M(B ?$,1(N(B ?$,1(O(B
;; ?$,1(P(B ?$,1(Q(B ?$,1(R(B ?$,1(S(B ?$,1(T(B ?$,1(U(B ?$,1(V(B ?$,1(W(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B ?$,1(_(B
;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,2 "(B ?$,2 D(B ?$,2!!(B ?$,2!"(B ?$,2 v(B ?$,2 u(B ?$,2!#(B ?$,2 q(B ?$,2 w(B ?$,2 }(B ?$,2 |(B ?$,2 {(B ?$,2 0(B
;; ?$,2 4(B ?$,2 T(B ?$,2 L(B ?$,2 <(B ?$,2 (B ?$,2 \(B ?$,2 ~(B ?$,2 (B ?$,2 z(B ?$,2 t(B ?$,2!)(B ?$,2!&(B ?$,2! (B ?$,2 p(B ?$,2!,(B ?$,2!'(B
;; ?$,2!((B ?$,2!$(B ?$,2!%(B ?$,2 y(B ?$,2 x(B ?$,2 r(B ?$,2 s(B ?$,2!+(B ?$,2!*(B ?$,2 8(B ?$,2 ,(B ?$,2!H(B ?$,2!D(B ?$,2!L(B ?$,2!P(B ?$,2!@(B
;; ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(d(B ?$,1(e(B ?$,1(f(B ?$,1(g(B ?$,1(h(B ?$,1(i(B ?$,1(j(B ?$,1(k(B ?$,1(l(B ?$,1(m(B ?$,1(n(B ?$,1(o(B
;; ?$,1(!(B ?$,1(q(B ?$,1ry(B ?$,1rx(B ?$,1%A(B ?$,1%@(B ?$,1s:(B ?$,1s9(B ?$,1vq(B ?$,1vs(B ?,A1(B ?,Aw(B ?$,1uV(B ?,A$(B ?$,2!`(B ?,A (B ;
;; 8859+Unicode
   ?,L0(B ?,L1(B ?,L2(B ?,L3(B ?,L4(B ?,L5(B ?,L6(B ?,L7(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B ?,L?(B
   ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,LD(B ?,LE(B ?,LF(B ?,LG(B ?,LH(B ?,LI(B ?,LJ(B ?,LK(B ?,LL(B ?,LM(B ?,LN(B ?,LO(B
   ?,LP(B ?,LQ(B ?,LR(B ?,LS(B ?,LT(B ?,LU(B ?,LV(B ?,LW(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B ?,L_(B
   ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,2 "(B ?$,2 D(B ?$,2!!(B ?$,2!"(B ?$,2 v(B ?$,2 u(B ?$,2!#(B ?$,2 q(B ?$,2 w(B ?$,2 }(B ?$,2 |(B ?$,2 {(B ?$,2 0(B
   ?$,2 4(B ?$,2 T(B ?$,2 L(B ?$,2 <(B ?$,2 (B ?$,2 \(B ?$,2 ~(B ?$,2 (B ?$,2 z(B ?$,2 t(B ?$,2!)(B ?$,2!&(B ?$,2! (B ?$,2 p(B ?$,2!,(B ?$,2!'(B
   ?$,2!((B ?$,2!$(B ?$,2!%(B ?$,2 y(B ?$,2 x(B ?$,2 r(B ?$,2 s(B ?$,2!+(B ?$,2!*(B ?$,2 8(B ?$,2 ,(B ?$,2!H(B ?$,2!D(B ?$,2!L(B ?$,2!P(B ?$,2!@(B
   ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,Ld(B ?,Le(B ?,Lf(B ?,Lg(B ?,Lh(B ?,Li(B ?,Lj(B ?,Lk(B ?,Ll(B ?,Lm(B ?,Ln(B ?,Lo(B
   ;; Taken from http://www.cyrillic.com/ref/cyrillic/koi-8alt.html
   ;; with guesses for the Unicodes of the glyphs in the absence of a
   ;; table.
   ?,L!(B ?,Lq(B ?$,1ry(B ?$,1rx(B ?$,1%A(B ?$,1%@(B ?$,1s:(B ?$,1s9(B ?$,1vq(B ?$,1vs(B ?,A1(B ?,Aw(B ?,Lp(B ?,A$(B ?$,2!`(B ?,L (B]
  "Cyrillic ALTERNATIVNYJ decoding table.")

(let ((table (make-translation-table-from-vector
              cyrillic-alternativnyj-decode-table)))
  (define-translation-table 'cyrillic-alternativnyj-nonascii-translation-table
    table)
  (define-translation-table 'cyrillic-alternativnyj-encode-table
    (char-table-extra-slot table 0)))

(makunbound 'cyrillic-alternativnyj-decode-table)

;; Same shape as `ccl-decode-koi8', using the Alternativnyj table.
(define-ccl-program ccl-decode-alternativnyj
  `(4
    ((loop
      (r0 = 0)
      (read r1)
      (if (r1 < 128)
          (write-repeat r1)
        ((translate-character cyrillic-alternativnyj-nonascii-translation-table
                              r0 r1)
         (translate-character ucs-translation-table-for-decode r0 r1)
         (write-multibyte-character r0 r1)
         (repeat))))))
  "CCL program to decode Alternativnyj.")

;; Same shape as `ccl-encode-koi8', using the Alternativnyj table.
(define-ccl-program ccl-encode-alternativnyj
  `(1
    ((loop
      (read-multibyte-character r0 r1)
      (translate-character cyrillic-alternativnyj-encode-table r0 r1)
      (if (r0 != ,(charset-id 'ascii))
          (if (r0 != ,(charset-id 'eight-bit-graphic))
              (if (r0 != ,(charset-id 'eight-bit-control))
                  (r1 = ??))))
      (write-repeat r1))))
  "CCL program to encode Alternativnyj.")

(cyrillic-unify-encoding 'cyrillic-alternativnyj-encode-table)

(make-coding-system
 'cyrillic-alternativnyj 4 ?A
 "ALTERNATIVNYJ 8-bit encoding for Cyrillic."
 '(ccl-decode-alternativnyj . ccl-encode-alternativnyj)
 `((safe-chars . cyrillic-alternativnyj-encode-table)
   (valid-codes (0 . 255))
   (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode)))

(define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj)

(define-ccl-program ccl-encode-alternativnyj-font
  `(0
    ;; Fold r1/r2 into one code before the lookup, exactly as
    ;; `ccl-encode-koi8-font' does.
    ;; NOTE(review): relies on r2 being negative when only r1 carries
    ;; the code, as the KOI8-R font encoder already assumes -- confirm.
    ((if (r2 >= 0)
         ((r1 <<= 7)
          (r1 += r2)))
     (translate-character cyrillic-alternativnyj-encode-table r0 r1)))
  "CCL program to encode Cyrillic chars to Alternativnyj font.")

(add-to-list 'font-ccl-encoder-alist
             '("alternativnyj" . ccl-encode-alternativnyj-font))

(set-language-info-alist
 "Cyrillic-ALT" `((charset cyrillic-iso8859-5)
                  (nonascii-translation
                   . ,(get 'cyrillic-alternativnyj-nonascii-translation-table
                           'translation-table))
                  (coding-system cyrillic-alternativnyj)
                  (coding-priority cyrillic-alternativnyj)
                  (input-method . "russian-typewriter")
                  (features cyril-util)
                  (unibyte-display . cyrillic-alternativnyj)
                  (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!")
                  (documentation . "Support for Cyrillic ALTERNATIVNYJ."))
 '("Cyrillic"))

(set-language-info-alist
 "Tajik" `((coding-system cyrillic-koi8-t)
           (coding-priority cyrillic-koi8-t)
           (nonascii-translation
            . ,(get 'decode-koi8-t 'translation-table))
           (input-method . "russian-typewriter") ; fixme?
           (features code-pages)
           (documentation . "Support for Tajik using KOI8-T."))
 '("Cyrillic"))

;; windows-1251 is defined through `cp-make-coding-system'; the vector
;; holds one entry per byte from 128 upwards, with nil for an
;; unassigned byte.
(eval-and-compile
  (setq
   non-iso-charset-alist
   (cp-make-coding-system
    windows-1251
    [?\$,1("(B ?\$,1(#(B ?\$,1rz(B ?\$,1(s(B ?\$,1r~(B ?\$,1s&(B ?\$,1s (B ?\$,1s!(B ?\$,1tL(B ?\$,1s0(B ?\$,1()(B ?\$,1s9(B ?\$,1(*(B ?\$,1(,(B ?\$,1(+(B ?\$,1(/(B ?\$,1(r(B
     ?\$,1rx(B ?\$,1ry(B ?\$,1r|(B ?\$,1r}(B ?\$,1s"(B ?\$,1rs(B ?\$,1rt(B nil ?\$,1ub(B ?\$,1(y(B ?\$,1s:(B ?\$,1(z(B ?\$,1(|(B ?\$,1({(B ?\$,1((B ?\,A (B ?\$,1(.(B
     ?\$,1(~(B ?\$,1(((B ?\,A$(B ?\$,1)P(B ?\,A&(B ?\,A'(B ?\$,1(!(B ?\,A)(B ?\$,1($(B ?\,A+(B ?\,A,(B ?\,A-(B ?\,A.(B ?\$,1('(B ?\,A0(B ?\,A1(B ?\$,1(&(B
     ?\$,1(v(B ?\$,1)Q(B ?\,A5(B ?\,A6(B ?\,A7(B ?\$,1(q(B ?\$,1uV(B ?\$,1(t(B ?\,A;(B ?\$,1(x(B ?\$,1(%(B ?\$,1(u(B ?\$,1(w(B ?\$,1(0(B ?\$,1(1(B ?\$,1(2(B ?\$,1(3(B
     ?\$,1(4(B ?\$,1(5(B ?\$,1(6(B ?\$,1(7(B ?\$,1(8(B ?\$,1(9(B ?\$,1(:(B ?\$,1(;(B ?\$,1(<(B ?\$,1(=(B ?\$,1(>(B ?\$,1(?(B ?\$,1(@(B ?\$,1(A(B ?\$,1(B(B ?\$,1(C(B ?\$,1(D(B
     ?\$,1(E(B ?\$,1(F(B ?\$,1(G(B ?\$,1(H(B ?\$,1(I(B ?\$,1(J(B ?\$,1(K(B ?\$,1(L(B ?\$,1(M(B ?\$,1(N(B ?\$,1(O(B ?\$,1(P(B ?\$,1(Q(B ?\$,1(R(B ?\$,1(S(B ?\$,1(T(B ?\$,1(U(B
     ?\$,1(V(B ?\$,1(W(B ?\$,1(X(B ?\$,1(Y(B ?\$,1(Z(B ?\$,1([(B ?\$,1(\(B ?\$,1(](B ?\$,1(^(B ?\$,1(_(B ?\$,1(`(B ?\$,1(a(B ?\$,1(b(B ?\$,1(c(B ?\$,1(d(B ?\$,1(e(B ?\$,1(f(B
     ?\$,1(g(B ?\$,1(h(B ?\$,1(i(B ?\$,1(j(B ?\$,1(k(B ?\$,1(l(B ?\$,1(m(B ?\$,1(n(B ?\$,1(o(B] nil ?b)))

;; Register cyrillic-iso8859-5 characters in the encode table of
;; windows-1251.
(let ((table (get 'encode-windows-1251 'translation-table))
      ;; Nth element is a cyrillic-iso8859-5 character encoded to a
      ;; code (128 + N), or nil.
      (vec [?\,L"(B ?\,L#(B nil ?\,Ls(B nil nil nil nil nil nil ?\,L)(B nil ?\,L*(B ?\,L,(B ?\,L+(B ?\,L/(B
            ?\,Lr(B nil nil nil nil nil nil nil nil nil ?\,Ly(B nil ?\,Lz(B ?\,L|(B ?\,L{(B ?\,L(B
            nil ?\,L.(B ?\,L~(B ?\,L((B nil nil nil nil ?\,L!(B nil ?\,L$(B nil nil nil nil ?\,L'(B
            nil nil ?\,L&(B ?\,Lv(B nil nil nil nil ?\,Lq(B ?\,Lp(B ?\,Lt(B nil ?\,Lx(B ?\,L%(B ?\,Lu(B ?\,Lw(B
            ?\,L0(B ?\,L1(B ?\,L2(B ?\,L3(B ?\,L4(B ?\,L5(B ?\,L6(B ?\,L7(B ?\,L8(B ?\,L9(B ?\,L:(B ?\,L;(B ?\,L<(B ?\,L=(B ?\,L>(B ?\,L?(B
            ?\,L@(B ?\,LA(B ?\,LB(B ?\,LC(B ?\,LD(B ?\,LE(B ?\,LF(B ?\,LG(B ?\,LH(B ?\,LI(B ?\,LJ(B ?\,LK(B ?\,LL(B ?\,LM(B ?\,LN(B ?\,LO(B
            ?\,LP(B ?\,LQ(B ?\,LR(B ?\,LS(B ?\,LT(B ?\,LU(B ?\,LV(B ?\,LW(B ?\,LX(B ?\,LY(B ?\,LZ(B ?\,L[(B ?\,L\(B ?\,L](B ?\,L^(B ?\,L_(B
            ?\,L`(B ?\,La(B ?\,Lb(B ?\,Lc(B ?\,Ld(B ?\,Le(B ?\,Lf(B ?\,Lg(B ?\,Lh(B ?\,Li(B ?\,Lj(B ?\,Lk(B ?\,Ll(B ?\,Lm(B ?\,Ln(B ?\,Lo(B]))
  (dotimes (i (length vec))
    (if (aref vec i)
        (aset table (aref vec i) (+ 128 i)))))

(define-coding-system-alias 'cp1251 'windows-1251)

;; Add the "microsoft-cp1251" entry to
;; `ctext-non-standard-encodings-alist', or overwrite the existing
;; entry's data if one is already present.
(let ((elt `("microsoft-cp1251" windows-1251 1
             ,(get 'encode-windows-1251 'translation-table)))
      (slot (assoc "microsoft-cp1251" ctext-non-standard-encodings-alist)))
  (if slot
      (setcdr slot (cdr elt))
    (push elt ctext-non-standard-encodings-alist)))

(define-ccl-program ccl-encode-windows-1251-font
  `(0
    ((if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
         ((r1 <<= 7)
          (r1 += r2)))
     (translate-character encode-windows-1251 r0 r1)))
  "CCL program to encode Cyrillic chars to windows-1251 font.")

(add-to-list 'font-ccl-encoder-alist
             '("microsoft-cp1251" . ccl-encode-windows-1251-font))

(set-language-info-alist
 "Bulgarian" `((coding-system windows-1251)
               (coding-priority windows-1251)
               (ctext-non-standard-encodings "microsoft-cp1251")
               (overriding-fontspec
                (,(get 'encode-windows-1251 'translation-table)
                 . (nil . "microsoft-cp1251"))
                (,(get 'cyrillic-koi8-r-encode-table 'translation-table)
                 . (nil . "koi8-r")))
               (nonascii-translation
                . ,(get 'decode-windows-1251 'translation-table))
               (input-method . "bulgarian-bds")
               (documentation
                . "Support for Bulgarian with windows-1251 character set.")
               (tutorial . "TUTORIAL.bg"))
 '("Cyrillic"))

(set-language-info-alist
 "Belarusian" `((coding-system windows-1251)
                (coding-priority windows-1251)
                (ctext-non-standard-encodings "microsoft-cp1251")
                (overriding-fontspec
                 (,(get 'encode-windows-1251 'translation-table)
                  . (nil . "microsoft-cp1251"))
                 (,(get 'cyrillic-koi8-r-encode-table 'translation-table)
                  . (nil . "koi8-r")))
                (nonascii-translation
                 . ,(get 'decode-windows-1251 'translation-table))
                (input-method . "belarusian")
                (documentation
                 . "Support for Belarusian with windows-1251 character set.
\(The name Belarusian replaced Byelorussian in the early 1990s.)"))
 '("Cyrillic"))

(provide 'cyrillic)

;;; arch-tag: bda71ae0-ba41-4cb6-a6e0-1dff542313d3
;;; cyrillic.el ends here