1;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*-
2
3;; Copyright (C) 1997, 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007
4;;   Free Software Foundation, Inc.
5;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6;;   2005, 2006, 2007
7;;   National Institute of Advanced Industrial Science and Technology (AIST)
8;;   Registration Number H14PRO021
9
10;; Author: Kenichi Handa <handa@etl.go.jp>
11;; Keywords: multilingual, Cyrillic, i18n
12
13;; This file is part of GNU Emacs.
14
15;; GNU Emacs is free software; you can redistribute it and/or modify
16;; it under the terms of the GNU General Public License as published by
17;; the Free Software Foundation; either version 2, or (at your option)
18;; any later version.
19
20;; GNU Emacs is distributed in the hope that it will be useful,
21;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23;; GNU General Public License for more details.
24
25;; You should have received a copy of the GNU General Public License
26;; along with GNU Emacs; see the file COPYING.  If not, write to the
27;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
28;; Boston, MA 02110-1301, USA.
29
30;;; Commentary:
31
32;; The character set ISO8859-5 is supported.  KOI-8 and ALTERNATIVNYJ
33;; are converted to Unicode internally.  See
34;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>.  For more info
35;; on Cyrillic charsets, see
36;; <URL:http://czyborra.com/charsets/cyrillic.html>.  The KOI and
37;; Alternativnyj coding systems should live in code-pages.el, but
38;; they've always been preloaded and the coding system autoload
39;; mechanism didn't get accepted, so they have to stay here and
40;; duplicate code-pages stuff.
41
;; Note that 8859-5 maps directly onto the Unicode Cyrillic block,
;; apart from codepoints 160 (NBSP, cf. U+0400), 173 (soft hyphen,
;; cf. U+040D) and 253 (section sign, cf. U+045D).  The KOI-8 and
;; Alternativnyj coding systems encode both 8859-5 and Unicode.
;; ucs-tables.el provides unification for cyrillic-iso-8bit.
47
48;; Customizing `utf-fragment-on-decoding' allows decoding characters
49;; from KOI and Alternativnyj into 8859-5 where that's possible.
50;; cyrillic-iso8859-5 characters take half as much space in the buffer
51;; as the mule-unicode-0100-24ff equivalents, though that's probably
52;; not normally a big deal.
53
54;;; Code:
55
56;; Cyrillic (general)
57
58;; ISO-8859-5 stuff
59
;; Coding system of type 2 (ISO 2022 based), shown as `5' in the mode
;; line.  The flags list names the `ascii' and `cyrillic-iso8859-5'
;; charsets first; the property list restricts the safe charsets to
;; those two and gives the MIME name.
(make-coding-system
 'cyrillic-iso-8bit 2 ?5
 "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)."
 '(ascii cyrillic-iso8859-5 nil nil
   nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii cyrillic-iso8859-5)
   (mime-charset . iso-8859-5)))

;; Make the coding system reachable under its MIME name as well.
(define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit)
69
;; Language environment "Cyrillic-ISO", filed under the "Cyrillic"
;; parent menu.  It is based on the cyrillic-iso-8bit coding system
;; and loads the `cyril-util' feature.
(set-language-info-alist
 "Cyrillic-ISO"
 '((charset cyrillic-iso8859-5)
   (coding-system cyrillic-iso-8bit)
   (coding-priority cyrillic-iso-8bit)
   (input-method . "cyrillic-yawerty")  ; fixme
   (nonascii-translation . cyrillic-iso8859-5)
   (unibyte-display . cyrillic-iso-8bit)
   (features cyril-util)
   (sample-text . "Russian (,L@caaZXY(B)	,L7T`PRabRcYbU(B!")
   (documentation . "Support for Cyrillic ISO-8859-5."))
 '("Cyrillic"))
81
82;; KOI-8R stuff
83
84;; The mule-unicode portion of this is from
85;; http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT,
86;; which references RFC 1489.
;; Layout: a vector indexed by byte value.  The first 128 entries are
;; the byte values themselves (eight rows of sixteen integers); the
;; remaining 128 entries are character literals, sixteen per row, for
;; bytes 128 and up.  The active rows mix cyrillic-iso8859-5 and
;; Unicode-charset characters; an all-Unicode alternative is kept
;; below them, commented out.
(defvar cyrillic-koi8-r-decode-table
  [
   0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
   32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
   48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
   64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
   80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
   96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
   112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
   ;; 8859-5 plus Unicode
   ?$,2  (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B
   ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1s"(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B
   ?$,2 p(B ?$,2 q(B ?$,2 r(B ?,Lq(B ?$,2 s(B ?$,2 t(B ?$,2 u(B ?$,2 v(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,2 |(B ?$,2 }(B ?$,2 ~(B
   ?$,2 (B ?$,2! (B ?$,2!!(B ?,L!(B ?$,2!"(B ?$,2!#(B ?$,2!$(B ?$,2!%(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,2!+(B ?$,2!,(B ?,A)(B
   ?,Ln(B  ?,LP(B  ?,LQ(B  ?,Lf(B  ?,LT(B  ?,LU(B  ?,Ld(B  ?,LS(B  ?,Le(B  ?,LX(B  ?,LY(B  ?,LZ(B  ?,L[(B  ?,L\(B  ?,L](B  ?,L^(B
   ?,L_(B  ?,Lo(B  ?,L`(B  ?,La(B  ?,Lb(B  ?,Lc(B  ?,LV(B  ?,LR(B  ?,Ll(B  ?,Lk(B  ?,LW(B  ?,Lh(B  ?,Lm(B  ?,Li(B  ?,Lg(B  ?,Lj(B
   ?,LN(B  ?,L0(B  ?,L1(B  ?,LF(B  ?,L4(B  ?,L5(B  ?,LD(B  ?,L3(B  ?,LE(B  ?,L8(B  ?,L9(B  ?,L:(B  ?,L;(B  ?,L<(B  ?,L=(B  ?,L>(B
   ?,L?(B  ?,LO(B  ?,L@(B  ?,LA(B  ?,LB(B  ?,LC(B  ?,L6(B  ?,L2(B  ?,LL(B  ?,LK(B  ?,L7(B  ?,LH(B  ?,LM(B  ?,LI(B  ?,LG(B  ?,LJ(B
   ;; All Unicode:
;;    ?$,2  (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B
;;    ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1s"(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,A (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B
;;    ?$,2 p(B ?$,2 q(B ?$,2 r(B ?$,1(q(B ?$,2 s(B ?$,2 t(B ?$,2 u(B ?$,2 v(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,2 |(B ?$,2 }(B ?$,2 ~(B
;;    ?$,2 (B ?$,2! (B ?$,2!!(B ?$,1(!(B ?$,2!"(B ?$,2!#(B ?$,2!$(B ?$,2!%(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,2!+(B ?$,2!,(B ?,A)(B
;;    ?$,1(n(B ?$,1(P(B ?$,1(Q(B ?$,1(f(B ?$,1(T(B ?$,1(U(B ?$,1(d(B ?$,1(S(B ?$,1(e(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B
;;    ?$,1(_(B ?$,1(o(B ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(V(B ?$,1(R(B ?$,1(l(B ?$,1(k(B ?$,1(W(B ?$,1(h(B ?$,1(m(B ?$,1(i(B ?$,1(g(B ?$,1(j(B
;;    ?$,1(N(B ?$,1(0(B ?$,1(1(B ?$,1(F(B ?$,1(4(B ?$,1(5(B ?$,1(D(B ?$,1(3(B ?$,1(E(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B
;;    ?$,1(?(B ?$,1(O(B ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(6(B ?$,1(2(B ?$,1(L(B ?$,1(K(B ?$,1(7(B ?$,1(H(B ?$,1(M(B ?$,1(I(B ?$,1(G(B ?$,1(J(B
   ]
  "Cyrillic KOI8-R decoding table.")
117
;; Turn the decode vector into a translation table and register it
;; for decoding.  Extra slot 0 of that table is registered as the
;; encoding table (presumably the reverse mapping that
;; `make-translation-table-from-vector' stores there).
(let ((decoder (make-translation-table-from-vector
                cyrillic-koi8-r-decode-table)))
  (define-translation-table 'cyrillic-koi8-r-nonascii-translation-table
    decoder)
  (define-translation-table 'cyrillic-koi8-r-encode-table
    (char-table-extra-slot decoder 0)))

;; No point in keeping it around.  (It can't be let-bound, since it's
;; needed for macro expansion.)
(makunbound 'cyrillic-koi8-r-decode-table)
127
(define-ccl-program ccl-decode-koi8
  `(4                                   ; buffer magnification
    ((loop
      (r0 = 0)
      (read r1)
      (if (r1 < 128)
          ;; Bytes below 128 are written through unchanged.
          (write-repeat r1)
        ;; Other bytes go through the KOI8-R table first and then
        ;; through `ucs-translation-table-for-decode' before being
        ;; written as a multibyte character.
        ((translate-character cyrillic-koi8-r-nonascii-translation-table r0 r1)
         (translate-character ucs-translation-table-for-decode r0 r1)
         (write-multibyte-character r0 r1)
         (repeat))))))
  "CCL program to decode KOI8-R.")
140
(define-ccl-program ccl-encode-koi8
  `(1                                   ; buffer magnification
    ((loop
      (read-multibyte-character r0 r1)
      (translate-character cyrillic-koi8-r-encode-table r0 r1)
      ;; If the translated character is not in the ascii,
      ;; eight-bit-graphic or eight-bit-control charset, a question
      ;; mark is written in its place.
      (if (r0 != ,(charset-id 'ascii))
          (if (r0 != ,(charset-id 'eight-bit-graphic))
              (if (r0 != ,(charset-id 'eight-bit-control))
                  (r1 = ??))))
      (write-repeat r1))))
  "CCL program to encode KOI8-R.")
152
(defun cyrillic-unify-encoding (table)
  "Set up equivalent characters in the encoding TABLE.
TABLE is a symbol whose `translation-table' property holds the
char-table to modify in place.  For each ISO 8859-5 character and
the Unicode character at the same offset from U+0400, an encoding
that is present for only one of the two is copied to the other.
This works whether or not the table is Unicode-based or
8859-5-based.  (Only appropriate for Cyrillic.)"
  (let ((map (get table 'translation-table)))
    (dotimes (i 96)
      ;; Offsets 0, 13 and 93 are the 8859-5 codepoints 160 (NBSP),
      ;; 173 (soft hyphen) and 253 (section sign).  They do not
      ;; correspond to U+0400 + offset, so they are left alone.
      (unless (memq i '(0 13 93))
        (let* ((c (make-char 'cyrillic-iso8859-5 (+ i 32)))
               (u (decode-char 'ucs (+ #x400 i))) ; equivalent Unicode char
               (ec (aref map c))        ; encoding of 8859-5
               (uc (aref map u)))       ; encoding of Unicode
          (unless uc
            (aset map u ec))
          (unless ec
            (aset map c uc)))))))
172
;; Make the KOI8-R encode table accept both the 8859-5 and the
;; Unicode form of each Cyrillic character.
(cyrillic-unify-encoding 'cyrillic-koi8-r-encode-table)

;; Coding system of type 4 (CCL based), decoding and encoding with
;; the two CCL programs named in the cons below.
(make-coding-system
 'cyrillic-koi8 4
 ;; We used to use ?K.  It is true that ?K is more strictly correct,
 ;; but it is also used for Korean.
 ;; So people who use koi8 for languages other than Russian
 ;; will have to forgive us.
 ?R "KOI8-R 8-bit encoding for Cyrillic (MIME: KOI8-R)."
 '(ccl-decode-koi8 . ccl-encode-koi8)
 `((safe-chars . cyrillic-koi8-r-encode-table)
   (mime-charset . koi8-r)
   (valid-codes (0 . 255))
   (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode)))

;; Other names under which the same coding system can be requested.
(define-coding-system-alias 'koi8-r 'cyrillic-koi8)
(define-coding-system-alias 'koi8 'cyrillic-koi8)
(define-coding-system-alias 'cp878 'cyrillic-koi8)
191
;; Register "koi8-r" in `ctext-non-standard-encodings-alist'.  If an
;; entry with that name already exists its contents are replaced,
;; otherwise a new entry is pushed onto the list.
(let ((entry `("koi8-r" koi8-r 1
               ,(get 'cyrillic-koi8-r-encode-table 'translation-table)))
      (existing (assoc "koi8-r" ctext-non-standard-encodings-alist)))
  (if existing
      (setcdr existing (cdr entry))
    (push entry ctext-non-standard-encodings-alist)))
198
199;; Allow displaying some of KOI & al with an 8859-5-encoded font.  We
200;; won't bother about the exceptions when encoding the font, since
201;; NBSP will fall through below and work anyhow, and we'll have
202;; avoided setting the fontset for the other two to 8859-5 -- they're
203;; not in KOI and Alternativnyj anyhow.
(define-ccl-program ccl-encode-8859-5-font
  `(0
    ;; r0 holds the charset id.  For cyrillic-iso8859-5, 128 is added
    ;; to r1; for mule-unicode-0100-24ff, r1 becomes r2 plus 128.
    ;; Characters of any other charset are left as they are.
    ((if (r0 == ,(charset-id 'cyrillic-iso8859-5))
         (r1 += 128)
       (if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
           (r1 = (r2 + 128))))))
  "Encode ISO 8859-5 and Cyrillic Unicode chars to 8859-5 font.")

;; Use the program above for fonts whose name matches "iso8859-5".
(add-to-list 'font-ccl-encoder-alist '("iso8859-5" . ccl-encode-8859-5-font))
213
;; The table is set up later to encode both Unicode and 8859-5.
(define-ccl-program ccl-encode-koi8-font
  `(0
    ;; Both statements belong to the main code, so they are grouped
    ;; in one block, as in `ccl-encode-windows-1251-font'.  Left as
    ;; separate elements, the translation would occupy the EOF-code
    ;; position of the CCL program.
    ((if (r2 >= 0)
         ;; A non-negative r2 means a second position code is
         ;; present: fold r1 and r2 into the single code that
         ;; `translate-character' expects.
         ((r1 <<= 7)
          (r1 += r2)))
     (translate-character cyrillic-koi8-r-encode-table r0 r1)))
  "CCL program to encode Cyrillic chars to KOI font.")

;; Use the program above for fonts whose name matches "koi8".
(add-to-list 'font-ccl-encoder-alist '("koi8" . ccl-encode-koi8-font))
224
;; Language environment "Cyrillic-KOI8", filed under the "Cyrillic"
;; parent menu.  The nonascii translation is the KOI8-R decoding
;; table itself, looked up when this form is evaluated.
(set-language-info-alist
 "Cyrillic-KOI8"
 `((charset cyrillic-iso8859-5)
   (nonascii-translation
    . ,(get 'cyrillic-koi8-r-nonascii-translation-table
            'translation-table))
   (coding-system cyrillic-koi8)
   (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
   (ctext-non-standard-encodings "koi8-r")
   (input-method . "russian-typewriter")
   (features cyril-util)
   (unibyte-display . cyrillic-koi8)
   (sample-text . "Russian (,L@caaZXY(B)	,L7T`PRabRcYbU(B!")
   (documentation . "Support for Cyrillic KOI8-R."))
 '("Cyrillic"))
239
;; Language environment "Russian", filed under the "Cyrillic" parent
;; menu.  It uses the same KOI8-R coding system and decoding table as
;; "Cyrillic-KOI8", with the russian-computer input method and a
;; tutorial file.
(set-language-info-alist
 "Russian"
 `((charset cyrillic-iso8859-5)
   (nonascii-translation
    . ,(get 'cyrillic-koi8-r-nonascii-translation-table
            'translation-table))
   (coding-system cyrillic-koi8)
   (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
   (input-method . "russian-computer")
   (features cyril-util)
   (unibyte-display . cyrillic-koi8)
   (sample-text . "Russian (,L@caaZXY(B)	,L7T`PRabRcYbU(B!")
   (documentation . "\
Support for Russian using koi8-r and the russian-computer input method.")
   (tutorial . "TUTORIAL.ru"))
 '("Cyrillic"))
255
256
;; Layout: a vector indexed by byte value.  The first 128 entries are
;; the byte values themselves (eight rows of sixteen integers); the
;; remaining 128 entries are character literals, sixteen per row, for
;; bytes 128 and up.  An all-Unicode variant is kept commented out
;; ahead of the active rows, which mix cyrillic-iso8859-5 and
;; Unicode-charset characters.
(defvar cyrillic-koi8-u-decode-table
  [
   0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
   32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
   48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
   64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
   80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
   96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
   112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
   ;; All Unicode:
;;    ?$,2  (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B
;;    ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1x9(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B
;;    ?$,2 p(B ?$,2 q(B ?$,2 r(B ?$,1(q(B ?$,1(t(B ?$,2 t(B ?$,1(v(B ?$,1(w(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,1)Q(B ?$,2 }(B ?$,2 ~(B
;;    ?$,2 (B ?$,2! (B ?$,2!!(B ?$,1(!(B ?$,1($(B ?$,2!#(B ?$,1(&(B ?$,1('(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,1)P(B ?$,2!,(B ?,A)(B
;;    ?$,1(n(B ?$,1(P(B ?$,1(Q(B ?$,1(f(B ?$,1(T(B ?$,1(U(B ?$,1(d(B ?$,1(S(B ?$,1(e(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B
;;    ?$,1(_(B ?$,1(o(B ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(V(B ?$,1(R(B ?$,1(l(B ?$,1(k(B ?$,1(W(B ?$,1(h(B ?$,1(m(B ?$,1(i(B ?$,1(g(B ?$,1(j(B
;;    ?$,1(N(B ?$,1(0(B ?$,1(1(B ?$,1(F(B ?$,1(4(B ?$,1(5(B ?$,1(D(B ?$,1(3(B ?$,1(E(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B
;;    ?$,1(?(B ?$,1(O(B ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(6(B ?$,1(2(B ?$,1(L(B ?$,1(K(B ?$,1(7(B ?$,1(H(B ?$,1(M(B ?$,1(I(B ?$,1(G(B ?$,1(J(B
;; 8859-5 plus Unicode:
   ?$,2  (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B
   ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1x9(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B
   ?$,2 p(B ?$,2 q(B ?$,2 r(B ?,Lq(B ?,Lt(B ?$,2 t(B ?,Lv(B ?,Lw(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,1)Q(B ?$,2 }(B ?$,2 ~(B
   ?$,2 (B ?$,2! (B ?$,2!!(B ?,L!(B ?,L$(B ?$,2!#(B ?,L&(B ?,L'(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,1)P(B ?$,2!,(B ?,A)(B
   ?,Ln(B ?,LP(B ?,LQ(B ?,Lf(B ?,LT(B ?,LU(B ?,Ld(B ?,LS(B ?,Le(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B
   ?,L_(B ?,Lo(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,LV(B ?,LR(B ?,Ll(B ?,Lk(B ?,LW(B ?,Lh(B ?,Lm(B ?,Li(B ?,Lg(B ?,Lj(B
   ?,LN(B ?,L0(B ?,L1(B ?,LF(B ?,L4(B ?,L5(B ?,LD(B ?,L3(B ?,LE(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B
   ?,L?(B ?,LO(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,L6(B ?,L2(B ?,LL(B ?,LK(B ?,L7(B ?,LH(B ?,LM(B ?,LI(B ?,LG(B ?,LJ(B
   ]
  "Cyrillic KOI8-U decoding table.")
287
;; Turn the decode vector into a translation table and register it
;; for decoding.  Extra slot 0 of that table is registered as the
;; encoding table (presumably the reverse mapping that
;; `make-translation-table-from-vector' stores there).
(let ((decoder (make-translation-table-from-vector
                cyrillic-koi8-u-decode-table)))
  (define-translation-table 'cyrillic-koi8-u-nonascii-translation-table
    decoder)
  (define-translation-table 'cyrillic-koi8-u-encode-table
    (char-table-extra-slot decoder 0)))

;; The vector is not needed once the translation tables exist.
(makunbound 'cyrillic-koi8-u-decode-table)
295
(define-ccl-program ccl-decode-koi8-u
  `(4                                   ; buffer magnification
    ((loop
      (r0 = 0)
      (read r1)
      (if (r1 < 128)
          ;; Bytes below 128 are written through unchanged.
          (write-repeat r1)
        ;; Other bytes go through the KOI8-U table first and then
        ;; through `ucs-translation-table-for-decode' before being
        ;; written as a multibyte character.
        ((translate-character cyrillic-koi8-u-nonascii-translation-table r0 r1)
         (translate-character ucs-translation-table-for-decode r0 r1)
         (write-multibyte-character r0 r1)
         (repeat))))))
  "CCL program to decode KOI8-U.")
308
(define-ccl-program ccl-encode-koi8-u
  `(1                                   ; buffer magnification
    ((loop
      (read-multibyte-character r0 r1)
      (translate-character cyrillic-koi8-u-encode-table r0 r1)
      ;; If the translated character is not in the ascii,
      ;; eight-bit-graphic or eight-bit-control charset, a question
      ;; mark is written in its place.
      (if (r0 != ,(charset-id 'ascii))
          (if (r0 != ,(charset-id 'eight-bit-graphic))
              (if (r0 != ,(charset-id 'eight-bit-control))
                  (r1 = ??))))
      (write-repeat r1))))
  "CCL program to encode KOI8-U.")
320
;; Make the KOI8-U encode table accept both the 8859-5 and the
;; Unicode form of each Cyrillic character.
(cyrillic-unify-encoding 'cyrillic-koi8-u-encode-table)

;; Coding system of type 4 (CCL based), shown as `U' in the mode
;; line, decoding and encoding with the two CCL programs named in
;; the cons below.
(make-coding-system
 'koi8-u 4
 ?U "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
 '(ccl-decode-koi8-u . ccl-encode-koi8-u)
 `((safe-chars . cyrillic-koi8-u-encode-table)
   (mime-charset . koi8-u)
   (valid-codes (0 . 255))
   (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode)))
331
(define-ccl-program ccl-encode-koi8-u-font
  `(0
    ;; Same r1/r2 folding as `ccl-encode-koi8-font': when a second
    ;; position code is present (r2 >= 0), r1 and r2 are combined
    ;; into one code before translation, so that characters of
    ;; two-dimensional charsets can be looked up in the table too.
    ((if (r2 >= 0)
         ((r1 <<= 7)
          (r1 += r2)))
     (translate-character cyrillic-koi8-u-encode-table r0 r1)))
  "CCL program to encode Cyrillic chars to KOI-U font.")

;; Use the program above for fonts whose name matches "koi8-u".
(add-to-list 'font-ccl-encoder-alist '("koi8-u" . ccl-encode-koi8-u-font))
338
;; Language environment "Ukrainian", filed under the "Cyrillic"
;; parent menu and based on the koi8-u coding system.  The nonascii
;; translation is the KOI8-U decoding table, looked up when this form
;; is evaluated.
(set-language-info-alist
 "Ukrainian"
 `((coding-system koi8-u)
   (coding-priority koi8-u)
   (nonascii-translation
    . ,(get 'cyrillic-koi8-u-nonascii-translation-table
            'translation-table))
   (input-method . "ukrainian-computer")
   (documentation
    . "Support for Ukrainian with KOI8-U character set."))
 '("Cyrillic"))
349
350;;; ALTERNATIVNYJ stuff
351
352;; Fixme: It's unclear what's the correct table.  I've found
353;; statements both that it's the same as cp866 and somewhat different,
354;; but nothing that looks really definitive.
;; Layout: a vector indexed by byte value.  The first 128 entries are
;; the byte values themselves (eight rows of sixteen integers); the
;; remaining 128 entries are character literals, sixteen per row, for
;; bytes 128 and up.  An all-Unicode variant is kept commented out
;; ahead of the active rows, which mix cyrillic-iso8859-5 and
;; Unicode-charset characters.
(defvar cyrillic-alternativnyj-decode-table
  [
   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
   16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
   32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47
   48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63
   64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79
   80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95
   96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 111
   112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
;;    ?$,1(0(B  ?$,1(1(B  ?$,1(2(B  ?$,1(3(B  ?$,1(4(B  ?$,1(5(B  ?$,1(6(B  ?$,1(7(B  ?$,1(8(B  ?$,1(9(B  ?$,1(:(B  ?$,1(;(B  ?$,1(<(B  ?$,1(=(B  ?$,1(>(B  ?$,1(?(B
;;    ?$,1(@(B  ?$,1(A(B  ?$,1(B(B  ?$,1(C(B  ?$,1(D(B  ?$,1(E(B  ?$,1(F(B  ?$,1(G(B  ?$,1(H(B  ?$,1(I(B  ?$,1(J(B  ?$,1(K(B  ?$,1(L(B  ?$,1(M(B  ?$,1(N(B  ?$,1(O(B
;;    ?$,1(P(B  ?$,1(Q(B  ?$,1(R(B  ?$,1(S(B  ?$,1(T(B  ?$,1(U(B  ?$,1(V(B  ?$,1(W(B  ?$,1(X(B  ?$,1(Y(B  ?$,1(Z(B  ?$,1([(B  ?$,1(\(B  ?$,1(](B  ?$,1(^(B  ?$,1(_(B
;;    ?$,2!Q(B  ?$,2!R(B  ?$,2!S(B  ?$,2 "(B  ?$,2 D(B  ?$,2!!(B  ?$,2!"(B  ?$,2 v(B  ?$,2 u(B  ?$,2!#(B  ?$,2 q(B  ?$,2 w(B  ?$,2 }(B  ?$,2 |(B  ?$,2 {(B  ?$,2 0(B
;;    ?$,2 4(B  ?$,2 T(B  ?$,2 L(B  ?$,2 <(B  ?$,2  (B  ?$,2 \(B  ?$,2 ~(B  ?$,2 (B  ?$,2 z(B  ?$,2 t(B  ?$,2!)(B  ?$,2!&(B  ?$,2! (B  ?$,2 p(B  ?$,2!,(B  ?$,2!'(B
;;    ?$,2!((B  ?$,2!$(B  ?$,2!%(B  ?$,2 y(B  ?$,2 x(B  ?$,2 r(B  ?$,2 s(B  ?$,2!+(B  ?$,2!*(B  ?$,2 8(B  ?$,2 ,(B  ?$,2!H(B  ?$,2!D(B  ?$,2!L(B  ?$,2!P(B  ?$,2!@(B
;;    ?$,1(`(B  ?$,1(a(B  ?$,1(b(B  ?$,1(c(B  ?$,1(d(B  ?$,1(e(B  ?$,1(f(B  ?$,1(g(B  ?$,1(h(B  ?$,1(i(B  ?$,1(j(B  ?$,1(k(B  ?$,1(l(B  ?$,1(m(B  ?$,1(n(B  ?$,1(o(B
;;    ?$,1(!(B  ?$,1(q(B  ?$,1ry(B  ?$,1rx(B  ?$,1%A(B  ?$,1%@(B  ?$,1s:(B  ?$,1s9(B  ?$,1vq(B  ?$,1vs(B  ?,A1(B  ?,Aw(B  ?$,1uV(B  ?,A$(B  ?$,2!`(B  ?,A (B ;
;; 8859+Unicode
   ?,L0(B  ?,L1(B  ?,L2(B  ?,L3(B  ?,L4(B  ?,L5(B  ?,L6(B  ?,L7(B  ?,L8(B  ?,L9(B  ?,L:(B  ?,L;(B  ?,L<(B  ?,L=(B  ?,L>(B  ?,L?(B
   ?,L@(B  ?,LA(B  ?,LB(B  ?,LC(B  ?,LD(B  ?,LE(B  ?,LF(B  ?,LG(B  ?,LH(B  ?,LI(B  ?,LJ(B  ?,LK(B  ?,LL(B  ?,LM(B  ?,LN(B  ?,LO(B
   ?,LP(B  ?,LQ(B  ?,LR(B  ?,LS(B  ?,LT(B  ?,LU(B  ?,LV(B  ?,LW(B  ?,LX(B  ?,LY(B  ?,LZ(B  ?,L[(B  ?,L\(B  ?,L](B  ?,L^(B  ?,L_(B
   ?$,2!Q(B  ?$,2!R(B  ?$,2!S(B  ?$,2 "(B  ?$,2 D(B  ?$,2!!(B  ?$,2!"(B  ?$,2 v(B  ?$,2 u(B  ?$,2!#(B  ?$,2 q(B  ?$,2 w(B  ?$,2 }(B  ?$,2 |(B  ?$,2 {(B  ?$,2 0(B
   ?$,2 4(B  ?$,2 T(B  ?$,2 L(B  ?$,2 <(B  ?$,2  (B  ?$,2 \(B  ?$,2 ~(B  ?$,2 (B  ?$,2 z(B  ?$,2 t(B  ?$,2!)(B  ?$,2!&(B  ?$,2! (B  ?$,2 p(B  ?$,2!,(B  ?$,2!'(B
   ?$,2!((B  ?$,2!$(B  ?$,2!%(B  ?$,2 y(B  ?$,2 x(B  ?$,2 r(B  ?$,2 s(B  ?$,2!+(B  ?$,2!*(B  ?$,2 8(B  ?$,2 ,(B  ?$,2!H(B  ?$,2!D(B  ?$,2!L(B  ?$,2!P(B  ?$,2!@(B
   ?,L`(B  ?,La(B  ?,Lb(B  ?,Lc(B  ?,Ld(B  ?,Le(B  ?,Lf(B  ?,Lg(B  ?,Lh(B  ?,Li(B  ?,Lj(B  ?,Lk(B  ?,Ll(B  ?,Lm(B  ?,Ln(B  ?,Lo(B
   ;; Taken from http://www.cyrillic.com/ref/cyrillic/koi-8alt.html
   ;; with guesses for the Unicodes of the glyphs in the absence of a
   ;; table.
   ?,L!(B  ?,Lq(B  ?$,1ry(B  ?$,1rx(B  ?$,1%A(B  ?$,1%@(B  ?$,1s:(B  ?$,1s9(B  ?$,1vq(B  ?$,1vs(B  ?,A1(B  ?,Aw(B  ?,Lp(B  ?,A$(B  ?$,2!`(B  ?,L (B]
  "Cyrillic ALTERNATIVNYJ decoding table.")
386
;; Turn the decode vector into a translation table and register it
;; for decoding.  Extra slot 0 of that table is registered as the
;; encoding table (presumably the reverse mapping that
;; `make-translation-table-from-vector' stores there).
(let ((decoder (make-translation-table-from-vector
                cyrillic-alternativnyj-decode-table)))
  (define-translation-table 'cyrillic-alternativnyj-nonascii-translation-table
    decoder)
  (define-translation-table 'cyrillic-alternativnyj-encode-table
    (char-table-extra-slot decoder 0)))

;; The vector is not needed once the translation tables exist.
(makunbound 'cyrillic-alternativnyj-decode-table)
395
(define-ccl-program ccl-decode-alternativnyj
  `(4                                   ; buffer magnification
    ((loop
      (r0 = 0)
      (read r1)
      (if (r1 < 128)
          ;; Bytes below 128 are written through unchanged.
          (write-repeat r1)
        ;; Other bytes go through the Alternativnyj table first and
        ;; then through `ucs-translation-table-for-decode' before
        ;; being written as a multibyte character.
        ((translate-character cyrillic-alternativnyj-nonascii-translation-table
                              r0 r1)
         (translate-character ucs-translation-table-for-decode r0 r1)
         (write-multibyte-character r0 r1)
         (repeat))))))
  "CCL program to decode Alternativnyj.")
409
(define-ccl-program ccl-encode-alternativnyj
  `(1                                   ; buffer magnification
    ((loop
      (read-multibyte-character r0 r1)
      (translate-character cyrillic-alternativnyj-encode-table r0 r1)
      ;; If the translated character is not in the ascii,
      ;; eight-bit-graphic or eight-bit-control charset, a question
      ;; mark is written in its place.
      (if (r0 != ,(charset-id 'ascii))
          (if (r0 != ,(charset-id 'eight-bit-graphic))
              (if (r0 != ,(charset-id 'eight-bit-control))
                  (r1 = ??))))
      (write-repeat r1))))
  "CCL program to encode Alternativnyj.")
421
;; Make the Alternativnyj encode table accept both the 8859-5 and
;; the Unicode form of each Cyrillic character.
(cyrillic-unify-encoding 'cyrillic-alternativnyj-encode-table)

;; Coding system of type 4 (CCL based), shown as `A' in the mode
;; line, decoding and encoding with the two CCL programs named in
;; the cons below.
(make-coding-system
 'cyrillic-alternativnyj 4 ?A
 "ALTERNATIVNYJ 8-bit encoding for Cyrillic."
 '(ccl-decode-alternativnyj . ccl-encode-alternativnyj)
 `((safe-chars . cyrillic-alternativnyj-encode-table)
   (valid-codes (0 . 255))
   (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode)))

;; Shorter name for the same coding system.
(define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj)
433
(define-ccl-program ccl-encode-alternativnyj-font
  `(0
    ;; Same r1/r2 folding as `ccl-encode-koi8-font': when a second
    ;; position code is present (r2 >= 0), r1 and r2 are combined
    ;; into one code before translation, so that characters of
    ;; two-dimensional charsets can be looked up in the table too.
    ((if (r2 >= 0)
         ((r1 <<= 7)
          (r1 += r2)))
     (translate-character cyrillic-alternativnyj-encode-table r0 r1)))
  "CCL program to encode Cyrillic chars to Alternativnyj font.")

;; Use the program above for fonts whose name matches "alternativnyj".
(add-to-list 'font-ccl-encoder-alist
             '("alternativnyj" . ccl-encode-alternativnyj-font))
441
;; Language environment "Cyrillic-ALT", filed under the "Cyrillic"
;; parent menu and based on the cyrillic-alternativnyj coding system.
;; The nonascii translation is the Alternativnyj decoding table,
;; looked up when this form is evaluated.
(set-language-info-alist
 "Cyrillic-ALT"
 `((charset cyrillic-iso8859-5)
   (nonascii-translation
    . ,(get 'cyrillic-alternativnyj-nonascii-translation-table
            'translation-table))
   (coding-system cyrillic-alternativnyj)
   (coding-priority cyrillic-alternativnyj)
   (input-method . "russian-typewriter")
   (features cyril-util)
   (unibyte-display . cyrillic-alternativnyj)
   (sample-text . "Russian (,L@caaZXY(B)	,L7T`PRabRcYbU(B!")
   (documentation . "Support for Cyrillic ALTERNATIVNYJ."))
 '("Cyrillic"))
455
;; Language environment "Tajik", filed under the "Cyrillic" parent
;; menu.  Neither `cyrillic-koi8-t' nor `decode-koi8-t' is defined in
;; this file; the `features' entry names `code-pages'.
;; NOTE(review): the `get' below runs when this form is evaluated and
;; yields nil if `decode-koi8-t' has no translation table yet --
;; confirm that code-pages is loaded before this file.
(set-language-info-alist
 "Tajik"
 `((coding-system cyrillic-koi8-t)
   (coding-priority cyrillic-koi8-t)
   (nonascii-translation
    . ,(get 'decode-koi8-t 'translation-table))
   (input-method . "russian-typewriter") ; fixme?
   (features code-pages)
   (documentation . "Support for Tajik using KOI8-T."))
 '("Cyrillic"))
465
;; Define the windows-1251 coding system with `cp-make-coding-system'
;; (not defined in this file) and store the value it returns in
;; `non-iso-charset-alist'.  The vector lists character literals,
;; with nil where no character is given; presumably it covers the
;; codes from 128 upwards -- confirm against code-pages.el.
(eval-and-compile
  (setq
   non-iso-charset-alist
   (cp-make-coding-system
    windows-1251
    [?\$,1("(B ?\$,1(#(B ?\$,1rz(B ?\$,1(s(B ?\$,1r~(B ?\$,1s&(B ?\$,1s (B ?\$,1s!(B ?\$,1tL(B ?\$,1s0(B ?\$,1()(B ?\$,1s9(B ?\$,1(*(B ?\$,1(,(B ?\$,1(+(B ?\$,1(/(B ?\$,1(r(B
     ?\$,1rx(B ?\$,1ry(B ?\$,1r|(B ?\$,1r}(B ?\$,1s"(B ?\$,1rs(B ?\$,1rt(B nil ?\$,1ub(B ?\$,1(y(B ?\$,1s:(B ?\$,1(z(B ?\$,1(|(B ?\$,1({(B ?\$,1((B ?\,A (B ?\$,1(.(B
     ?\$,1(~(B ?\$,1(((B ?\,A$(B ?\$,1)P(B ?\,A&(B ?\,A'(B ?\$,1(!(B ?\,A)(B ?\$,1($(B ?\,A+(B ?\,A,(B ?\,A-(B ?\,A.(B ?\$,1('(B ?\,A0(B ?\,A1(B ?\$,1(&(B
     ?\$,1(v(B ?\$,1)Q(B ?\,A5(B ?\,A6(B ?\,A7(B ?\$,1(q(B ?\$,1uV(B ?\$,1(t(B ?\,A;(B ?\$,1(x(B ?\$,1(%(B ?\$,1(u(B ?\$,1(w(B ?\$,1(0(B ?\$,1(1(B ?\$,1(2(B ?\$,1(3(B
     ?\$,1(4(B ?\$,1(5(B ?\$,1(6(B ?\$,1(7(B ?\$,1(8(B ?\$,1(9(B ?\$,1(:(B ?\$,1(;(B ?\$,1(<(B ?\$,1(=(B ?\$,1(>(B ?\$,1(?(B ?\$,1(@(B ?\$,1(A(B ?\$,1(B(B ?\$,1(C(B ?\$,1(D(B
     ?\$,1(E(B ?\$,1(F(B ?\$,1(G(B ?\$,1(H(B ?\$,1(I(B ?\$,1(J(B ?\$,1(K(B ?\$,1(L(B ?\$,1(M(B ?\$,1(N(B ?\$,1(O(B ?\$,1(P(B ?\$,1(Q(B ?\$,1(R(B ?\$,1(S(B ?\$,1(T(B ?\$,1(U(B
     ?\$,1(V(B ?\$,1(W(B ?\$,1(X(B ?\$,1(Y(B ?\$,1(Z(B ?\$,1([(B ?\$,1(\(B ?\$,1(](B ?\$,1(^(B ?\$,1(_(B ?\$,1(`(B ?\$,1(a(B ?\$,1(b(B ?\$,1(c(B ?\$,1(d(B ?\$,1(e(B ?\$,1(f(B
     ?\$,1(g(B ?\$,1(h(B ?\$,1(i(B ?\$,1(j(B ?\$,1(k(B ?\$,1(l(B ?\$,1(m(B ?\$,1(n(B ?\$,1(o(B] nil ?b)))
479
;; Register cyrillic-iso8859-5 characters in the encode table of
;; windows-1251.
(let ((table (get 'encode-windows-1251 'translation-table))
      ;; Nth element is the position code (32..127) of the
      ;; cyrillic-iso8859-5 character that is encoded to the code
      ;; (128 + N), or nil when that code has no such character.
      ;; Numeric position codes are used instead of character
      ;; literals so that the table does not depend on how this file
      ;; is decoded.  The last entry of the second row, 127, stands
      ;; for 8859-5 code 255 (U+045F), whose literal has an invisible
      ;; second byte.
      (codes [34  35  nil 115 nil nil nil nil nil nil 41  nil 42  44  43  47
              114 nil nil nil nil nil nil nil nil nil 121 nil 122 124 123 127
              nil 46  126 40  nil nil nil nil 33  nil 36  nil nil nil nil 39
              nil nil 38  118 nil nil nil nil 113 112 116 nil 120 37  117 119
              48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63
              64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79
              80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95
              96  97  98  99  100 101 102 103 104 105 106 107 108 109 110 111]))
  (dotimes (i (length codes))
    (let ((code (aref codes i)))
      (when code
        (aset table (make-char 'cyrillic-iso8859-5 code) (+ 128 i))))))
496
;; `cp1251' is another name for windows-1251.
(define-coding-system-alias 'cp1251 'windows-1251)

;; Register "microsoft-cp1251" in `ctext-non-standard-encodings-alist'.
;; If an entry with that name already exists its contents are
;; replaced, otherwise a new entry is pushed onto the list.
(let ((entry `("microsoft-cp1251" windows-1251 1
               ,(get 'encode-windows-1251 'translation-table)))
      (existing (assoc "microsoft-cp1251" ctext-non-standard-encodings-alist)))
  (if existing
      (setcdr existing (cdr entry))
    (push entry ctext-non-standard-encodings-alist)))
505
(define-ccl-program ccl-encode-windows-1251-font
  `(0
    ;; For mule-unicode-0100-24ff characters, r1 and r2 are folded
    ;; into a single code before the table lookup; characters of
    ;; other charsets are translated from r1 as it stands.
    ((if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
         ((r1 <<= 7)
          (r1 += r2)))
     (translate-character encode-windows-1251 r0 r1)))
  "CCL program to encode Cyrillic chars to windows-1251 font.")

;; Use the program above for fonts whose name matches
;; "microsoft-cp1251".
(add-to-list 'font-ccl-encoder-alist
             '("microsoft-cp1251" . ccl-encode-windows-1251-font))
515
;; Language environment "Bulgarian", filed under the "Cyrillic"
;; parent menu and based on windows-1251.  `overriding-fontspec'
;; pairs the windows-1251 and KOI8-R encode tables with the font
;; names "microsoft-cp1251" and "koi8-r"; the tables are looked up
;; when this form is evaluated.
(set-language-info-alist
 "Bulgarian"
 `((coding-system windows-1251)
   (coding-priority windows-1251)
   (ctext-non-standard-encodings "microsoft-cp1251")
   (overriding-fontspec
    (,(get 'encode-windows-1251 'translation-table)
     . (nil . "microsoft-cp1251"))
    (,(get 'cyrillic-koi8-r-encode-table 'translation-table)
     . (nil . "koi8-r")))
   (nonascii-translation
    . ,(get 'decode-windows-1251 'translation-table))
   (input-method . "bulgarian-bds")
   (documentation
    . "Support for Bulgarian with windows-1251 character set.")
   (tutorial . "TUTORIAL.bg"))
 '("Cyrillic"))
532
;; Language environment "Belarusian", filed under the "Cyrillic"
;; parent menu and based on windows-1251.  `overriding-fontspec'
;; pairs the windows-1251 and KOI8-R encode tables with the font
;; names "microsoft-cp1251" and "koi8-r"; the tables are looked up
;; when this form is evaluated.
(set-language-info-alist
 "Belarusian"
 `((coding-system windows-1251)
   (coding-priority windows-1251)
   (ctext-non-standard-encodings "microsoft-cp1251")
   (overriding-fontspec
    (,(get 'encode-windows-1251 'translation-table)
     . (nil . "microsoft-cp1251"))
    (,(get 'cyrillic-koi8-r-encode-table 'translation-table)
     . (nil . "koi8-r")))
   (nonascii-translation
    . ,(get 'decode-windows-1251 'translation-table))
   (input-method . "belarusian")
   (documentation
    . "Support for Belarusian with windows-1251 character set.
\(The name Belarusian replaced Byelorussian in the early 1990s.)"))
 '("Cyrillic"))
549
;; Announce the `cyrillic' feature so that `require' can find it.
(provide 'cyrillic)
551
552;;; arch-tag: bda71ae0-ba41-4cb6-a6e0-1dff542313d3
553;;; cyrillic.el ends here
554