1;;; mlm-util.el --- support for composing malayalam characters  -*-coding: iso-2022-7bit;-*-
2
3;; Copyright (C) 2003, 2004, 2005, 2006, 2007  Free Software Foundation, Inc.
4
5;; Maintainer:  KAWABATA, Taichi <kawabata@m17n.org>
6;; Keywords: multilingual, Malayalam
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING.  If not, write to the
22;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23;; Boston, MA 02110-1301, USA.
24
25;; Created: Feb. 11. 2003
26
27;;; Commentary:
28
29;; This file provides character(Unicode) to glyph(CDAC) conversion and
30;; composition of Malayalam script characters.
31
32;;; Code:
33
;; Malayalam Composable Pattern
;;    C .. Consonants
;;    V .. Vowel
;;    H .. Halant
;;    M .. Matra
;;    A .. Anuswar
;;    D .. Chandrabindu
;;    (N .. Zerowidth Non Joiner)
;;    (J .. Zerowidth Joiner.  )
44;; 1. vowel
45;;  V(A|visargam)?
46;; 2. syllable : maximum of 5 consecutive consonants.  (e.g. kartsnya)
47;;  ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
48
(defconst malayalam-consonant
  "[$,1@5(B-$,1@Y(B]"
  "Regexp character class matching one Malayalam consonant.")
51
(defconst malayalam-composable-pattern
  (let* ((consonant "[$,1@5(B-$,1@Y(B]")
         (halant "$,1@m(B")
         ;; One optional "consonant + halant" pair.
         (optional-pair (concat "\\(?:" consonant halant "\\)?")))
    (concat
     ;; Alternative 1: an independent vowel optionally followed by a
     ;; sign, or one lone sign.
     "\\([$,1@%(B-$,1@4(B][$,1@"(B]?\\)\\|$,1@#(B"
     ;; Alternative 2: a syllable.
     "\\|\\("
     ;; Up to four leading "consonant + halant" pairs; when the group
     ;; is present at all, its last pair is mandatory.
     "\\(?:" optional-pair optional-pair optional-pair consonant halant "\\)?"
     ;; The final consonant, followed by either a halant or an optional
     ;; vowel sign plus an optional trailing sign.
     consonant
     "\\(?:" halant "\\|[$,1@^(B-$,1@c@f@g@h@j@j@k@l(B]?[$,1@"@m(B]?\\)?"
     "\\)"))
  "Regexp matching a composable sequence of Malayalam characters.")
60
61;;;###autoload
(defun malayalam-compose-region (from to)
  "Compose every Malayalam syllable found between FROM and TO.
The region is scanned with `malayalam-composable-pattern' and each
match is handed to `malayalam-compose-syllable-region'."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward malayalam-composable-pattern nil t)
        (let ((syllable-start (match-beginning 0))
              (syllable-end (match-end 0)))
          (malayalam-compose-syllable-region syllable-start syllable-end))))))
(defun malayalam-compose-string (string)
  "Return the result of composing the Malayalam syllables in STRING.
STRING is first passed through `decompose-string'; the result is
inserted into a temporary buffer, processed with
`malayalam-compose-region', and the buffer contents are returned."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (malayalam-compose-region (point-min) (point-max))
      (buffer-string))))
76
77;;;###autoload
(defun malayalam-post-read-conversion (len)
  "Compose the Malayalam text in the LEN characters following point.
The buffer's modified flag is restored to its previous value
afterwards.  Return the size of the processed region."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p))
            (start (point)))
        (narrow-to-region start (+ start len))
        (malayalam-compose-region (point-min) (point-max))
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
86
(defun malayalam-range (from to)
  "Make the list of the integers of range FROM to TO.
Both ends are included.  Return nil when FROM is greater than TO."
  ;; Walk downwards from TO so that pushing onto the front of the
  ;; accumulator leaves the list in ascending order.
  (let ((numbers nil)
        (current to))
    (while (>= current from)
      (push current numbers)
      (setq current (1- current)))
    numbers))
91
(defun malayalam-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches all keys in hashtable HASHTBL.
The keys are collected, ordered longest first, and handed to
`regexp-opt'."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key _value) (push key keys)) hashtbl)
    (regexp-opt
     (sort keys (lambda (a b) (> (length a) (length b)))))))
101
102
103;;;###autoload
(defun malayalam-composition-function (from to pattern  &optional string)
  "Compose one Malayalam syllable.
When STRING is non-nil it is passed to
`malayalam-compose-syllable-string'; otherwise the buffer text
between FROM and TO is composed in place.  The text is assumed to
match PATTERN completely; PATTERN itself is not consulted here.
Return the difference TO minus FROM."
  (cond (string
         (malayalam-compose-syllable-string string))
        (t
         (malayalam-compose-syllable-region from to)))
  (- to from))
111
;; Register a function to compose Malayalam characters.
;; The list of code points is built with `append' rather than `nconc':
;; `nconc' would destructively modify the quoted literal list.
(dolist (ucs (append '(#x0d02 #x0d03) (malayalam-range #x0d05 #x0d39)))
  (aset composition-function-table (decode-char 'ucs ucs)
        ;; A fresh rule list is created for every character.
        (list (cons malayalam-composable-pattern
                    'malayalam-composition-function))))
119
120;; Notes on conversion steps.
121
122;; 1. chars to glyphs
123;;
124;; Simple replacement of characters to glyphs is done.
125
126;; 2. glyphs reordering.
127;;
;; Two special reordering rules take place.
129;; a. following "$,46[(B" goes to the front.
130;; b. following "$,46S6S(B", "$,46S(B" or "$,46T(B" goes to the front.
131;; This reordering occurs only to the last cluster of consonants.
132;; Preceding consonants with halant characters are not affected.
133
134;; 3. Composition.
135;;
136;; left modifiers will be attached at the left.
137;; others will be attached right.
138
(defvar mlm-char-glyph
  '(;; various signs
    ("$,1@"(B" . "$,46W(B")
    ("$,1@#(B" . "$,46X(B")
    ;; Independent Vowels
    ("$,1@%(B" . "$,46!(B")
    ("$,1@&(B" . "$,46"(B")
    ("$,1@'(B" . "$,46#(B")
    ("$,1@((B" . "$,46#6U(B")
    ("$,1@)(B" . "$,46$(B")
    ("$,1@*(B" . "$,46$6U(B")
    ("$,1@+(B" . "$,46%(B")
    ;; The value is the symbol nil, not the string "nil": `concat'
    ;; ignores nil, so no glyph is produced for this character.
    ("$,1@,(B" . nil) ;; not in present use, not supported.
    ("$,1@.(B" . "$,46&(B")
    ("$,1@/(B" . "$,46'(B")
    ("$,1@0(B" . "$,46S6&(B")
    ("$,1@2(B" . "$,46((B")
    ("$,1@3(B" . "$,46(6M(B")
    ("$,1@4(B" . "$,46(6U(B")
    ;; Consonants
    ("$,1@5(B" . "$,46)(B")
    ("$,1@5@m@5(B" . "$,47!(B")
    ("$,1@5@m@S(B" . "$,47"(B")
    ("$,1@5@m@W(B" . "$,47#(B")
    ("$,1@5@m@?(B" . "$,47N(B")
    ("$,1@5@m@D(B" . "$,47`(B")
    ("$,1@5@a(B" . "$,47f(B")
    ("$,1@5@m@5@a(B" . "$,47g(B")

    ("$,1@6(B" . "$,46*(B")

    ("$,1@7(B" . "$,46+(B")
    ("$,1@7@m@7(B" . "$,47$(B")
    ("$,1@7@m@R(B" . "$,47%(B")
    ("$,1@7@m@N(B" . "$,47\(B")
    ("$,1@7@m@H(B" . "$,47a(B")

    ("$,1@8(B" . "$,46,(B")

    ("$,1@9(B" . "$,46-(B")
    ("$,1@9@m@5(B" . "$,47&(B")
    ("$,1@9@m@9(B" . "$,47'(B")
    ("$,1@9@m@5@a(B" . "$,47h(B")

    ("$,1@:(B" . "$,46.(B")
    ("$,1@:@m@:(B" . "$,47((B") ;; duplicate
    ("$,1@:@m@;(B" . "$,47Q(B")

    ("$,1@;(B" . "$,46/(B")

    ("$,1@<(B" . "$,460(B")
    ("$,1@<@m@<(B" . "$,47V(B")
    ("$,1@<@m@>(B" . "$,47Z(B")

    ("$,1@=(B" . "$,461(B")

    ("$,1@>(B" . "$,462(B")
    ("$,1@>@m@:(B" . "$,47)(B")
    ("$,1@>@m@>(B" . "$,47*(B")

    ("$,1@?(B" . "$,463(B")
    ("$,1@?@m@?(B" . "$,47+(B")

    ("$,1@@(B" . "$,464(B")
    ("$,1@A(B" . "$,465(B")
    ("$,1@A@m@A(B" . "$,47M(B")
    ("$,1@B(B" . "$,466(B")

    ("$,1@C(B" . "$,467(B")
    ("$,1@C@a@m(B" . "$,47,(B") ;; half consonant
    ("$,1@C@m@?(B" . "$,47-(B")
    ("$,1@C@m@C(B" . "$,47.(B")
    ("$,1@C@m@N(B" . "$,47W(B")
    ("$,1@C@m@A(B" . "$,47^(B")
    ("$,1@C@a(B" . "$,47i(B")

    ("$,1@D(B" . "$,468(B")
    ("$,1@D@m@D(B" . "$,47/(B")
    ("$,1@D@m@E(B" . "$,470(B")
    ("$,1@D@m@X(B" . "$,47U(B")
    ("$,1@D@m@M(B" . "$,47[(B")
    ("$,1@D@m@N(B" . "$,47_(B")

    ("$,1@E(B" . "$,469(B")

    ("$,1@F(B" . "$,46:(B")
    ("$,1@F@m@F(B" . "$,471(B")
    ("$,1@F@m@G(B" . "$,472(B")

    ("$,1@G(B" . "$,46;(B")

    ("$,1@H(B" . "$,46<(B")
    ("$,1@H@a@m(B" . "$,473(B") ;; half consonant
    ("$,1@H@m@D(B" . "$,474(B")
    ("$,1@H@m@F(B" . "$,475(B")
    ("$,1@H@m@H(B" . "$,476(B")
    ("$,1@H@m@N(B" . "$,477(B")
    ("$,1@H@m@G(B" . "$,47T(B")
    ("$,1@H@m@E(B" . "$,47Y(B")
    ("$,1@H@m@Q(B" . "$,47b(B")
    ("$,1@H@a(B" . "$,47k(B")
    ("$,1@H@m@H@a(B" . "$,47l(B")

    ("$,1@J(B" . "$,46=(B")
    ("$,1@J@m@J(B" . "$,478(B") ;; duplicate
    ("$,1@J@m@R(B" . "$,479(B") ;; lakar

    ("$,1@K(B" . "$,46>(B")

    ("$,1@L(B" . "$,46?(B")
    ("$,1@L@m@L(B" . "$,47:(B") ;; duplicate
    ("$,1@L@m@R(B" . "$,47;(B") ;; lakar
    ("$,1@L@m@G(B" . "$,47O(B")
    ("$,1@L@m@F(B" . "$,47P(B")

    ("$,1@M(B" . "$,46@(B")

    ("$,1@N(B" . "$,46A(B")
    ("$,1@N@m@J(B" . "$,47<(B")
    ("$,1@N@m@N(B" . "$,47=(B")
    ("$,1@N@m@R(B" . "$,47>(B") ;; lakar

    ("$,1@O(B" . "$,46B(B")
    ("$,1@O@m@O(B" . "$,47?(B") ;; duplicate
    ("$,1@O@m@5@m@5(B" . "$,47m(B")

    ("$,1@P(B" . "$,46C(B")
    ("$,1@P@a@m(B" . "$,47@(B")
    ("$,1@P@a(B" . "$,47j(B")

    ("$,1@Q(B" . "$,46D(B")
    ("$,1@Q@m(B" . "$,47@(B") ;; same glyph as "$,1@P@m(B"
    ("$,1@Q@a@m(B" . "$,47@(B") ;; same glyph as "$,1@P@m(B"
    ;;("$,1@Q@m@Q(B" . "$,47A(B")
    ("$,1@Q@m@Q(B" . "$,47d(B")

    ("$,1@R(B" . "$,46E(B")
    ("$,1@R@a@m(B" . "$,47B(B")
    ("$,1@R@m@R(B" . "$,47C(B") ;; lakar
    ("$,1@R@m@J(B" . "$,47e(B")

    ("$,1@S(B" . "$,46F(B")
    ("$,1@S@a@m(B" . "$,47D(B")
    ("$,1@S@m@S(B" . "$,47E(B")

    ("$,1@T(B" . "$,46G(B")

    ("$,1@U(B" . "$,46H(B")
    ("$,1@U@m@U(B" . "$,47F(B")

    ("$,1@V(B" . "$,46I(B")
    ("$,1@V@m@R(B" . "$,47G(B")
    ("$,1@V@m@V(B" . "$,47H(B")
    ("$,1@V@m@:(B" . "$,47](B")

    ("$,1@W(B" . "$,46J(B")
    ("$,1@W@m@?(B" . "$,47c(B")

    ("$,1@X(B" . "$,46K(B")
    ("$,1@X@m@R(B" . "$,47I(B")
    ("$,1@X@m@X(B" . "$,47J(B")
    ("$,1@X@m@Q@m@Q(B" . "$,47L(B")
    ("$,1@X@m@E(B" . "$,47X(B")

    ("$,1@Y(B" . "$,46L(B")
    ("$,1@Y@m@R(B" . "$,47K(B")
    ("$,1@Y@m@N(B" . "$,47R(B")
    ("$,1@Y@m@H(B" . "$,47S(B")

    ;; Dependent vowel signs
    ("$,1@^(B" . "$,46M(B")
    ("$,1@_(B" . "$,46N(B")
    ("$,1@`(B" . "$,46O(B")
    ("$,1@a(B" . "$,46P(B")
    ("$,1@b(B" . "$,46Q(B")
    ("$,1@c(B" . "$,46R(B")
    ("$,1@f(B" . "$,46S(B")
    ("$,1@g(B" . "$,46T(B")
    ("$,1@h(B" . "$,46S6S(B")
    ("$,1@j(B" . "$,46S6M(B")
    ("$,1@k(B" . "$,46T6M(B")
    ("$,1@l(B" . "$,46U(B")
    ;; Various signs
    ("$,1@m(B" . "$,46V(B")
    ("$,1@m@O(B" . "$,46Y(B") ;; yakar
    ("$,1@m@O@a(B" . "$,46\(B") ;; yakar + u
    ("$,1@m@O@b(B" . "$,46](B") ;; yakar + uu
    ("$,1@m@U(B" . "$,46Z(B") ;; vakar modifier
    ("$,1@m@P(B" . "$,46[(B") ;; rakar modifier is the same to rra modifier.
    ("$,1@m@P@m(B" . "$,46R(B") ;; halant + rakar + halant
    ("$,1@m@Q(B" . "$,46[(B") ;; rrakar modifier
    ("$,1@m@Q@m(B" . "$,46R(B") ;; halant + rrakar + halant
    ("$,1@m@m(B" . "$,46V(B") ;; double omission sign to stop forming half consonant.
    ("$,1@w(B" . "$,46U(B") ;; not in present use, already at 0D4C.
    )
  "Alist mapping Malayalam character sequences to glyph strings.
Each element has the form (CHARS . GLYPHS), where CHARS is a string of
one or more characters and GLYPHS is the string of glyphs used to
display it, or nil when the character is not supported.")
336
(defvar mlm-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry mlm-char-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table holding the entries of `mlm-char-glyph'.
Keys are character sequences and values are their glyph strings; keys
are compared with `equal'.")
342
(defvar mlm-char-glyph-regexp
  (malayalam-regexp-of-hashtbl-keys mlm-char-glyph-hash)
  "Regexp matching any character sequence that is a key of `mlm-char-glyph-hash'.")
345
;; Malayalam text needs its glyphs reordered in a complex manner.
347
(defvar mlm-consonants
  (concat
   "$,46)6*6+6,6-6.6/606162636465666768696:6;6<6=6>6?6@6A6B6C6D6E6F6G6H6I6J6K6L(B"
   "$,47!7"7#7$7%7&7'7(7)7*7+7,7-7.7/707172737475767778797:7;7<7=7>7?7@7A7B7C7D7E7F7G7H7I7J7K7L7M7N7O7P7Q7R7S7T7U7V7W7X7Y7Z7[7\7]7^7_7`7a7b7c7d7e(B")
  "String of consonant glyphs.
It is spliced between square brackets to build the regexp character
classes used for glyph reordering.")
353
(defvar mlm-consonants-regexp
  (concat "\\($,46[(B?[" mlm-consonants "][$,46Y6Z(B]?\\)")
  "Regexp matching one consonant glyph with its optional modifier glyphs.
An optional leading rakar glyph, one glyph from `mlm-consonants', and
an optional trailing yakar or vakar glyph are captured together as
group 1.")
356
(defvar mlm-glyph-reorder-key-glyphs
  "[$,46[6S6T(B]"
  "Regexp matching any glyph whose presence makes glyph reordering necessary.")
358
(defvar mlm-glyph-reordering-regexp-list
  (list
   (cons (concat "\\([" mlm-consonants "][$,46Y6Z(B]?\\)$,46[(B")
         "$,46[(B\\1")
   (cons (concat mlm-consonants-regexp "$,46S6S(B")
         "$,46S6S(B\\1")
   (cons (concat mlm-consonants-regexp "$,46S(B")
         "$,46S(B\\1")
   (cons (concat mlm-consonants-regexp "$,46T(B")
         "$,46T(B\\1"))
  "Alist of glyph reordering rules, each of the form (REGEXP . REPLACEMENT).
REGEXP matches a consonant group followed by a glyph that has to be
displayed in front of it; REPLACEMENT puts that glyph before the
matched group.")
364
(defun malayalam-compose-syllable-string (string)
  "Return the result of composing STRING as one Malayalam syllable.
STRING is first passed through `decompose-string'; the result is
inserted into a temporary buffer, processed with
`malayalam-compose-syllable-region', and the buffer contents are
returned."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (malayalam-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
370
(defun malayalam-compose-syllable-region (from to)
  "Compose malayalam syllable in region FROM to TO.
The glyph strings of all character sequences in the region that
match `mlm-char-glyph-regexp' are looked up in `mlm-char-glyph-hash'
and concatenated.  The glyphs are then reordered according to
`mlm-glyph-reordering-regexp-list', and the region is composed with
them as components.  When no glyph is found, the region is left
untouched."
  ;; Start from an empty string, not nil, so that `string-match' below
  ;; never receives a non-string when nothing in the region matches.
  (let ((glyph-str ""))
    (save-excursion
      (save-restriction
        (narrow-to-region from to)
        (goto-char (point-min))
        ;; char-glyph-conversion
        (while (re-search-forward mlm-char-glyph-regexp nil t)
          (setq glyph-str
                (concat glyph-str
                        (gethash (match-string 0) mlm-char-glyph-hash))))
        ;; glyph reordering; each rule is applied at most once, to its
        ;; first match.
        (when (string-match mlm-glyph-reorder-key-glyphs glyph-str)
          (dolist (rule mlm-glyph-reordering-regexp-list)
            (when (string-match (car rule) glyph-str)
              (setq glyph-str
                    (replace-match (cdr rule) nil nil glyph-str)))))
        ;; Without any glyph there is nothing to compose.
        (unless (string= glyph-str "")
          ;; concatenate and attach reference-points: each glyph is
          ;; preceded by the default rule (5 . 3), and `cdr' drops the
          ;; rule in front of the first glyph.
          (compose-region
           from to
           (cdr (apply #'nconc
                       (mapcar (lambda (glyph) (list '(5 . 3) glyph))
                               glyph-str)))))))))
408
409(provide 'mlm-util)
410
411;;; arch-tag: 7f25ee67-8f9d-49f2-837b-35c412c00eba
;;; mlm-util.el ends here
413