• Home
  • History
  • Annotate
  • Raw
  • Download
  • only in /macosx-10.9.5/emacs-92/emacs/lisp/international/

Lines Matching +defs:mule +defs:utf

0 ;;; utf-8.el --- UTF-8 decoding/encoding support -*- coding: iso-2022-7bit -*-
32 ;; The coding-system `mule-utf-8' basically supports encoding/decoding
38 ;; mule-unicode-0100-24ff
39 ;; mule-unicode-2500-33ff
40 ;; mule-unicode-e000-ffff
48 ;; Fixme: note that reading and writing invalid utf-8 may not be
51 ;; Characters from other character sets can be encoded with mule-utf-8
53 ;; `utf-translation-table-for-encode'. Hash tables
54 ;; `utf-subst-table-for-decode' and `utf-subst-table-for-encode' are
60 ;; scalar | utf-8
69 (defvar ucs-mule-to-mule-unicode (make-char-table 'translation-table nil)
70 "Char table mapping characters to latin-iso8859-1 or mule-unicode-*.
73 translation-table named `utf-translation-table-for-encode'.")
75 (define-translation-table 'utf-translation-table-for-encode)
79 ;; space of mule-unicode. For Latin scripts this isn't very
83 (defvar utf-fragmentation-table (make-char-table 'translation-table nil)
84 "Char-table normally mapping non-Latin mule-unicode-* chars to iso-8859-*.
86 If `utf-fragment-on-decoding' is non-nil, this table populates the
87 translation-table named `utf-translation-table-for-decode'")
89 (defvar utf-defragmentation-table (make-char-table 'translation-table nil)
90 "Char-table for reverse mapping of `utf-fragmentation-table'.
92 If `utf-fragment-on-decoding' is non-nil and
94 translation-table named `utf-translation-table-for-encode'")
96 (define-translation-table 'utf-translation-table-for-decode)
99 (defvar ucs-mule-cjk-to-unicode (make-hash-table :test 'eq)
102 If `utf-translate-cjk-mode' is non-nil, this table populates the
103 translation-hash-table named `utf-subst-table-for-encode'.")
105 (define-translation-hash-table 'utf-subst-table-for-encode
106 ucs-mule-cjk-to-unicode)
108 (defvar ucs-unicode-to-mule-cjk (make-hash-table :test 'eq)
111 If `utf-translate-cjk-mode' is non-nil, this table populates the
112 translation-hash-table named `utf-subst-table-for-decode'.")
114 (define-translation-hash-table 'utf-subst-table-for-decode
115 ucs-unicode-to-mule-cjk)
119 (aset utf-fragmentation-table (car pair) (cdr pair))
120 (aset utf-defragmentation-table (cdr pair) (car pair)))
158 (defcustom utf-fragment-on-decoding nil
162 mule-unicode-0100-24ff. The iso8859 charsets take half as much space
172 (define-translation-table 'utf-translation-table-for-decode
173 utf-fragmentation-table)
175 ;; mule-utf-* encode characters in
176 ;; utf-fragmentation-table.
177 (unless (eq (get 'utf-translation-table-for-encode
179 ucs-mule-to-mule-unicode)
180 (define-translation-table 'utf-translation-table-for-encode
181 utf-defragmentation-table)))
182 (define-translation-table 'utf-translation-table-for-decode)
184 ;; mule-utf-* disabled for characters in
185 ;; utf-fragmentation-table.
186 (unless (eq (get 'utf-translation-table-for-encode
188 ucs-mule-to-mule-unicode)
189 (define-translation-table 'utf-translation-table-for-encode)))
193 :group 'mule)
196 (defconst utf-translate-cjk-charsets '(chinese-gb2312
201 "List of charsets supported by `utf-translate-cjk-mode'.")
203 (defvar utf-translate-cjk-lang-env nil
204 "Language environment in which tables for `utf-translate-cjk-mode' are loaded.
207 (defvar utf-translate-cjk-unicode-range)
209 ;; String generated from utf-translate-cjk-unicode-range. It is
211 (defvar utf-translate-cjk-unicode-range-string nil)
213 (defun utf-translate-cjk-set-unicode-range (range)
214 (setq utf-translate-cjk-unicode-range range)
215 (setq utf-translate-cjk-unicode-range-string
221 (make-char 'mule-unicode-0100-24ff
225 (make-char 'mule-unicode-2500-33ff
229 (make-char 'mule-unicode-e000-ffff
265 ;; utf-translate-cjk-mode.
266 (setq utf-translate-cjk-lang-env nil
267 ucs-mule-cjk-to-unicode (make-hash-table :test 'eq)
268 ucs-unicode-to-mule-cjk (make-hash-table :test 'eq)))
270 (defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3)
272 "List of Unicode code ranges supported by `utf-translate-cjk-mode'.
275 `utf-translate-cjk-set-unicode-range'."
279 (utf-translate-cjk-set-unicode-range value))
280 :group 'mule)
282 ;; Return non-nil if CODE-POINT is in `utf-translate-cjk-unicode-range'.
283 (defsubst utf-translate-cjk-substitutable-p (code-point)
284 (let ((tail utf-translate-cjk-unicode-range)
293 (defun utf-translate-cjk-load-tables ()
294 "Load tables for `utf-translate-cjk-mode'."
297 (let ((redefined (< (hash-table-size ucs-mule-cjk-to-unicode) 43000)))
302 (setq ucs-mule-cjk-to-unicode
304 ucs-unicode-to-mule-cjk
339 (define-translation-hash-table 'utf-subst-table-for-decode
340 ucs-unicode-to-mule-cjk)
341 (define-translation-hash-table 'utf-subst-table-for-encode
342 ucs-mule-cjk-to-unicode)
343 (set-char-table-extra-slot (get 'utf-translation-table-for-encode
345 1 ucs-mule-cjk-to-unicode))
347 (setq utf-translate-cjk-lang-env current-language-environment)))
349 (defun utf-lookup-subst-table-for-decode (code-point)
350 (if (and utf-translate-cjk-mode
351 (not utf-translate-cjk-lang-env)
352 (utf-translate-cjk-substitutable-p code-point))
353 (utf-translate-cjk-load-tables))
355 (get 'utf-subst-table-for-decode 'translation-hash-table)))
358 (defun utf-lookup-subst-table-for-encode (char)
359 (if (and utf-translate-cjk-mode
360 (not utf-translate-cjk-lang-env)
361 (memq (char-charset char) utf-translate-cjk-charsets))
362 (utf-translate-cjk-load-tables))
364 (get 'utf-subst-table-for-encode 'translation-hash-table)))
366 (define-minor-mode utf-translate-cjk-mode
370 Enabling this allows the coding systems mule-utf-8,
371 mule-utf-16le and mule-utf-16be to encode characters in the charsets
384 `utf-translate-cjk-mode' to nil."
388 :group 'mule
390 (if utf-translate-cjk-mode
392 (define-translation-hash-table 'utf-subst-table-for-decode
393 ucs-unicode-to-mule-cjk)
394 (define-translation-hash-table 'utf-subst-table-for-encode
395 ucs-mule-cjk-to-unicode)
396 (set-char-table-extra-slot (get 'utf-translation-table-for-encode
398 1 ucs-mule-cjk-to-unicode))
399 (define-translation-hash-table 'utf-subst-table-for-decode
401 (define-translation-hash-table 'utf-subst-table-for-encode
403 (set-char-table-extra-slot (get 'utf-translation-table-for-encode
407 ;; Update safe-chars of mule-utf-* coding systems.
409 (if (string-match "^mule-utf" (symbol-name elt))
413 (dolist (charset utf-translate-cjk-charsets)
414 (unless (eq utf-translate-cjk-mode (memq charset safe-charsets))
416 (if utf-translate-cjk-mode
420 (aset safe-chars (make-char charset) utf-translate-cjk-mode)))
425 (define-ccl-program ccl-mule-utf-untrans
458 (define-ccl-program ccl-decode-mule-utf-8
460 ;; charset | bytes in utf-8 | bytes in emacs
468 ;; mule-unicode-0100-24ff | 2 | 4
471 ;; mule-unicode-0100-24ff | 3 | 4
473 ;; mule-unicode-2500-33ff | 3 | 4
474 ;; mule-unicode-e000-ffff | 3 | 4
490 ((call ccl-mule-utf-untrans)
498 ((call ccl-mule-utf-untrans)
512 ;; mule-unicode-0100-24ff (< 0800)
514 (lookup-integer utf-subst-table-for-decode r0 r1)
516 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
522 utf-translation-table-for-decode r0 r1)))
529 (lookup-integer utf-subst-table-for-decode r0 r1)
545 ((call ccl-mule-utf-untrans)
547 (call ccl-mule-utf-untrans)
561 ((call ccl-mule-utf-untrans)
563 (call ccl-mule-utf-untrans)
565 (call ccl-mule-utf-untrans)
571 ;; mule-unicode-0100-24ff (>= 0800)
573 (lookup-integer utf-subst-table-for-decode r0 r1)
575 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
581 utf-translation-table-for-decode r0 r1)))
587 ;; mule-unicode-2500-33ff
589 (lookup-integer utf-subst-table-for-decode r0 r1)
591 ((r0 = ,(charset-id 'mule-unicode-2500-33ff))
604 (lookup-integer utf-subst-table-for-decode r3 r1)
610 ((call ccl-mule-utf-untrans)
615 ;; mule-unicode-e000-ffff
618 (lookup-integer utf-subst-table-for-decode r0 r1)
620 ((r0 = ,(charset-id 'mule-unicode-e000-ffff))
632 ((call ccl-mule-utf-untrans)
634 (call ccl-mule-utf-untrans)
636 (call ccl-mule-utf-untrans)
646 ;; Fixme: allow lookup in utf-subst-table-for-decode.
653 ((call ccl-mule-utf-untrans)
655 (call ccl-mule-utf-untrans)
657 (call ccl-mule-utf-untrans)
659 (call ccl-mule-utf-untrans))
661 (call ccl-mule-utf-untrans))))
664 ((call ccl-mule-utf-untrans)
666 (call ccl-mule-utf-untrans)
668 (call ccl-mule-utf-untrans)
670 (call ccl-mule-utf-untrans)))
679 ((call ccl-mule-utf-untrans)
682 (call ccl-mule-utf-untrans)
685 (call ccl-mule-utf-untrans)
688 (call ccl-mule-utf-untrans))))))))))
692 mule-unicode-*, but see also `utf-fragmentation-table' and
693 `ucs-mule-cjk-to-unicode'.
697 (define-ccl-program ccl-mule-utf-8-encode-untrans
760 (define-ccl-program ccl-encode-mule-utf-8
770 (translate-character utf-translation-table-for-encode r0 r1)
776 ;; r1 scalar utf-8
785 (if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
804 (if (r0 == ,(charset-id 'mule-unicode-2500-33ff))
815 (if (r0 == ,(charset-id 'mule-unicode-e000-ffff))
827 ;; r1 scalar utf-8
835 ;; r1 scalar utf-8
845 ((call ccl-mule-utf-8-encode-untrans)
849 (lookup-character utf-subst-table-for-encode r0 r1)
895 (defvar utf-8-ccl-regs (make-vector 8 0))
897 (defsubst utf-8-untranslated-to-ucs ()
900 (aset utf-8-ccl-regs 0 (or (char-after) 0))
901 (aset utf-8-ccl-regs 1 (or (char-after (1+ (point))) 0))
902 (aset utf-8-ccl-regs 2 (or (char-after (+ 2 (point))) 0))
903 (aset utf-8-ccl-regs 3 (or (char-after (+ 3 (point))) 0))
904 (ccl-execute 'ccl-untranslated-to-ucs utf-8-ccl-regs))
906 (defun utf-8-help-echo (window object position)
908 (get-char-property position 'untranslated-utf-8 object)))
917 ;; If utf-translate-cjk-mode is non-nil, this function is called with
921 (defsubst utf-8-compose (hash-table)
925 (utf-8-untranslated-to-ucs)
926 (let ((l (aref utf-8-ccl-regs 1))
930 (setq ch (gethash (aref utf-8-ccl-regs 0) hash-table)))
934 (setq ch (aref utf-8-ccl-regs 0))
936 'untranslated-utf-8 ch)
938 'help-echo 'utf-8-help-echo)
947 (defcustom utf-8-compose-scripts nil
948 "*Non-nil means compose various scripts on decoding utf-8 text."
949 :group 'mule
953 (defun utf-8-post-read-conversion (length)
954 "Compose untranslated utf-8 sequences into single characters.
955 If `utf-translate-cjk-mode' is non-nil, tries to translate CJK characters.
956 Also compose particular scripts if `utf-8-compose-scripts' is non-nil."
967 (when utf-translate-cjk-mode
968 (unless utf-translate-cjk-lang-env
969 ;; Check these characters in utf-translate-cjk-unicode-range.
972 (concat range utf-translate-cjk-unicode-range-string))
974 (utf-translate-cjk-load-tables)
976 (concat range utf-translate-cjk-unicode-range-string)))
977 (setq hash-table (get 'utf-subst-table-for-decode
983 (utf-8-compose hash-table)
993 (when (and utf-8-compose-scripts (> length 1))
1006 (defun utf-8-pre-write-conversion (beg end)
1007 "Prepare for `utf-translate-cjk-mode' to encode text between BEG and END.
1008 This is used as a pre-write-conversion of utf-8 coding system."
1009 (if (and utf-translate-cjk-mode
1010 (not utf-translate-cjk-lang-env)
1016 (utf-translate-cjk-load-tables))
1020 'mule-utf-8 4 ?u
1025 ascii, latin-iso8859-1, mule-unicode-0100-24ff,
1026 mule-unicode-2500-33ff, mule-unicode-e000-ffff
1031 byte sequence is preserved on i/o for valid utf-8, but not necessarily
1032 for invalid utf-8.
1038 '(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8)
1044 mule-unicode-0100-24ff
1045 mule-unicode-2500-33ff
1046 mule-unicode-e000-ffff
1047 ,@(if utf-translate-cjk-mode
1048 utf-translate-cjk-charsets))
1049 (mime-charset . utf-8)
1050 (coding-category . coding-category-utf-8)
1052 (pre-write-conversion . utf-8-pre-write-conversion)
1053 (post-read-conversion . utf-8-post-read-conversion)
1054 (translation-table-for-encode . utf-translation-table-for-encode)
1057 utf-fragment-on-decoding
1058 utf-translate-cjk-mode)))
1060 (define-coding-system-alias 'utf-8 'mule-utf-8)
1065 ;;; (defun utf-8-compose-function (pos to pattern &optional string)
1074 ;;; (utf-8-compose)))
1081 ;;; . utf-8-compose-function))))
1084 ;;; utf-8.el ends here