1;;; po-compat.el --- basic support of PO translation files -*- coding: latin-1; -*-
2
3;; Copyright (C) 1995-1999, 2000-2002 Free Software Foundation, Inc.
4
;; Authors: François Pinard <pinard@iro.umontreal.ca>,
6;;          Greg McGary <gkm@magilla.cichlid.com>,
7;;          Bruno Haible <bruno@clisp.org>.
8;; Keywords: i18n, files
9
10;; This file is part of GNU gettext.
11
12;; GNU gettext is free software; you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation; either version 2, or (at your option)
15;; any later version.
16
17;; GNU gettext is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
23;; along with GNU Emacs; see the file COPYING.  If not, write to the
24;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25;; Boston, MA 02111-1307, USA.
26
27;;; Commentary:
28
29;; Emacs 21.2 and newer already contain this file, under the name po.el,
30;; and without portability hassles.
31
32;; This package makes sure visiting PO files decodes them correctly,
33;; according to the Charset= header in the PO file.  For more support
34;; for editing PO files, see po-mode.el.
35
36;;; Code:
37
38;;; Emacs portability matters.
39
;; Identify which Emacs variety is being used.
;; The detection below sets two flags:
;;   - XEmacs or Lucid Emacs -> po-XEMACS = t,
;;   - GNU Emacs whose version string sorts after "19" and which
;;     provides the `faces' feature -> po-EMACS20 = t,
;;   - anything else -> no flag.
;; Both flags are declared with `defvar' before being assigned, so the
;; byte compiler does not report assignments to free variables.
(eval-and-compile
  (defvar po-EMACS20 nil
    "Non-nil on a GNU Emacs that passed the version and `faces' checks.
Set when `emacs-version' sorts after \"19\" and `faces' is provided.")
  (defvar po-XEMACS nil
    "Non-nil when `emacs-version' identifies XEmacs or Lucid Emacs.")
  (cond ((string-match "XEmacs\\|Lucid" emacs-version)
         (setq po-EMACS20 nil po-XEMACS t))
        ;; `string-lessp' compares lexicographically, so this holds for
        ;; every version string that sorts after "19".
        ((and (string-lessp "19" emacs-version) (featurep 'faces))
         (setq po-EMACS20 t po-XEMACS nil))
        (t (setq po-EMACS20 nil po-XEMACS nil))))
51
;; Handle missing `with-temp-buffer' function.
;; When the running Emacs already defines `with-temp-buffer', its
;; definition is installed under the name `po-with-temp-buffer';
;; otherwise an equivalent macro is defined here.
(eval-and-compile
  (if (fboundp 'with-temp-buffer)
      (fset 'po-with-temp-buffer (symbol-function 'with-temp-buffer))

    (defmacro po-with-temp-buffer (&rest forms)
      "Create a temporary buffer, and evaluate FORMS there like `progn'.
The value of the last form is returned.  On exit, whether normal or
not, the previously current buffer is made current again if it still
exists, and the temporary buffer is killed if it still exists."
      (let ((curr-buffer (make-symbol "curr-buffer"))
            (temp-buffer (make-symbol "temp-buffer")))
        `(let ((,curr-buffer (current-buffer))
               (,temp-buffer (get-buffer-create
                              (generate-new-buffer-name " *po-temp*"))))
           (unwind-protect
               (progn
                 (set-buffer ,temp-buffer)
                 ,@forms)
             ;; `buffer-name' is nil for a killed buffer.  Skip the
             ;; switch when FORMS killed the original buffer: an
             ;; unconditional `set-buffer' would signal an error there
             ;; and the temporary buffer would never be killed.
             (and (buffer-name ,curr-buffer)
                  (set-buffer ,curr-buffer))
             (and (buffer-name ,temp-buffer)
                  (kill-buffer ,temp-buffer))))))))
71
(defconst po-content-type-charset-alist
  '(;; Entries commented out with `??' are charsets for which no coding
    ;; system is listed here (Emacs 21 doesn't support all encodings).

    ;; Plain ASCII and its aliases.
    ("ASCII" . undecided)
    ("ANSI_X3.4-1968" . undecided)
    ("US-ASCII" . undecided)

    ;; ISO 8859 family, in both `ISO-' and `ISO_' spellings.
    ("ISO-8859-1" . iso-8859-1)
    ("ISO_8859-1" . iso-8859-1)
    ("ISO-8859-2" . iso-8859-2)
    ("ISO_8859-2" . iso-8859-2)
    ("ISO-8859-3" . iso-8859-3)
    ("ISO_8859-3" . iso-8859-3)
    ("ISO-8859-4" . iso-8859-4)
    ("ISO_8859-4" . iso-8859-4)
    ("ISO-8859-5" . iso-8859-5)
    ("ISO_8859-5" . iso-8859-5)
    ;; ("ISO-8859-6" . ??)
    ;; ("ISO_8859-6" . ??)
    ("ISO-8859-7" . iso-8859-7)
    ("ISO_8859-7" . iso-8859-7)
    ("ISO-8859-8" . iso-8859-8)
    ("ISO_8859-8" . iso-8859-8)
    ("ISO-8859-9" . iso-8859-9)
    ("ISO_8859-9" . iso-8859-9)
    ;; ("ISO-8859-13" . ??)
    ;; ("ISO_8859-13" . ??)
    ;; ("ISO-8859-14" . ??)
    ;; ("ISO_8859-14" . ??)
    ("ISO-8859-15" . iso-8859-15)       ; requires Emacs 21
    ("ISO_8859-15" . iso-8859-15)       ; requires Emacs 21

    ;; KOI8 family.
    ("KOI8-R" . koi8-r)
    ;; ("KOI8-U" . ??)
    ;; ("KOI8-T" . ??)

    ;; Codepages with three-digit numbers.
    ("CP437" . cp437)                   ; requires Emacs 20
    ("CP775" . cp775)                   ; requires Emacs 20
    ("CP850" . cp850)                   ; requires Emacs 20
    ("CP852" . cp852)                   ; requires Emacs 20
    ("CP855" . cp855)                   ; requires Emacs 20
    ;; ("CP856" . ??)
    ("CP857" . cp857)                   ; requires Emacs 20
    ("CP861" . cp861)                   ; requires Emacs 20
    ("CP862" . cp862)                   ; requires Emacs 20
    ("CP864" . cp864)                   ; requires Emacs 20
    ("CP865" . cp865)                   ; requires Emacs 20
    ("CP866" . cp866)                   ; requires Emacs 21
    ("CP869" . cp869)                   ; requires Emacs 20
    ;; ("CP874" . ??)
    ;; ("CP922" . ??)
    ;; ("CP932" . ??)
    ;; ("CP943" . ??)
    ;; ("CP949" . ??)
    ;; ("CP950" . ??)

    ;; Codepages with four-digit numbers.
    ;; ("CP1046" . ??)
    ;; ("CP1124" . ??)
    ;; ("CP1129" . ??)
    ("CP1250" . cp1250)                 ; requires Emacs 20
    ("CP1251" . cp1251)                 ; requires Emacs 20
    ("CP1252" . iso-8859-1)             ; approximation
    ("CP1253" . cp1253)                 ; requires Emacs 20
    ("CP1254" . iso-8859-9)             ; approximation
    ("CP1255" . iso-8859-8)             ; approximation
    ;; ("CP1256" . ??)
    ("CP1257" . cp1257)                 ; requires Emacs 20

    ;; Multibyte and remaining charsets.
    ;; `cn-gb-2312' is also named `gb2312' in XEmacs 21 or Emacs 21,
    ;; and `euc-cn' in Emacs 20 or Emacs 21.
    ("GB2312" . cn-gb-2312)
    ("EUC-JP" . euc-jp)
    ("EUC-KR" . euc-kr)
    ;; ("EUC-TW" . ??)
    ("BIG5" . big5)
    ;; ("BIG5-HKSCS" . ??)
    ;; ("GBK" . ??)
    ;; ("GB18030" . ??)
    ("SHIFT_JIS" . shift_jis)
    ;; ("JOHAB" . ??)
    ("TIS-620" . tis-620)               ; requires Emacs 20 or Emacs 21
    ("VISCII" . viscii)                 ; requires Emacs 20 or Emacs 21
    ;; ("GEORGIAN-PS" . ??)
    ("UTF-8" . utf-8)    ; requires Mule-UCS in Emacs 20, or Emacs 21
    )
  "Alist mapping charset names to Mule coding systems.
Each key is a GNU libc/libiconv canonical charset name, in upper case,
as seen in a Content-Type header; each value is the symbol of the
coding system to use for that charset.")
152
(defun po-find-charset (filename)
  "Return the charset declared in the header of PO file FILENAME, or nil.
The value is the text that follows `charset=' on the Content-Type line
of the header entry, as a string.

The raw bytes of FILENAME are inserted at the end of the current buffer
while searching, and the searches run forward from point.  The file
offsets are derived from the buffer size, so this is meant to be called
in an empty temporary buffer."
  ;; Deliberately not a command: FILENAME is a required argument and the
  ;; current buffer is modified.
  (let ((charset-regexp
         ;; Whitespace after "Content-Type:" and after ";" is optional.
         "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        (short-read nil))
    ;; Try the first 4096 bytes.  In case we cannot find the charset value
    ;; within the first 4096 bytes (the PO file might start with a long
    ;; comment) try the next 4096 bytes repeatedly until we'll know for sure
    ;; we've checked the empty header entry entirely.
    (while (not (or short-read (re-search-forward "^msgid" nil t)))
      (save-excursion
        (goto-char (point-max))
        ;; Buffer position P corresponds to file offset P - 1.
        (let ((pair (insert-file-contents-literally filename nil
                                                    (1- (point))
                                                    (1- (+ (point) 4096)))))
          ;; Fewer bytes than requested means the end of file was reached.
          (setq short-read (< (nth 1 pair) 4096)))))
    (cond ((re-search-forward charset-regexp nil t) (match-string 1))
          ;; The whole file has been read and holds no charset.
          (short-read nil)
          ;; We've found the first msgid; maybe, only a part of the msgstr
          ;; value was loaded.  Load the next 1024 bytes; if charset still
          ;; isn't available, give up.
          (t (save-excursion
               (goto-char (point-max))
               (insert-file-contents-literally filename nil
                                               (1- (point))
                                               (1- (+ (point) 1024))))
             (if (re-search-forward charset-regexp nil t)
                 (match-string 1))))))
182
;; Coding system lookup for PO files.  Which definitions get installed
;; depends on the po-EMACS20 and po-XEMACS flags; when neither is set,
;; nothing is defined here.
(eval-and-compile
  (if po-EMACS20
      (defun po-find-file-coding-system-guts (operation filename)
        "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.

The result is nil unless OPERATION is `insert-file-contents' and
FILENAME exists.  Otherwise it is a one-element list, that is, a pair
whose ENCODING part is nil.  Its element is chosen as follows, using
\"ascii\" when the file declares no charset:
 - the `po-content-type-charset-alist' entry for the upper-cased
   charset, or else the already interned symbol named by the lower-cased
   charset, provided that symbol is a coding system;
 - else a `cpNNN' coding system set up through `codepage-setup', when
   the codepage functions are defined and support that codepage;
 - else `no-conversion'."
        (and (eq operation 'insert-file-contents)
             (file-exists-p filename)
             (po-with-temp-buffer
              (let* ((coding-system-for-read 'no-conversion)
                     (charset (or (po-find-charset filename) "ascii"))
                     (charset-upper (upcase charset))
                     (charset-lower (downcase charset))
                     (candidate
                      (cdr (assoc charset-upper po-content-type-charset-alist)))
                     (try-symbol (or candidate (intern-soft charset-lower)))
                     (try-string
                      (if try-symbol (symbol-name try-symbol) charset-lower)))
                (list (cond ((and try-symbol (coding-system-p try-symbol))
                             try-symbol)
                            ((and po-EMACS20
                                  ;; The codepage functions may be undefined
                                  ;; in the running Emacs; calling them then
                                  ;; would signal `void-function' while the
                                  ;; file is being visited.
                                  (fboundp 'cp-supported-codepages)
                                  (fboundp 'codepage-setup)
                                  (string-match "\\`cp[1-9][0-9][0-9]?\\'"
                                                try-string)
                                  (assoc (substring try-string 2)
                                         (cp-supported-codepages)))
                             (codepage-setup (substring try-string 2))
                             (intern try-string))
                            (t
                             'no-conversion))))))))

  (if po-XEMACS
      (defun po-find-file-coding-system-guts (operation filename)
        "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.

The result is nil unless OPERATION is `insert-file-contents' and
FILENAME exists.  Otherwise it is a one-element list, that is, a pair
whose ENCODING part is nil.  Its element is the
`po-content-type-charset-alist' entry for the upper-cased charset
\(\"ascii\" when the file declares none); else the symbol named by the
lower-cased charset when it is a member of `coding-system-list'; else
`no-conversion'."
        (and (eq operation 'insert-file-contents)
             (file-exists-p filename)
             (po-with-temp-buffer
              (let ((coding-system-for-read 'no-conversion))
                (let* ((charset (or (po-find-charset filename)
                                    "ascii"))
                       (charset-upper (upcase charset))
                       (charset-lower (intern (downcase charset))))
                  (list (or (cdr (assoc charset-upper
                                        po-content-type-charset-alist))
                            (if (memq charset-lower (coding-system-list))
                                charset-lower
                              'no-conversion)))))))))

  (if po-EMACS20
      (defun po-find-file-coding-system (arg-list)
        "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.

ARG-LIST is a list whose first element is the operation and whose
second element is the file name; both are handed over to
`po-find-file-coding-system-guts'."
        (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list)))))

  (if po-XEMACS
      (defun po-find-file-coding-system (operation filename)
        "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.

OPERATION and FILENAME are handed over unchanged to
`po-find-file-coding-system-guts'."
        (po-find-file-coding-system-guts operation filename)))

  )
247
248(provide 'po-compat)
249
250;;; Testing this file:
251
252;; For each emacsimpl in { emacs, xemacs } do
253;;   For each pofile in {
254;;     cs.po           ; gettext/po/cs.el, charset=ISO-8859-2
255;;     cs-modified.po  ; gettext/po/cs.el, charset=ISO_8859-2
256;;     de.po           ; gettext/po/de.el, charset=UTF-8, if $emacsimpl = emacs
257;;   } do
258;;     Start $emacsimpl
259;;     M-x load-file  po-compat.el RET
260;;     C-x C-f  $pofile RET
261;;     Verify charset marker in status line ('2' = ISO-8859-2, 'u' = UTF-8).
262
263;;; po-compat.el ends here
264