;;; po-compat.el --- basic support of PO translation files -*- coding: latin-1; -*-

;; Copyright (C) 1995-1999, 2000-2002 Free Software Foundation, Inc.

;; Authors: François Pinard <pinard@iro.umontreal.ca>,
;;          Greg McGary <gkm@magilla.cichlid.com>,
;;          Bruno Haible <bruno@clisp.org>.
;; Keywords: i18n, files

;; This file is part of GNU gettext.

;; GNU gettext is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU gettext is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;; Emacs 21.2 and newer already contain this file, under the name po.el,
;; and without portability hassles.

;; This package makes sure visiting PO files decodes them correctly,
;; according to the Charset= header in the PO file.  For more support
;; for editing PO files, see po-mode.el.

;;; Code:

;;; Emacs portability matters.

;; Identify which Emacs variety is being used.
;; This file supports:
;;   - XEmacs (version 19 and above) -> po-XEMACS = t,
;;   - GNU Emacs (version 20 and above) -> po-EMACS20 = t,
;;   - GNU Emacs (version 19) -> no flag.
(eval-and-compile
  ;; Set the variety flags: po-XEMACS for XEmacs/Lucid, po-EMACS20 for
  ;; GNU Emacs 20 or newer with face support, neither for anything else.
  ;; The major version is compared numerically; a lexical comparison of
  ;; the version string would also accept "19.x" (of which "19" is a
  ;; prefix) and would reject any version that sorts before "19".
  (cond ((string-match "XEmacs\\|Lucid" emacs-version)
         (setq po-EMACS20 nil po-XEMACS t))
        ((and (boundp 'emacs-major-version)
              (>= emacs-major-version 20)
              (featurep 'faces))
         (setq po-EMACS20 t po-XEMACS nil))
        (t (setq po-EMACS20 nil po-XEMACS nil))))

;; Handle missing `with-temp-buffer' function.
(eval-and-compile
  (if (fboundp 'with-temp-buffer)
      ;; Reuse the native definition under our own name.
      (fset 'po-with-temp-buffer (symbol-function 'with-temp-buffer))

    (defmacro po-with-temp-buffer (&rest forms)
      "Create a temporary buffer, and evaluate FORMS there like `progn'.
The previously current buffer is made current again afterwards, and the
temporary buffer is killed if it is still alive, even when FORMS exit
non-locally."
      (let ((curr-buffer (make-symbol "curr-buffer"))
            (temp-buffer (make-symbol "temp-buffer")))
        `(let ((,curr-buffer (current-buffer))
               (,temp-buffer (get-buffer-create
                              (generate-new-buffer-name " *po-temp*"))))
           (unwind-protect
               (progn
                 (set-buffer ,temp-buffer)
                 ,@forms)
             (set-buffer ,curr-buffer)
             (and (buffer-name ,temp-buffer)
                  (kill-buffer ,temp-buffer))))))))

(defconst po-content-type-charset-alist
  '(; Note: Emacs 21 doesn't support all encodings, thus the missing entries.
    ("ASCII" . undecided)
    ("ANSI_X3.4-1968" . undecided)
    ("US-ASCII" . undecided)
    ("ISO-8859-1" . iso-8859-1)
    ("ISO_8859-1" . iso-8859-1)
    ("ISO-8859-2" . iso-8859-2)
    ("ISO_8859-2" . iso-8859-2)
    ("ISO-8859-3" . iso-8859-3)
    ("ISO_8859-3" . iso-8859-3)
    ("ISO-8859-4" . iso-8859-4)
    ("ISO_8859-4" . iso-8859-4)
    ("ISO-8859-5" . iso-8859-5)
    ("ISO_8859-5" . iso-8859-5)
    ;("ISO-8859-6" . ??)
    ;("ISO_8859-6" . ??)
    ("ISO-8859-7" . iso-8859-7)
    ("ISO_8859-7" . iso-8859-7)
    ("ISO-8859-8" . iso-8859-8)
    ("ISO_8859-8" . iso-8859-8)
    ("ISO-8859-9" . iso-8859-9)
    ("ISO_8859-9" . iso-8859-9)
    ;("ISO-8859-13" . ??)
    ;("ISO_8859-13" . ??)
    ;("ISO-8859-14" . ??)
    ;("ISO_8859-14" . ??)
    ("ISO-8859-15" . iso-8859-15) ; requires Emacs 21
    ("ISO_8859-15" . iso-8859-15) ; requires Emacs 21
    ("KOI8-R" . koi8-r)
    ;("KOI8-U" . ??)
    ;("KOI8-T" . ??)
    ("CP437" . cp437) ; requires Emacs 20
    ("CP775" . cp775) ; requires Emacs 20
    ("CP850" . cp850) ; requires Emacs 20
    ("CP852" . cp852) ; requires Emacs 20
    ("CP855" . cp855) ; requires Emacs 20
    ;("CP856" . ??)
    ("CP857" . cp857) ; requires Emacs 20
    ("CP861" . cp861) ; requires Emacs 20
    ("CP862" . cp862) ; requires Emacs 20
    ("CP864" . cp864) ; requires Emacs 20
    ("CP865" . cp865) ; requires Emacs 20
    ("CP866" . cp866) ; requires Emacs 21
    ("CP869" . cp869) ; requires Emacs 20
    ;("CP874" . ??)
    ;("CP922" . ??)
    ;("CP932" . ??)
    ;("CP943" . ??)
    ;("CP949" . ??)
    ;("CP950" . ??)
    ;("CP1046" . ??)
    ;("CP1124" . ??)
    ;("CP1129" . ??)
    ("CP1250" . cp1250) ; requires Emacs 20
    ("CP1251" . cp1251) ; requires Emacs 20
    ("CP1252" . iso-8859-1) ; approximation
    ("CP1253" . cp1253) ; requires Emacs 20
    ("CP1254" . iso-8859-9) ; approximation
    ("CP1255" . iso-8859-8) ; approximation
    ;("CP1256" . ??)
    ("CP1257" . cp1257) ; requires Emacs 20
    ("GB2312" . cn-gb-2312)  ; also named 'gb2312' in XEmacs 21 or Emacs 21
                             ; also named 'euc-cn' in Emacs 20 or Emacs 21
    ("EUC-JP" . euc-jp)
    ("EUC-KR" . euc-kr)
    ;("EUC-TW" . ??)
    ("BIG5" . big5)
    ;("BIG5-HKSCS" . ??)
    ;("GBK" . ??)
    ;("GB18030" . ??)
    ("SHIFT_JIS" . shift_jis)
    ;("JOHAB" . ??)
    ("TIS-620" . tis-620)    ; requires Emacs 20 or Emacs 21
    ("VISCII" . viscii)      ; requires Emacs 20 or Emacs 21
    ;("GEORGIAN-PS" . ??)
    ("UTF-8" . utf-8)        ; requires Mule-UCS in Emacs 20, or Emacs 21
    )
  "How to convert a GNU libc/libiconv canonical charset name as seen in
Content-Type into a Mule coding system.")

(defun po-find-charset (filename)
  "Return the charset declared in the header entry of PO file FILENAME.
The beginning of FILENAME is inserted literally, chunk by chunk, at the
end of the current buffer, and searched from point.  The file offsets
are derived from the buffer positions, so the current buffer should be
an empty scratch buffer.
Return the text that follows `charset=' on the Content-Type line, or nil
if no such line is found."
  ;; Deliberately not `interactive': FILENAME is mandatory, so an
  ;; interactive call without arguments could only signal an error.
  (let ((charset-regexp
         "^\"Content-Type: text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        (short-read nil))
    ;; Try the first 4096 bytes.  In case we cannot find the charset value
    ;; within the first 4096 bytes (the PO file might start with a long
    ;; comment) try the next 4096 bytes repeatedly until we'll know for sure
    ;; we've checked the empty header entry entirely.
    (while (not (or short-read (re-search-forward "^msgid" nil t)))
      (save-excursion
        (goto-char (point-max))
        ;; Buffer positions start at 1, file offsets at 0.
        (let ((pair (insert-file-contents-literally filename nil
                                                    (1- (point))
                                                    (1- (+ (point) 4096)))))
          ;; Fewer bytes than requested means end of file was reached.
          (setq short-read (< (nth 1 pair) 4096)))))
    (cond ((re-search-forward charset-regexp nil t) (match-string 1))
          (short-read nil)
          ;; We've found the first msgid; maybe, only a part of the msgstr
          ;; value was loaded.  Load the next 1024 bytes; if charset still
          ;; isn't available, give up.
          (t (save-excursion
               (goto-char (point-max))
               (insert-file-contents-literally filename nil
                                               (1- (point))
                                               (1- (+ (point) 1024))))
             (if (re-search-forward charset-regexp nil t)
                 (match-string 1))))))

(eval-and-compile
  (if po-EMACS20
      (defun po-find-file-coding-system-guts (operation filename)
        "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.
Return nil unless OPERATION is `insert-file-contents' and FILENAME is the
name of an existing file.  Otherwise return a one-element list holding the
coding system chosen for the charset found by `po-find-charset', with
`no-conversion' as the fallback."
        (and (eq operation 'insert-file-contents)
             ;; The target handed to a `file-coding-system-alist' function
             ;; may be a (NAME . BUFFER) cons rather than a file name; give
             ;; no opinion in that case instead of signalling an error from
             ;; `file-exists-p'.
             (stringp filename)
             (file-exists-p filename)
             (po-with-temp-buffer
              (let* ((coding-system-for-read 'no-conversion)
                     (charset (or (po-find-charset filename) "ascii"))
                     (charset-upper (upcase charset))
                     (charset-lower (downcase charset))
                     ;; Prefer the explicit table entry; otherwise try a
                     ;; coding system whose name is the lowercased charset.
                     (candidate
                      (cdr (assoc charset-upper po-content-type-charset-alist)))
                     (try-symbol (or candidate (intern-soft charset-lower)))
                     (try-string
                      (if try-symbol (symbol-name try-symbol) charset-lower)))
                (list (cond ((and try-symbol (coding-system-p try-symbol))
                             try-symbol)
                            ;; Codepage coding systems may need to be set up
                            ;; on demand.  Only attempt this where the
                            ;; codepage functions are actually defined.
                            ((and po-EMACS20
                                  (fboundp 'cp-supported-codepages)
                                  (fboundp 'codepage-setup)
                                  (string-match "\\`cp[1-9][0-9][0-9]?\\'"
                                                try-string)
                                  (assoc (substring try-string 2)
                                         (cp-supported-codepages)))
                             (codepage-setup (substring try-string 2))
                             (intern try-string))
                            (t
                             'no-conversion))))))))

  (if po-XEMACS
      (defun po-find-file-coding-system-guts (operation filename)
        "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.
Return nil unless OPERATION is `insert-file-contents' and FILENAME exists.
Otherwise return a one-element list holding the table entry for the
charset, or the lowercased charset name if `coding-system-list' contains
it, or `no-conversion'."
        (and (eq operation 'insert-file-contents)
             (file-exists-p filename)
             (po-with-temp-buffer
              (let ((coding-system-for-read 'no-conversion))
                (let* ((charset (or (po-find-charset filename)
                                    "ascii"))
                       (charset-upper (upcase charset))
                       (charset-lower (intern (downcase charset))))
                  (list (or (cdr (assoc charset-upper
                                        po-content-type-charset-alist))
                            (if (memq charset-lower (coding-system-list))
                                charset-lower
                              'no-conversion)))))))))

  (if po-EMACS20
      (defun po-find-file-coding-system (arg-list)
        "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.
ARG-LIST is a list whose first element is the operation and whose second
element is the file being operated on."
        (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list)))))

  (if po-XEMACS
      (defun po-find-file-coding-system (operation filename)
        "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.
OPERATION and FILENAME are passed on unchanged."
        (po-find-file-coding-system-guts operation filename)))

  )

(provide 'po-compat)

;;; Testing this file:

;; For each emacsimpl in { emacs, xemacs } do
;;   For each pofile in {
;;     cs.po           ; gettext/po/cs.el, charset=ISO-8859-2
;;     cs-modified.po  ; gettext/po/cs.el, charset=ISO_8859-2
;;     de.po           ; gettext/po/de.el, charset=UTF-8, if $emacsimpl = emacs
;;   } do
;;     Start $emacsimpl
;;     M-x load-file po-compat.el RET
;;     C-x C-f $pofile RET
;;     Verify charset marker in status line ('2' = ISO-8859-2, 'u' = UTF-8).

;;; po-compat.el ends here