1;;; mm-bodies.el --- Functions for decoding MIME things 2 3;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 4;; 2005, 2006, 2007 Free Software Foundation, Inc. 5 6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> 7;; MORIOKA Tomohiko <morioka@jaist.ac.jp> 8;; This file is part of GNU Emacs. 9 10;; GNU Emacs is free software; you can redistribute it and/or modify 11;; it under the terms of the GNU General Public License as published by 12;; the Free Software Foundation; either version 2, or (at your option) 13;; any later version. 14 15;; GNU Emacs is distributed in the hope that it will be useful, 16;; but WITHOUT ANY WARRANTY; without even the implied warranty of 17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18;; GNU General Public License for more details. 19 20;; You should have received a copy of the GNU General Public License 21;; along with GNU Emacs; see the file COPYING. If not, write to the 22;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 23;; Boston, MA 02110-1301, USA. 24 25;;; Commentary: 26 27;;; Code: 28 29(eval-and-compile 30 (or (fboundp 'base64-decode-region) 31 (require 'base64))) 32 33(eval-when-compile 34 (defvar mm-uu-decode-function) 35 (defvar mm-uu-binhex-decode-function)) 36 37(require 'mm-util) 38(require 'rfc2047) 39(require 'mm-encode) 40 41;; 8bit treatment gets any char except: 0x32 - 0x7f, LF, TAB, BEL, 42;; BS, vertical TAB, form feed, and ^_ 43;; 44;; Note that CR is *not* included, as that would allow a non-paired CR 45;; in the body contrary to RFC 2822: 46;; 47;; - CR and LF MUST only occur together as CRLF; they MUST NOT 48;; appear independently in the body. 49 50(defvar mm-7bit-chars "\x20-\x7f\n\t\x7\x8\xb\xc\x1f") 51 52(defcustom mm-body-charset-encoding-alist 53 '((iso-2022-jp . 7bit) 54 (iso-2022-jp-2 . 7bit) 55 ;; We MUST encode UTF-16 because it can contain \0's which is 56 ;; known to break servers. 57 ;; Note: UTF-16 variants are invalid for text parts [RFC 2781], 58 ;; so this can't happen :-/. 59 ;; PPS: Yes, it can happen if the user specifies UTF-16 in the MML 60 ;; markup. - jh. 61 (utf-16 . base64) 62 (utf-16be . base64) 63 (utf-16le . base64)) 64 "Alist of MIME charsets to encodings. 65Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'." 66 :type '(repeat (cons (symbol :tag "charset") 67 (choice :tag "encoding" 68 (const 7bit) 69 (const 8bit) 70 (const quoted-printable) 71 (const base64)))) 72 :group 'mime) 73 74(defun mm-encode-body (&optional charset) 75 "Encode a body. 76Should be called narrowed to the body that is to be encoded. 77If there is more than one non-ASCII MULE charset in the body, then the 78list of MULE charsets found is returned. 79If CHARSET is non-nil, it is used as the MIME charset to encode the body. 80If successful, the MIME charset is returned. 81If no encoding was done, nil is returned." 82 (if (not (mm-multibyte-p)) 83 ;; In the non-Mule case, we search for non-ASCII chars and 84 ;; return the value of `mail-parse-charset' if any are found. 85 (or charset 86 (save-excursion 87 (goto-char (point-min)) 88 (if (re-search-forward "[^\x0-\x7f]" nil t) 89 (or mail-parse-charset 90 (message-options-get 'mm-encody-body-charset) 91 (message-options-set 92 'mm-encody-body-charset 93 (mm-read-coding-system "Charset used in the article: "))) 94 ;; The logic in `mml-generate-mime-1' confirms that it's OK 95 ;; to return nil here. 96 nil))) 97 (save-excursion 98 (if charset 99 (progn 100 (mm-encode-coding-region (point-min) (point-max) 101 (mm-charset-to-coding-system charset)) 102 charset) 103 (goto-char (point-min)) 104 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max) 105 mm-hack-charsets))) 106 (cond 107 ;; No encoding. 108 ((null charsets) 109 nil) 110 ;; Too many charsets. 111 ((> (length charsets) 1) 112 charsets) 113 ;; We encode. 114 (t 115 (prog1 116 (setq charset (car charsets)) 117 (mm-encode-coding-region (point-min) (point-max) 118 (mm-charset-to-coding-system charset)))) 119 )))))) 120 121(defun mm-long-lines-p (length) 122 "Say whether any of the lines in the buffer is longer than LENGTH." 123 (save-excursion 124 (goto-char (point-min)) 125 (end-of-line) 126 (while (and (not (eobp)) 127 (not (> (current-column) length))) 128 (forward-line 1) 129 (end-of-line)) 130 (and (> (current-column) length) 131 (current-column)))) 132 133(defvar message-posting-charset) 134 135(defun mm-body-encoding (charset &optional encoding) 136 "Do Content-Transfer-Encoding and return the encoding of the current buffer." 137 (when (stringp encoding) 138 (setq encoding (intern (downcase encoding)))) 139 (let ((bits (mm-body-7-or-8)) 140 (longp (mm-long-lines-p 1000))) 141 (require 'message) 142 (cond 143 ((and (not longp) 144 (not (and mm-use-ultra-safe-encoding 145 (or (save-excursion (re-search-forward " $" nil t)) 146 (save-excursion (re-search-forward "^From " nil t))))) 147 (eq bits '7bit)) 148 bits) 149 ((and (not mm-use-ultra-safe-encoding) 150 (not longp) 151 (not (cdr (assq charset mm-body-charset-encoding-alist))) 152 (or (eq t (cdr message-posting-charset)) 153 (memq charset (cdr message-posting-charset)) 154 (eq charset mail-parse-charset))) 155 bits) 156 (t 157 (let ((encoding (or encoding 158 (cdr (assq charset mm-body-charset-encoding-alist)) 159 (mm-qp-or-base64)))) 160 (when mm-use-ultra-safe-encoding 161 (setq encoding (mm-safer-encoding encoding))) 162 (mm-encode-content-transfer-encoding encoding "text/plain") 163 encoding))))) 164 165(defun mm-body-7-or-8 () 166 "Say whether the body is 7bit or 8bit." 167 (if (save-excursion 168 (goto-char (point-min)) 169 (skip-chars-forward mm-7bit-chars) 170 (eobp)) 171 '7bit 172 '8bit)) 173 174;;; 175;;; Functions for decoding 176;;; 177 178(eval-when-compile (defvar mm-uu-yenc-decode-function)) 179 180(defun mm-decode-content-transfer-encoding (encoding &optional type) 181 "Decodes buffer encoded with ENCODING, returning success status. 182If TYPE is `text/plain' CRLF->LF translation may occur." 183 (prog1 184 (condition-case error 185 (cond 186 ((eq encoding 'quoted-printable) 187 (quoted-printable-decode-region (point-min) (point-max)) 188 t) 189 ((eq encoding 'base64) 190 (base64-decode-region 191 (point-min) 192 ;; Some mailers insert whitespace 193 ;; junk at the end which 194 ;; base64-decode-region dislikes. 195 ;; Also remove possible junk which could 196 ;; have been added by mailing list software. 197 (save-excursion 198 (goto-char (point-min)) 199 (while (re-search-forward "^[\t ]*\r?\n" nil t) 200 (delete-region (match-beginning 0) (match-end 0))) 201 (goto-char (point-max)) 202 (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t) 203 (forward-line)) 204 (point)))) 205 ((memq encoding '(7bit 8bit binary)) 206 ;; Do nothing. 207 t) 208 ((null encoding) 209 ;; Do nothing. 210 t) 211 ((memq encoding '(x-uuencode x-uue)) 212 (require 'mm-uu) 213 (funcall mm-uu-decode-function (point-min) (point-max)) 214 t) 215 ((eq encoding 'x-binhex) 216 (require 'mm-uu) 217 (funcall mm-uu-binhex-decode-function (point-min) (point-max)) 218 t) 219 ((eq encoding 'x-yenc) 220 (require 'mm-uu) 221 (funcall mm-uu-yenc-decode-function (point-min) (point-max)) 222 ) 223 ((functionp encoding) 224 (funcall encoding (point-min) (point-max)) 225 t) 226 (t 227 (message "Unknown encoding %s; defaulting to 8bit" encoding))) 228 (error 229 (message "Error while decoding: %s" error) 230 nil)) 231 (when (and 232 type 233 (memq encoding '(base64 x-uuencode x-uue x-binhex x-yenc)) 234 (string-match "\\`text/" type)) 235 (goto-char (point-min)) 236 (while (search-forward "\r\n" nil t) 237 (replace-match "\n" t t))))) 238 239(defun mm-decode-body (charset &optional encoding type) 240 "Decode the current article that has been encoded with ENCODING to CHARSET. 241ENCODING is a MIME content transfer encoding. 242CHARSET is the MIME charset with which to decode the data after transfer 243decoding. If it is nil, default to `mail-parse-charset'." 244 (when (stringp charset) 245 (setq charset (intern (downcase charset)))) 246 (when (or (not charset) 247 (eq 'gnus-all mail-parse-ignored-charsets) 248 (memq 'gnus-all mail-parse-ignored-charsets) 249 (memq charset mail-parse-ignored-charsets)) 250 (setq charset mail-parse-charset)) 251 (save-excursion 252 (when encoding 253 (mm-decode-content-transfer-encoding encoding type)) 254 (when (and (featurep 'mule) ;; Fixme: Wrong test for unibyte session. 255 (not (eq charset 'gnus-decoded))) 256 (let ((coding-system (mm-charset-to-coding-system 257 ;; Allow overwrite using 258 ;; `mm-charset-override-alist'. 259 charset nil t))) 260 (if (and (not coding-system) 261 (listp mail-parse-ignored-charsets) 262 (memq 'gnus-unknown mail-parse-ignored-charsets)) 263 (setq coding-system 264 (mm-charset-to-coding-system mail-parse-charset))) 265 (when (and charset coding-system 266 ;; buffer-file-coding-system 267 ;;Article buffer is nil coding system 268 ;;in XEmacs 269 (mm-multibyte-p) 270 (or (not (eq coding-system 'ascii)) 271 (setq coding-system mail-parse-charset))) 272 (mm-decode-coding-region (point-min) (point-max) 273 coding-system)) 274 (setq buffer-file-coding-system 275 (if (boundp 'last-coding-system-used) 276 (symbol-value 'last-coding-system-used) 277 coding-system)))))) 278 279(defun mm-decode-string (string charset) 280 "Decode STRING with CHARSET." 281 (when (stringp charset) 282 (setq charset (intern (downcase charset)))) 283 (when (or (not charset) 284 (eq 'gnus-all mail-parse-ignored-charsets) 285 (memq 'gnus-all mail-parse-ignored-charsets) 286 (memq charset mail-parse-ignored-charsets)) 287 (setq charset mail-parse-charset)) 288 (or 289 (when (featurep 'mule) 290 (let ((coding-system (mm-charset-to-coding-system 291 charset 292 ;; Allow overwrite using 293 ;; `mm-charset-override-alist'. 294 nil t))) 295 (if (and (not coding-system) 296 (listp mail-parse-ignored-charsets) 297 (memq 'gnus-unknown mail-parse-ignored-charsets)) 298 (setq coding-system 299 (mm-charset-to-coding-system mail-parse-charset))) 300 (when (and charset coding-system 301 (mm-multibyte-p) 302 (or (not (eq coding-system 'ascii)) 303 (setq coding-system mail-parse-charset))) 304 (mm-decode-coding-string string coding-system)))) 305 string)) 306 307(provide 'mm-bodies) 308 309;;; arch-tag: 41104bb6-4443-4ca9-8d5c-ff87ecf27d8d 310;;; mm-bodies.el ends here 311