1/* Basic multilingual character support.
2   Copyright (C) 2001, 2002, 2003, 2004, 2005,
3                 2006, 2007 Free Software Foundation, Inc.
4   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5     2005, 2006, 2007
6     National Institute of Advanced Industrial Science and Technology (AIST)
7     Registration Number H14PRO021
8
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
15
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING.  If not, write to
23the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24Boston, MA 02110-1301, USA.  */
25
26/* At first, see the document in `charset.h' to understand the code in
27   this file.  */
28
29#ifdef emacs
30#include <config.h>
31#endif
32
33#include <stdio.h>
34
35#ifdef emacs
36
37#include <sys/types.h>
38#include "lisp.h"
39#include "buffer.h"
40#include "charset.h"
41#include "composite.h"
42#include "coding.h"
43#include "disptab.h"
44
45#else  /* not emacs */
46
47#include "mulelib.h"
48
49#endif /* emacs */
50
51Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
52Lisp_Object Qunknown;
53
54/* Declaration of special leading-codes.  */
55EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
56EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
57EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
58EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
59
60/* Declaration of special charsets.  The values are set by
61   Fsetup_special_charsets.  */
62int charset_latin_iso8859_1;	/* ISO8859-1 (Latin-1) */
63int charset_jisx0208_1978;	/* JISX0208.1978 (Japanese Kanji old set) */
64int charset_jisx0208;		/* JISX0208.1983 (Japanese Kanji) */
65int charset_katakana_jisx0201;	/* JISX0201.Kana (Japanese Katakana) */
66int charset_latin_jisx0201;	/* JISX0201.Roman (Japanese Roman) */
67int charset_big5_1;		/* Big5 Level 1 (Chinese Traditional) */
68int charset_big5_2;		/* Big5 Level 2 (Chinese Traditional) */
69int charset_mule_unicode_0100_24ff;
70int charset_mule_unicode_2500_33ff;
71int charset_mule_unicode_e000_ffff;
72
73Lisp_Object Qcharset_table;
74
75/* A char-table containing information of each character set.  */
76Lisp_Object Vcharset_table;
77
78/* A vector of charset symbol indexed by charset-id.  This is used
79   only for returning charset symbol from C functions.  */
80Lisp_Object Vcharset_symbol_table;
81
82/* A list of charset symbols ever defined.  */
83Lisp_Object Vcharset_list;
84
85/* Vector of translation table ever defined.
86   ID of a translation table is used to index this vector.  */
87Lisp_Object Vtranslation_table_vector;
88
89/* A char-table for characters which may invoke auto-filling.  */
90Lisp_Object Vauto_fill_chars;
91
92Lisp_Object Qauto_fill_chars;
93
94/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
95int bytes_by_char_head[256];
96int width_by_char_head[256];
97
98/* Mapping table from ISO2022's charset (specified by DIMENSION,
99   CHARS, and FINAL-CHAR) to Emacs' charset.  */
100int iso_charset_table[2][2][128];
101
102/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
103unsigned char *_fetch_multibyte_char_p;
104int _fetch_multibyte_char_len;
105
106/* Offset to add to a non-ASCII value when inserting it.  */
107EMACS_INT nonascii_insert_offset;
108
109/* Translation table for converting non-ASCII unibyte characters
110   to multibyte codes, or nil.  */
111Lisp_Object Vnonascii_translation_table;
112
113/* List of all possible generic characters.  */
114Lisp_Object Vgeneric_character_list;
115
116
117void
118invalid_character (c)
119     int c;
120{
121  error ("Invalid character: %d, #o%o, #x%x", c, c, c);
122}
123
124/* Parse string STR of length LENGTH and fetch information of a
125   character at STR.  Set BYTES to the byte length the character
126   occupies, CHARSET, C1, C2 to proper values of the character. */
127
128#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)	     \
129  do {									     \
130    (c1) = *(str);							     \
131    (bytes) = BYTES_BY_CHAR_HEAD (c1);					     \
132    if ((bytes) == 1)							     \
133      (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
134    else if ((bytes) == 2)						     \
135      {									     \
136	if ((c1) == LEADING_CODE_8_BIT_CONTROL)				     \
137	  (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;	     \
138	else								     \
139	  (charset) = (c1), (c1) = (str)[1] & 0x7F;			     \
140      }									     \
141    else if ((bytes) == 3)						     \
142      {									     \
143	if ((c1) < LEADING_CODE_PRIVATE_11)				     \
144	  (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
145	else								     \
146	  (charset) = (str)[1], (c1) = (str)[2] & 0x7F;			     \
147      }									     \
148    else								     \
149      (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
150  } while (0)
151
152/* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
153   Note that this intentionally allows invalid components, such
154   as 0xA0 0xA0, because there exist many files that contain
155   such invalid byte sequences, especially in EUC-GB. */
156#define CHAR_COMPONENTS_VALID_P(charset, c1, c2)	\
157  ((charset) == CHARSET_ASCII				\
158   ? ((c1) >= 0 && (c1) <= 0x7F)			\
159   : ((charset) == CHARSET_8_BIT_CONTROL		\
160      ? ((c1) >= 0x80 && (c1) <= 0x9F)			\
161      : ((charset) == CHARSET_8_BIT_GRAPHIC		\
162	 ? ((c1) >= 0x80 && (c1) <= 0xFF)		\
163	 : (CHARSET_DIMENSION (charset) == 1		\
164	    ? ((c1) >= 0x20 && (c1) <= 0x7F)		\
165	    : ((c1) >= 0x20 && (c1) <= 0x7F		\
166	       && (c2) >= 0x20 && (c2) <= 0x7F)))))
167
168/* Store multi-byte form of the character C in STR.  The caller should
169   allocate at least 4-byte area at STR in advance.  Returns the
170   length of the multi-byte form.  If C is an invalid character code,
171   return -1.  */
172
173int
174char_to_string_1 (c, str)
175     int c;
176     unsigned char *str;
177{
178  unsigned char *p = str;
179
180  if (c & CHAR_MODIFIER_MASK)	/* This includes the case C is negative.  */
181    {
182      /* Multibyte character can't have a modifier bit.  */
183      if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
184	return -1;
185
186      /* For Meta, Shift, and Control modifiers, we need special care.  */
187      if (c & CHAR_META)
188	{
189	  /* Move the meta bit to the right place for a string.  */
190	  c = (c & ~CHAR_META) | 0x80;
191	}
192      if (c & CHAR_SHIFT)
193	{
194	  /* Shift modifier is valid only with [A-Za-z].  */
195	  if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
196	    c &= ~CHAR_SHIFT;
197	  else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
198	    c = (c & ~CHAR_SHIFT) - ('a' - 'A');
199	}
200      if (c & CHAR_CTL)
201	{
202	  /* Simulate the code in lread.c.  */
203	  /* Allow `\C- ' and `\C-?'.  */
204	  if (c == (CHAR_CTL | ' '))
205	    c = 0;
206	  else if (c == (CHAR_CTL | '?'))
207	    c = 127;
208	  /* ASCII control chars are made from letters (both cases),
209	     as well as the non-letters within 0100...0137.  */
210	  else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
211	    c &= (037 | (~0177 & ~CHAR_CTL));
212	  else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
213	    c &= (037 | (~0177 & ~CHAR_CTL));
214	}
215
216      /* If C still has any modifier bits, just ignore it.  */
217      c &= ~CHAR_MODIFIER_MASK;
218    }
219
220  if (SINGLE_BYTE_CHAR_P (c))
221    {
222      if (ASCII_BYTE_P (c) || c >= 0xA0)
223	*p++ = c;
224      else
225	{
226	  *p++ = LEADING_CODE_8_BIT_CONTROL;
227	  *p++ = c + 0x20;
228	}
229    }
230  else if (CHAR_VALID_P (c, 0))
231    {
232      int charset, c1, c2;
233
234      SPLIT_CHAR (c, charset, c1, c2);
235
236      if (charset >= LEADING_CODE_EXT_11)
237	*p++ = (charset < LEADING_CODE_EXT_12
238		? LEADING_CODE_PRIVATE_11
239		: (charset < LEADING_CODE_EXT_21
240		   ? LEADING_CODE_PRIVATE_12
241		   : (charset < LEADING_CODE_EXT_22
242		      ? LEADING_CODE_PRIVATE_21
243		      : LEADING_CODE_PRIVATE_22)));
244      *p++ = charset;
245      if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
246	return -1;
247      if (c1)
248	{
249	  *p++ = c1 | 0x80;
250	  if (c2 > 0)
251	    *p++ = c2 | 0x80;
252	}
253    }
254  else
255    return -1;
256
257  return (p - str);
258}
259
260
261/* Store multi-byte form of the character C in STR.  The caller should
262   allocate at least 4-byte area at STR in advance.  Returns the
263   length of the multi-byte form.  If C is an invalid character code,
264   signal an error.
265
266   Use macro `CHAR_STRING (C, STR)' instead of calling this function
267   directly if C can be an ASCII character.  */
268
269int
270char_to_string (c, str)
271     int c;
272     unsigned char *str;
273{
274  int len;
275  len = char_to_string_1 (c, str);
276  if (len == -1)
277    invalid_character (c);
278  return len;
279}
280
281
282/* Return the non-ASCII character corresponding to multi-byte form at
283   STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
284   length of the multibyte form in *ACTUAL_LEN.
285
286   Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
287   this function directly if you want ot handle ASCII characters as
288   well.  */
289
290int
291string_to_char (str, len, actual_len)
292     const unsigned char *str;
293     int len, *actual_len;
294{
295  int c, bytes, charset, c1, c2;
296
297  SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
298  c = MAKE_CHAR (charset, c1, c2);
299  if (actual_len)
300    *actual_len = bytes;
301  return c;
302}
303
304/* Return the length of the multi-byte form at string STR of length LEN.
305   Use the macro MULTIBYTE_FORM_LENGTH instead.  */
306int
307multibyte_form_length (str, len)
308     const unsigned char *str;
309     int len;
310{
311  int bytes;
312
313  PARSE_MULTIBYTE_SEQ (str, len, bytes);
314  return bytes;
315}
316
317/* Check multibyte form at string STR of length LEN and set variables
318   pointed by CHARSET, C1, and C2 to charset and position codes of the
319   character at STR, and return 0.  If there's no multibyte character,
320   return -1.  This should be used only in the macro SPLIT_STRING
321   which checks range of STR in advance.  */
322
323int
324split_string (str, len, charset, c1, c2)
325     const unsigned char *str;
326     unsigned char *c1, *c2;
327     int len, *charset;
328{
329  register int bytes, cs, code1, code2 = -1;
330
331  SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
332  if (cs == CHARSET_ASCII)
333    return -1;
334  *charset = cs;
335  *c1 = code1;
336  *c2 = code2;
337  return 0;
338}
339
340/* Return 1 iff character C has valid printable glyph.
341   Use the macro CHAR_PRINTABLE_P instead.  */
342int
343char_printable_p (c)
344     int c;
345{
346  int charset, c1, c2;
347
348  if (ASCII_BYTE_P (c))
349    return 1;
350  else if (SINGLE_BYTE_CHAR_P (c))
351    return 0;
352  else if (c >= MAX_CHAR)
353    return 0;
354
355  SPLIT_CHAR (c, charset, c1, c2);
356  if (! CHARSET_DEFINED_P (charset))
357    return 0;
358  if (CHARSET_CHARS (charset) == 94
359      ? c1 <= 32 || c1 >= 127
360      : c1 < 32)
361    return 0;
362  if (CHARSET_DIMENSION (charset) == 2
363      && (CHARSET_CHARS (charset) == 94
364	  ? c2 <= 32 || c2 >= 127
365	  : c2 < 32))
366    return 0;
367  return 1;
368}
369
370/* Translate character C by translation table TABLE.  If C
371   is negative, translate a character specified by CHARSET, C1, and C2
372   (C1 and C2 are code points of the character).  If no translation is
373   found in TABLE, return C.  */
374int
375translate_char (table, c, charset, c1, c2)
376     Lisp_Object table;
377     int c, charset, c1, c2;
378{
379  Lisp_Object ch;
380  int alt_charset, alt_c1, alt_c2, dimension;
381
382  if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
383  if (!CHAR_TABLE_P (table)
384      || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
385    return c;
386
387  SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
388  dimension = CHARSET_DIMENSION (alt_charset);
389  if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
390    /* CH is not a generic character, just return it.  */
391    return XFASTINT (ch);
392
393  /* Since CH is a generic character, we must return a specific
394     charater which has the same position codes as C from CH.  */
395  if (charset < 0)
396    SPLIT_CHAR (c, charset, c1, c2);
397  if (dimension != CHARSET_DIMENSION (charset))
398    /* We can't make such a character because of dimension mismatch.  */
399    return c;
400  return MAKE_CHAR (alt_charset, c1, c2);
401}
402
403/* Convert the unibyte character C to multibyte based on
404   Vnonascii_translation_table or nonascii_insert_offset.  If they can't
405   convert C to a valid multibyte character, convert it based on
406   DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
407
408int
409unibyte_char_to_multibyte (c)
410     int c;
411{
412  if (c < 0400 && c >= 0200)
413    {
414      int c_save = c;
415
416      if (! NILP (Vnonascii_translation_table))
417	{
418	  c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
419	  if (c >= 0400 && ! char_valid_p (c, 0))
420	    c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
421	}
422      else if (c >= 0240 && nonascii_insert_offset > 0)
423	{
424	  c += nonascii_insert_offset;
425	  if (c < 0400 || ! char_valid_p (c, 0))
426	    c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
427	}
428      else if (c >= 0240)
429	c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
430    }
431  return c;
432}
433
434
435/* Convert the multibyte character C to unibyte 8-bit character based
436   on Vnonascii_translation_table or nonascii_insert_offset.  If
437   REV_TBL is non-nil, it should be a reverse table of
438   Vnonascii_translation_table, i.e. what given by:
439     Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
440
441int
442multibyte_char_to_unibyte (c, rev_tbl)
443     int c;
444     Lisp_Object rev_tbl;
445{
446  if (!SINGLE_BYTE_CHAR_P (c))
447    {
448      int c_save = c;
449
450      if (! CHAR_TABLE_P (rev_tbl)
451	  && CHAR_TABLE_P (Vnonascii_translation_table))
452	rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
453					  make_number (0));
454      if (CHAR_TABLE_P (rev_tbl))
455	{
456	  Lisp_Object temp;
457	  temp = Faref (rev_tbl, make_number (c));
458	  if (INTEGERP (temp))
459	    c = XINT (temp);
460	  if (c >= 256)
461	    c = (c_save & 0177) + 0200;
462	}
463      else
464	{
465	  if (nonascii_insert_offset > 0)
466	    c -= nonascii_insert_offset;
467	  if (c < 128 || c >= 256)
468	    c = (c_save & 0177) + 0200;
469	}
470    }
471
472  return c;
473}
474
475
476/* Update the table Vcharset_table with the given arguments (see the
477   document of `define-charset' for the meaning of each argument).
478   Several other table contents are also updated.  The caller should
479   check the validity of CHARSET-ID and the remaining arguments in
480   advance.  */
481
482void
483update_charset_table (charset_id, dimension, chars, width, direction,
484		      iso_final_char, iso_graphic_plane,
485		      short_name, long_name, description)
486     Lisp_Object charset_id, dimension, chars, width, direction;
487     Lisp_Object iso_final_char, iso_graphic_plane;
488     Lisp_Object short_name, long_name, description;
489{
490  int charset = XINT (charset_id);
491  int bytes;
492  unsigned char leading_code_base, leading_code_ext;
493
494  if (NILP (CHARSET_TABLE_ENTRY (charset)))
495    CHARSET_TABLE_ENTRY (charset)
496      = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
497
498  if (NILP (long_name))
499    long_name = short_name;
500  if (NILP (description))
501    description = long_name;
502
503  /* Get byte length of multibyte form, base leading-code, and
504     extended leading-code of the charset.  See the comment under the
505     title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
506  bytes = XINT (dimension);
507  if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
508    {
509      /* Official charset, it doesn't have an extended leading-code.  */
510      if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
511	bytes += 1; /* For a base leading-code.  */
512      leading_code_base = charset;
513      leading_code_ext = 0;
514    }
515  else
516    {
517      /* Private charset.  */
518      bytes += 2; /* For base and extended leading-codes.  */
519      leading_code_base
520	= (charset < LEADING_CODE_EXT_12
521	   ? LEADING_CODE_PRIVATE_11
522	   : (charset < LEADING_CODE_EXT_21
523	      ? LEADING_CODE_PRIVATE_12
524	      : (charset < LEADING_CODE_EXT_22
525		 ? LEADING_CODE_PRIVATE_21
526		 : LEADING_CODE_PRIVATE_22)));
527      leading_code_ext = charset;
528      if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
529	error ("Invalid dimension for the charset-ID %d", charset);
530    }
531
532  CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
533  CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
534  CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
535  CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
536  CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
537  CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
538  CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
539    = make_number (leading_code_base);
540  CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
541    = make_number (leading_code_ext);
542  CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
543  CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
544    = iso_graphic_plane;
545  CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
546  CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
547  CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
548  CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
549
550  {
551    /* If we have already defined a charset which has the same
552       DIMENSION, CHARS and ISO-FINAL-CHAR but the different
553       DIRECTION, we must update the entry REVERSE-CHARSET of both
554       charsets.  If there's no such charset, the value of the entry
555       is set to nil.  */
556    int i;
557
558    for (i = 0; i <= MAX_CHARSET; i++)
559      if (!NILP (CHARSET_TABLE_ENTRY (i)))
560	{
561	  if (CHARSET_DIMENSION (i) == XINT (dimension)
562	      && CHARSET_CHARS (i) == XINT (chars)
563	      && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
564	      && CHARSET_DIRECTION (i) != XINT (direction))
565	    {
566	      CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
567		= make_number (i);
568	      CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
569	      break;
570	    }
571	}
572    if (i > MAX_CHARSET)
573      /* No such a charset.  */
574      CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
575	= make_number (-1);
576  }
577
578  if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
579      && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
580    {
581      bytes_by_char_head[leading_code_base] = bytes;
582      width_by_char_head[leading_code_base] = XINT (width);
583
584      /* Update table emacs_code_class.  */
585      emacs_code_class[charset] = (bytes == 2
586				   ? EMACS_leading_code_2
587				   : (bytes == 3
588				      ? EMACS_leading_code_3
589				      : EMACS_leading_code_4));
590    }
591
592  /* Update table iso_charset_table.  */
593  if (XINT (iso_final_char) >= 0
594      && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
595    ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
596}
597
598#ifdef emacs
599
600/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
601   is invalid.  */
602int
603get_charset_id (charset_symbol)
604     Lisp_Object charset_symbol;
605{
606  Lisp_Object val;
607  int charset;
608
609  /* This originally used a ?: operator, but reportedly the HP-UX
610     compiler version HP92453-01 A.10.32.22 miscompiles that.  */
611  if (SYMBOLP (charset_symbol)
612      && VECTORP (val = Fget (charset_symbol, Qcharset))
613      && CHARSET_VALID_P (charset =
614			  XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
615    return charset;
616  else
617    return -1;
618}
619
620/* Return an identification number for a new private charset of
621   DIMENSION and WIDTH.  If there's no more room for the new charset,
622   return 0.  */
623Lisp_Object
624get_new_private_charset_id (dimension, width)
625     int dimension, width;
626{
627  int charset, from, to;
628
629  if (dimension == 1)
630    {
631      from = LEADING_CODE_EXT_11;
632      to = LEADING_CODE_EXT_21;
633    }
634  else
635    {
636      from = LEADING_CODE_EXT_21;
637      to = LEADING_CODE_EXT_MAX + 1;
638    }
639
640  for (charset = from; charset < to; charset++)
641    if (!CHARSET_DEFINED_P (charset)) break;
642
643  return make_number (charset < to ? charset : 0);
644}
645
646DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
647       doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
648If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
649 treated as a private charset.
650INFO-VECTOR is a vector of the format:
651   [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
652    SHORT-NAME LONG-NAME DESCRIPTION]
653The meanings of each elements is as follows:
654DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
655CHARS (integer) is the number of characters in a dimension: 94 or 96.
656WIDTH (integer) is the number of columns a character in the charset
657occupies on the screen: one of 0, 1, and 2.
658
659DIRECTION (integer) is the rendering direction of characters in the
660charset when rendering.  If 0, render from left to right, else
661render from right to left.
662
663ISO-FINAL-CHAR (character) is the final character of the
664corresponding ISO 2022 charset.
665It may be -1 if the charset is internal use only.
666
667ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
668while encoding to variants of ISO 2022 coding system, one of the
669following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
670It may be -1 if the charset is internal use only.
671
672SHORT-NAME (string) is the short name to refer to the charset.
673
674LONG-NAME (string) is the long name to refer to the charset.
675
676DESCRIPTION (string) is the description string of the charset.  */)
677       (charset_id, charset_symbol, info_vector)
678     Lisp_Object charset_id, charset_symbol, info_vector;
679{
680  Lisp_Object *vec;
681
682  if (!NILP (charset_id))
683    CHECK_NUMBER (charset_id);
684  CHECK_SYMBOL (charset_symbol);
685  CHECK_VECTOR (info_vector);
686
687  if (! NILP (charset_id))
688    {
689      if (! CHARSET_VALID_P (XINT (charset_id)))
690	error ("Invalid CHARSET: %d", XINT (charset_id));
691      else if (CHARSET_DEFINED_P (XINT (charset_id)))
692	error ("Already defined charset: %d", XINT (charset_id));
693    }
694
695  vec = XVECTOR (info_vector)->contents;
696  if (XVECTOR (info_vector)->size != 9
697      || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
698      || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
699      || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
700      || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
701      || !INTEGERP (vec[4])
702      || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
703      || !INTEGERP (vec[5])
704      || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
705      || !STRINGP (vec[6])
706      || !STRINGP (vec[7])
707      || !STRINGP (vec[8]))
708    error ("Invalid info-vector argument for defining charset %s",
709	   SDATA (SYMBOL_NAME (charset_symbol)));
710
711  if (NILP (charset_id))
712    {
713      charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
714      if (XINT (charset_id) == 0)
715	error ("There's no room for a new private charset %s",
716	       SDATA (SYMBOL_NAME (charset_symbol)));
717    }
718
719  update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
720			vec[4], vec[5], vec[6], vec[7], vec[8]);
721  Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
722  CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
723  Vcharset_list = Fcons (charset_symbol, Vcharset_list);
724  Fupdate_coding_systems_internal ();
725  return Qnil;
726}
727
728DEFUN ("generic-character-list", Fgeneric_character_list,
729       Sgeneric_character_list, 0, 0, 0,
730       doc: /* Return a list of all possible generic characters.
731It includes a generic character for a charset not yet defined.  */)
732     ()
733{
734  return Vgeneric_character_list;
735}
736
737DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
738       Sget_unused_iso_final_char, 2, 2, 0,
739       doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
740DIMENSION is the number of bytes to represent a character: 1 or 2.
741CHARS is the number of characters in a dimension: 94 or 96.
742
743This final char is for private use, thus the range is `0' (48) .. `?' (63).
744If there's no unused final char for the specified kind of charset,
745return nil.  */)
746     (dimension, chars)
747     Lisp_Object dimension, chars;
748{
749  int final_char;
750
751  CHECK_NUMBER (dimension);
752  CHECK_NUMBER (chars);
753  if (XINT (dimension) != 1 && XINT (dimension) != 2)
754    error ("Invalid charset dimension %d, it should be 1 or 2",
755	   XINT (dimension));
756  if (XINT (chars) != 94 && XINT (chars) != 96)
757    error ("Invalid charset chars %d, it should be 94 or 96",
758	   XINT (chars));
759  for (final_char = '0'; final_char <= '?'; final_char++)
760    {
761      if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
762	break;
763    }
764  return (final_char <= '?' ? make_number (final_char) : Qnil);
765}
766
767DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
768       4, 4, 0,
769       doc: /* Declare an equivalent charset for ISO-2022 decoding.
770
771On decoding by an ISO-2022 base coding system, when a charset
772specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
773if CHARSET is designated instead.  */)
774     (dimension, chars, final_char, charset)
775     Lisp_Object dimension, chars, final_char, charset;
776{
777  int charset_id;
778
779  CHECK_NUMBER (dimension);
780  CHECK_NUMBER (chars);
781  CHECK_NUMBER (final_char);
782  CHECK_SYMBOL (charset);
783
784  if (XINT (dimension) != 1 && XINT (dimension) != 2)
785    error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
786  if (XINT (chars) != 94 && XINT (chars) != 96)
787    error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
788  if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
789    error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
790  if ((charset_id = get_charset_id (charset)) < 0)
791    error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
792
793  ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
794  return Qnil;
795}
796
797/* Return information about charsets in the text at PTR of NBYTES
798   bytes, which are NCHARS characters.  The value is:
799
800	0: Each character is represented by one byte.  This is always
801	   true for unibyte text.
802	1: No charsets other than ascii eight-bit-control,
803	   eight-bit-graphic, and latin-1 are found.
804	2: Otherwise.
805
806   In addition, if CHARSETS is nonzero, for each found charset N, set
807   CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
808   (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
809   table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
810   1 (note that there's no charset whose ID is 1).  */
811
812int
813find_charset_in_text (ptr, nchars, nbytes, charsets, table)
814     const unsigned char *ptr;
815     int nchars, nbytes, *charsets;
816     Lisp_Object table;
817{
818  if (nchars == nbytes)
819    {
820      if (charsets && nbytes > 0)
821	{
822	  const unsigned char *endp = ptr + nbytes;
823	  int maskbits = 0;
824
825	  while (ptr < endp && maskbits != 7)
826	    {
827	      maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
828	      ptr++;
829	    }
830
831	  if (maskbits & 1)
832	    charsets[CHARSET_ASCII] = 1;
833	  if (maskbits & 2)
834	    charsets[CHARSET_8_BIT_CONTROL] = 1;
835	  if (maskbits & 4)
836	    charsets[CHARSET_8_BIT_GRAPHIC] = 1;
837	}
838      return 0;
839    }
840  else
841    {
842      int return_val = 1;
843      int bytes, charset, c1, c2;
844
845      if (! CHAR_TABLE_P (table))
846	table = Qnil;
847
848      while (nchars-- > 0)
849	{
850	  SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
851	  ptr += bytes;
852
853	  if (!CHARSET_DEFINED_P (charset))
854	    charset = 1;
855	  else if (! NILP (table))
856	    {
857	      int c = translate_char (table, -1, charset, c1, c2);
858	      if (c >= 0)
859		charset = CHAR_CHARSET (c);
860	    }
861
862	  if (return_val == 1
863	      && charset != CHARSET_ASCII
864	      && charset != CHARSET_8_BIT_CONTROL
865	      && charset != CHARSET_8_BIT_GRAPHIC
866	      && charset != charset_latin_iso8859_1)
867	    return_val = 2;
868
869	  if (charsets)
870	    charsets[charset] = 1;
871	  else if (return_val == 2)
872	    break;
873	}
874      return return_val;
875    }
876}
877
878DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
879       2, 3, 0,
880       doc: /* Return a list of charsets in the region between BEG and END.
881BEG and END are buffer positions.
882Optional arg TABLE if non-nil is a translation table to look up.
883
884If the region contains invalid multibyte characters,
885`unknown' is included in the returned list.
886
887If the current buffer is unibyte, the returned list may contain
888only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
889     (beg, end, table)
890     Lisp_Object beg, end, table;
891{
892  int charsets[MAX_CHARSET + 1];
893  int from, from_byte, to, stop, stop_byte, i;
894  Lisp_Object val;
895
896  validate_region (&beg, &end);
897  from = XFASTINT (beg);
898  stop = to = XFASTINT (end);
899
900  if (from < GPT && GPT < to)
901    {
902      stop = GPT;
903      stop_byte = GPT_BYTE;
904    }
905  else
906    stop_byte = CHAR_TO_BYTE (stop);
907
908  from_byte = CHAR_TO_BYTE (from);
909
910  bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
911  while (1)
912    {
913      find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
914			    stop_byte - from_byte, charsets, table);
915      if (stop < to)
916	{
917	  from = stop, from_byte = stop_byte;
918	  stop = to, stop_byte = CHAR_TO_BYTE (stop);
919	}
920      else
921	break;
922    }
923
924  val = Qnil;
925  if (charsets[1])
926    val = Fcons (Qunknown, val);
927  for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
928    if (charsets[i])
929      val = Fcons (CHARSET_SYMBOL (i), val);
930  if (charsets[0])
931    val = Fcons (Qascii, val);
932  return val;
933}
934
935DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
936       1, 2, 0,
937       doc: /* Return a list of charsets in STR.
938Optional arg TABLE if non-nil is a translation table to look up.
939
940If the string contains invalid multibyte characters,
941`unknown' is included in the returned list.
942
943If STR is unibyte, the returned list may contain
944only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
945     (str, table)
946     Lisp_Object str, table;
947{
948  int charsets[MAX_CHARSET + 1];
949  int i;
950  Lisp_Object val;
951
952  CHECK_STRING (str);
953
954  bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
955  find_charset_in_text (SDATA (str), SCHARS (str),
956			SBYTES (str), charsets, table);
957
958  val = Qnil;
959  if (charsets[1])
960    val = Fcons (Qunknown, val);
961  for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
962    if (charsets[i])
963      val = Fcons (CHARSET_SYMBOL (i), val);
964  if (charsets[0])
965    val = Fcons (Qascii, val);
966  return val;
967}
968
969
970DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
971       doc: /* Return a character made from arguments.
972Internal use only.  */)
973     (charset, code1, code2)
974     Lisp_Object charset, code1, code2;
975{
976  int charset_id, c1, c2;
977
978  CHECK_NUMBER (charset);
979  charset_id = XINT (charset);
980  if (!CHARSET_DEFINED_P (charset_id))
981    error ("Invalid charset ID: %d", XINT (charset));
982
983  if (NILP (code1))
984    c1 = 0;
985  else
986    {
987      CHECK_NUMBER (code1);
988      c1 = XINT (code1);
989    }
990  if (NILP (code2))
991    c2 = 0;
992  else
993    {
994      CHECK_NUMBER (code2);
995      c2 = XINT (code2);
996    }
997
998  if (charset_id == CHARSET_ASCII)
999    {
1000      if (c1 < 0 || c1 > 0x7F)
1001	goto invalid_code_posints;
1002      return make_number (c1);
1003    }
1004  else if (charset_id == CHARSET_8_BIT_CONTROL)
1005    {
1006      if (NILP (code1))
1007	c1 = 0x80;
1008      else if (c1 < 0x80 || c1 > 0x9F)
1009	goto invalid_code_posints;
1010      return make_number (c1);
1011    }
1012  else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1013    {
1014      if (NILP (code1))
1015	c1 = 0xA0;
1016      else if (c1 < 0xA0 || c1 > 0xFF)
1017	goto invalid_code_posints;
1018      return make_number (c1);
1019    }
1020  else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1021    goto invalid_code_posints;
1022  c1 &= 0x7F;
1023  c2 &= 0x7F;
1024  if (c1 == 0
1025      ? c2 != 0
1026      : (c2 == 0
1027	 ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1028	 : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1029    goto invalid_code_posints;
1030  return make_number (MAKE_CHAR (charset_id, c1, c2));
1031
1032 invalid_code_posints:
1033  error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1034}
1035
1036DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1037       doc: /* Return list of charset and one or two position-codes of CH.
1038If CH is invalid as a character code,
1039return a list of symbol `unknown' and CH.  */)
1040     (ch)
1041     Lisp_Object ch;
1042{
1043  int c, charset, c1, c2;
1044
1045  CHECK_NUMBER (ch);
1046  c = XFASTINT (ch);
1047  if (!CHAR_VALID_P (c, 1))
1048    return Fcons (Qunknown, Fcons (ch, Qnil));
1049  SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1050  return (c2 >= 0
1051	  ? Fcons (CHARSET_SYMBOL (charset),
1052		   Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1053	  : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1054}
1055
1056DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1057       doc: /* Return charset of CH.  */)
1058     (ch)
1059     Lisp_Object ch;
1060{
1061  CHECK_NUMBER (ch);
1062
1063  return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1064}
1065
1066DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1067       doc: /* Return charset of a character in the current buffer at position POS.
1068If POS is nil, it defauls to the current point.
1069If POS is out of range, the value is nil.  */)
1070     (pos)
1071     Lisp_Object pos;
1072{
1073  Lisp_Object ch;
1074  int charset;
1075
1076  ch = Fchar_after (pos);
1077  if (! INTEGERP (ch))
1078    return ch;
1079  charset = CHAR_CHARSET (XINT (ch));
1080  return CHARSET_SYMBOL (charset);
1081}
1082
1083DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1084       doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1085
1086ISO 2022's designation sequence (escape sequence) distinguishes charsets
1087by their DIMENSION, CHARS, and FINAL-CHAR,
1088where as Emacs distinguishes them by charset symbol.
1089See the documentation of the function `charset-info' for the meanings of
1090DIMENSION, CHARS, and FINAL-CHAR.  */)
1091     (dimension, chars, final_char)
1092     Lisp_Object dimension, chars, final_char;
1093{
1094  int charset;
1095
1096  CHECK_NUMBER (dimension);
1097  CHECK_NUMBER (chars);
1098  CHECK_NUMBER (final_char);
1099
1100  if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1101    return Qnil;
1102  return CHARSET_SYMBOL (charset);
1103}
1104
1105/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1106   generic character.  If GENERICP is zero, return nonzero iff C is a
1107   valid normal character.  Do not call this function directly,
1108   instead use macro CHAR_VALID_P.  */
1109int
1110char_valid_p (c, genericp)
1111     int c, genericp;
1112{
1113  int charset, c1, c2;
1114
1115  if (c < 0 || c >= MAX_CHAR)
1116    return 0;
1117  if (SINGLE_BYTE_CHAR_P (c))
1118    return 1;
1119  SPLIT_CHAR (c, charset, c1, c2);
1120  if (genericp)
1121    {
1122      if (c1)
1123	{
1124	  if (c2 <= 0) c2 = 0x20;
1125	}
1126      else
1127	{
1128	  if (c2 <= 0) c1 = c2 = 0x20;
1129	}
1130    }
1131  return (CHARSET_DEFINED_P (charset)
1132	  && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1133}
1134
1135DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1136       doc: /* Return t if OBJECT is a valid normal character.
1137If optional arg GENERICP is non-nil, also return t if OBJECT is
1138a valid generic character.  */)
1139     (object, genericp)
1140     Lisp_Object object, genericp;
1141{
1142  if (! NATNUMP (object))
1143    return Qnil;
1144  return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1145}
1146
1147DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1148       Sunibyte_char_to_multibyte, 1, 1, 0,
1149       doc: /* Convert the unibyte character CH to multibyte character.
1150The conversion is done based on `nonascii-translation-table' (which see)
1151 or `nonascii-insert-offset' (which see).  */)
1152     (ch)
1153     Lisp_Object ch;
1154{
1155  int c;
1156
1157  CHECK_NUMBER (ch);
1158  c = XINT (ch);
1159  if (c < 0 || c >= 0400)
1160    error ("Invalid unibyte character: %d", c);
1161  c = unibyte_char_to_multibyte (c);
1162  if (c < 0)
1163    error ("Can't convert to multibyte character: %d", XINT (ch));
1164  return make_number (c);
1165}
1166
1167DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1168       Smultibyte_char_to_unibyte, 1, 1, 0,
1169       doc: /* Convert the multibyte character CH to unibyte character.
1170The conversion is done based on `nonascii-translation-table' (which see)
1171 or `nonascii-insert-offset' (which see).  */)
1172     (ch)
1173     Lisp_Object ch;
1174{
1175  int c;
1176
1177  CHECK_NUMBER (ch);
1178  c = XINT (ch);
1179  if (! CHAR_VALID_P (c, 0))
1180    error ("Invalid multibyte character: %d", c);
1181  c = multibyte_char_to_unibyte (c, Qnil);
1182  if (c < 0)
1183    error ("Can't convert to unibyte character: %d", XINT (ch));
1184  return make_number (c);
1185}
1186
1187DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1188       doc: /* Return 1 regardless of the argument CH.  */)
1189     (ch)
1190     Lisp_Object ch;
1191{
1192  CHECK_NUMBER (ch);
1193  return make_number (1);
1194}
1195
1196/* Return how many bytes C will occupy in a multibyte buffer.
1197   Don't call this function directly, instead use macro CHAR_BYTES.  */
1198int
1199char_bytes (c)
1200     int c;
1201{
1202  int charset;
1203
1204  if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1205    return 1;
1206  if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1207    return 1;
1208
1209  charset = CHAR_CHARSET (c);
1210  return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1211}
1212
1213/* Return the width of character of which multi-byte form starts with
1214   C.  The width is measured by how many columns occupied on the
1215   screen when displayed in the current buffer.  */
1216
1217#define ONE_BYTE_CHAR_WIDTH(c)					     	\
1218  (c < 0x20							     	\
1219   ? (c == '\t'							     	\
1220      ? XFASTINT (current_buffer->tab_width)			     	\
1221      : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))	\
1222   : (c < 0x7f							     	\
1223      ? 1							     	\
1224      : (c == 0x7F						     	\
1225	 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)		     	\
1226	 : ((! NILP (current_buffer->enable_multibyte_characters)    	\
1227	     && BASE_LEADING_CODE_P (c))			     	\
1228	    ? WIDTH_BY_CHAR_HEAD (c)				     	\
1229	    : 4))))
1230
1231DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1232       doc: /* Return width of CH when displayed in the current buffer.
1233The width is measured by how many columns it occupies on the screen.
1234Tab is taken to occupy `tab-width' columns.  */)
1235     (ch)
1236     Lisp_Object ch;
1237{
1238  Lisp_Object val, disp;
1239  int c;
1240  struct Lisp_Char_Table *dp = buffer_display_table ();
1241
1242  CHECK_NUMBER (ch);
1243
1244  c = XINT (ch);
1245
1246  /* Get the way the display table would display it.  */
1247  disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1248
1249  if (VECTORP (disp))
1250    XSETINT (val, XVECTOR (disp)->size);
1251  else if (SINGLE_BYTE_CHAR_P (c))
1252    XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1253  else
1254    {
1255      int charset = CHAR_CHARSET (c);
1256
1257      XSETFASTINT (val, CHARSET_WIDTH (charset));
1258    }
1259  return val;
1260}
1261
1262/* Return width of string STR of length LEN when displayed in the
1263   current buffer.  The width is measured by how many columns it
1264   occupies on the screen.  */
1265
1266int
1267strwidth (str, len)
1268     unsigned char *str;
1269     int len;
1270{
1271  return c_string_width (str, len, -1, NULL, NULL);
1272}
1273
1274/* Return width of string STR of length LEN when displayed in the
1275   current buffer.  The width is measured by how many columns it
1276   occupies on the screen.  If PRECISION > 0, return the width of
1277   longest substring that doesn't exceed PRECISION, and set number of
1278   characters and bytes of the substring in *NCHARS and *NBYTES
1279   respectively.  */
1280
1281int
1282c_string_width (str, len, precision, nchars, nbytes)
1283     const unsigned char *str;
1284     int len, precision, *nchars, *nbytes;
1285{
1286  int i = 0, i_byte = 0;
1287  int width = 0;
1288  int chars;
1289  struct Lisp_Char_Table *dp = buffer_display_table ();
1290
1291  while (i_byte < len)
1292    {
1293      int bytes, thiswidth;
1294      Lisp_Object val;
1295
1296      if (dp)
1297	{
1298	  int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1299
1300	  chars = 1;
1301	  val = DISP_CHAR_VECTOR (dp, c);
1302	  if (VECTORP (val))
1303	    thiswidth = XVECTOR (val)->size;
1304	  else
1305	    thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1306	}
1307      else
1308	{
1309	  chars = 1;
1310	  PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1311	  thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1312	}
1313
1314      if (precision > 0
1315	  && (width + thiswidth > precision))
1316	{
1317	  *nchars = i;
1318	  *nbytes = i_byte;
1319	  return width;
1320	}
1321      i++;
1322      i_byte += bytes;
1323      width += thiswidth;
1324  }
1325
1326  if (precision > 0)
1327    {
1328      *nchars = i;
1329      *nbytes = i_byte;
1330    }
1331
1332  return width;
1333}
1334
1335/* Return width of Lisp string STRING when displayed in the current
1336   buffer.  The width is measured by how many columns it occupies on
1337   the screen while paying attention to compositions.  If PRECISION >
1338   0, return the width of longest substring that doesn't exceed
1339   PRECISION, and set number of characters and bytes of the substring
1340   in *NCHARS and *NBYTES respectively.  */
1341
1342int
1343lisp_string_width (string, precision, nchars, nbytes)
1344     Lisp_Object string;
1345     int precision, *nchars, *nbytes;
1346{
1347  int len = SCHARS (string);
1348  int len_byte = SBYTES (string);
1349  /* This set multibyte to 0 even if STRING is multibyte when it
1350     contains only ascii and eight-bit-graphic, but that's
1351     intentional.  */
1352  int multibyte = len < len_byte;
1353  const unsigned char *str = SDATA (string);
1354  int i = 0, i_byte = 0;
1355  int width = 0;
1356  struct Lisp_Char_Table *dp = buffer_display_table ();
1357
1358  while (i < len)
1359    {
1360      int chars, bytes, thiswidth;
1361      Lisp_Object val;
1362      int cmp_id;
1363      int ignore, end;
1364
1365      if (find_composition (i, -1, &ignore, &end, &val, string)
1366	  && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1367	      >= 0))
1368	{
1369	  thiswidth = composition_table[cmp_id]->width;
1370	  chars = end - i;
1371	  bytes = string_char_to_byte (string, end) - i_byte;
1372	}
1373      else if (dp)
1374	{
1375	  int c;
1376
1377	  if (multibyte)
1378	    c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1379	  else
1380	    c = str[i_byte], bytes = 1;
1381	  chars = 1;
1382	  val = DISP_CHAR_VECTOR (dp, c);
1383	  if (VECTORP (val))
1384	    thiswidth = XVECTOR (val)->size;
1385	  else
1386	    thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1387	}
1388      else
1389	{
1390	  chars = 1;
1391	  if (multibyte)
1392	    PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1393	  else
1394	    bytes = 1;
1395	  thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1396	}
1397
1398      if (precision > 0
1399	  && (width + thiswidth > precision))
1400	{
1401	  *nchars = i;
1402	  *nbytes = i_byte;
1403	  return width;
1404	}
1405      i += chars;
1406      i_byte += bytes;
1407      width += thiswidth;
1408  }
1409
1410  if (precision > 0)
1411    {
1412      *nchars = i;
1413      *nbytes = i_byte;
1414    }
1415
1416  return width;
1417}
1418
1419DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1420       doc: /* Return width of STRING when displayed in the current buffer.
1421Width is measured by how many columns it occupies on the screen.
1422When calculating width of a multibyte character in STRING,
1423only the base leading-code is considered; the validity of
1424the following bytes is not checked.  Tabs in STRING are always
1425taken to occupy `tab-width' columns.  */)
1426     (string)
1427     Lisp_Object string;
1428{
1429  Lisp_Object val;
1430
1431  CHECK_STRING (string);
1432  XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
1433  return val;
1434}
1435
1436DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1437       doc: /* Return the direction of CH.
1438The returned value is 0 for left-to-right and 1 for right-to-left.  */)
1439     (ch)
1440     Lisp_Object ch;
1441{
1442  int charset;
1443
1444  CHECK_NUMBER (ch);
1445  charset = CHAR_CHARSET (XFASTINT (ch));
1446  if (!CHARSET_DEFINED_P (charset))
1447    invalid_character (XINT (ch));
1448  return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1449}
1450
1451/* Return the number of characters in the NBYTES bytes at PTR.
1452   This works by looking at the contents and checking for multibyte sequences.
1453   However, if the current buffer has enable-multibyte-characters = nil,
1454   we treat each byte as a character.  */
1455
1456int
1457chars_in_text (ptr, nbytes)
1458     const unsigned char *ptr;
1459     int nbytes;
1460{
1461  /* current_buffer is null at early stages of Emacs initialization.  */
1462  if (current_buffer == 0
1463      || NILP (current_buffer->enable_multibyte_characters))
1464    return nbytes;
1465
1466  return multibyte_chars_in_text (ptr, nbytes);
1467}
1468
1469/* Return the number of characters in the NBYTES bytes at PTR.
1470   This works by looking at the contents and checking for multibyte sequences.
1471   It ignores enable-multibyte-characters.  */
1472
1473int
1474multibyte_chars_in_text (ptr, nbytes)
1475     const unsigned char *ptr;
1476     int nbytes;
1477{
1478  const unsigned char *endp;
1479  int chars, bytes;
1480
1481  endp = ptr + nbytes;
1482  chars = 0;
1483
1484  while (ptr < endp)
1485    {
1486      PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
1487      ptr += bytes;
1488      chars++;
1489    }
1490
1491  return chars;
1492}
1493
1494/* Parse unibyte text at STR of LEN bytes as multibyte text, and
1495   count the numbers of characters and bytes in it.  On counting
1496   bytes, pay attention to the fact that 8-bit characters in the range
1497   0x80..0x9F are represented by 2 bytes in multibyte text.  */
1498void
1499parse_str_as_multibyte (str, len, nchars, nbytes)
1500     const unsigned char *str;
1501     int len, *nchars, *nbytes;
1502{
1503  const unsigned char *endp = str + len;
1504  int n, chars = 0, bytes = 0;
1505
1506  while (str < endp)
1507    {
1508      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
1509	str += n, bytes += n;
1510      else
1511	str++, bytes += 2;
1512      chars++;
1513    }
1514  *nchars = chars;
1515  *nbytes = bytes;
1516  return;
1517}
1518
1519/* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1520   It actually converts only 8-bit characters in the range 0x80..0x9F
1521   that don't contruct multibyte characters to multibyte forms.  If
1522   NCHARS is nonzero, set *NCHARS to the number of characters in the
1523   text.  It is assured that we can use LEN bytes at STR as a work
1524   area and that is enough.  Return the number of bytes of the
1525   resulting text.  */
1526
1527int
1528str_as_multibyte (str, len, nbytes, nchars)
1529     unsigned char *str;
1530     int len, nbytes, *nchars;
1531{
1532  unsigned char *p = str, *endp = str + nbytes;
1533  unsigned char *to;
1534  int chars = 0;
1535  int n;
1536
1537  while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1538    p += n, chars++;
1539  if (nchars)
1540    *nchars = chars;
1541  if (p == endp)
1542    return nbytes;
1543
1544  to = p;
1545  nbytes = endp - p;
1546  endp = str + len;
1547  safe_bcopy (p, endp - nbytes, nbytes);
1548  p = endp - nbytes;
1549  while (p < endp)
1550    {
1551      if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1552	{
1553	  while (n--)
1554	    *to++ = *p++;
1555	}
1556      else
1557	{
1558	  *to++ = LEADING_CODE_8_BIT_CONTROL;
1559	  *to++ = *p++ + 0x20;
1560	}
1561      chars++;
1562    }
1563  if (nchars)
1564    *nchars = chars;
1565  return (to - str);
1566}
1567
1568/* Parse unibyte string at STR of LEN bytes, and return the number of
1569   bytes it may ocupy when converted to multibyte string by
1570   `str_to_multibyte'.  */
1571
1572int
1573parse_str_to_multibyte (str, len)
1574     unsigned char *str;
1575     int len;
1576{
1577  unsigned char *endp = str + len;
1578  int bytes;
1579
1580  for (bytes = 0; str < endp; str++)
1581    bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2;
1582  return bytes;
1583}
1584
1585/* Convert unibyte text at STR of NBYTES bytes to multibyte text
1586   that contains the same single-byte characters.  It actually
1587   converts all 8-bit characters to multibyte forms.  It is assured
1588   that we can use LEN bytes at STR as a work area and that is
1589   enough.  */
1590
1591int
1592str_to_multibyte (str, len, bytes)
1593     unsigned char *str;
1594     int len, bytes;
1595{
1596  unsigned char *p = str, *endp = str + bytes;
1597  unsigned char *to;
1598
1599  while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1600  if (p == endp)
1601    return bytes;
1602  to = p;
1603  bytes = endp - p;
1604  endp = str + len;
1605  safe_bcopy (p, endp - bytes, bytes);
1606  p = endp - bytes;
1607  while (p < endp)
1608    {
1609      if (*p < 0x80 || *p >= 0xA0)
1610	*to++ = *p++;
1611      else
1612	*to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1613    }
1614  return (to - str);
1615}
1616
1617/* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1618   actually converts only 8-bit characters in the range 0x80..0x9F to
1619   unibyte forms.  */
1620
1621int
1622str_as_unibyte (str, bytes)
1623     unsigned char *str;
1624     int bytes;
1625{
1626  unsigned char *p = str, *endp = str + bytes;
1627  unsigned char *to = str;
1628
1629  while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1630  to = p;
1631  while (p < endp)
1632    {
1633      if (*p == LEADING_CODE_8_BIT_CONTROL)
1634	*to++ = *(p + 1) - 0x20, p += 2;
1635      else
1636	*to++ = *p++;
1637    }
1638  return (to - str);
1639}
1640
1641
1642DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
1643  doc: /* Concatenate all the argument characters and make the result a string.
1644usage: (string &rest CHARACTERS)  */)
1645     (n, args)
1646     int n;
1647     Lisp_Object *args;
1648{
1649  int i, bufsize;
1650  unsigned char *buf, *p;
1651  int c;
1652  int multibyte = 0;
1653  Lisp_Object ret;
1654  USE_SAFE_ALLOCA;
1655
1656  bufsize = MAX_MULTIBYTE_LENGTH * n;
1657  SAFE_ALLOCA (buf, unsigned char *, bufsize);
1658  p = buf;
1659
1660  for (i = 0; i < n; i++)
1661    {
1662      CHECK_NUMBER (args[i]);
1663      if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1664	multibyte = 1;
1665    }
1666
1667  for (i = 0; i < n; i++)
1668    {
1669      c = XINT (args[i]);
1670      if (multibyte)
1671	p += CHAR_STRING (c, p);
1672      else
1673	*p++ = c;
1674    }
1675
1676  ret = make_string_from_bytes (buf, n, p - buf);
1677  SAFE_FREE ();
1678
1679  return ret;
1680}
1681
1682#endif /* emacs */
1683
1684int
1685charset_id_internal (charset_name)
1686     char *charset_name;
1687{
1688  Lisp_Object val;
1689
1690  val= Fget (intern (charset_name), Qcharset);
1691  if (!VECTORP (val))
1692    error ("Charset %s is not defined", charset_name);
1693
1694  return (XINT (XVECTOR (val)->contents[0]));
1695}
1696
1697DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1698       Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only.  */)
1699     ()
1700{
1701  charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1702  charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1703  charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1704  charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1705  charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1706  charset_big5_1 = charset_id_internal ("chinese-big5-1");
1707  charset_big5_2 = charset_id_internal ("chinese-big5-2");
1708  charset_mule_unicode_0100_24ff
1709    = charset_id_internal ("mule-unicode-0100-24ff");
1710  charset_mule_unicode_2500_33ff
1711    = charset_id_internal ("mule-unicode-2500-33ff");
1712  charset_mule_unicode_e000_ffff
1713    = charset_id_internal ("mule-unicode-e000-ffff");
1714  return Qnil;
1715}
1716
1717void
1718init_charset_once ()
1719{
1720  int i, j, k;
1721
1722  staticpro (&Vcharset_table);
1723  staticpro (&Vcharset_symbol_table);
1724  staticpro (&Vgeneric_character_list);
1725
1726  /* This has to be done here, before we call Fmake_char_table.  */
1727  Qcharset_table = intern ("charset-table");
1728  staticpro (&Qcharset_table);
1729
1730  /* Intern this now in case it isn't already done.
1731     Setting this variable twice is harmless.
1732     But don't staticpro it here--that is done in alloc.c.  */
1733  Qchar_table_extra_slots = intern ("char-table-extra-slots");
1734
1735  /* Now we are ready to set up this property, so we can
1736     create the charset table.  */
1737  Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1738  Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1739
1740  Qunknown = intern ("unknown");
1741  staticpro (&Qunknown);
1742  Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1743					Qunknown);
1744
1745  /* Setup tables.  */
1746  for (i = 0; i < 2; i++)
1747    for (j = 0; j < 2; j++)
1748      for (k = 0; k < 128; k++)
1749	iso_charset_table [i][j][k] = -1;
1750
1751  for (i = 0; i < 256; i++)
1752    bytes_by_char_head[i] = 1;
1753  bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1754  bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1755  bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1756  bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1757
1758  for (i = 0; i < 128; i++)
1759    width_by_char_head[i] = 1;
1760  for (; i < 256; i++)
1761    width_by_char_head[i] = 4;
1762  width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1763  width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1764  width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1765  width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1766
1767  {
1768    Lisp_Object val;
1769
1770    val = Qnil;
1771    for (i = 0x81; i < 0x90; i++)
1772      val = Fcons (make_number ((i - 0x70) << 7), val);
1773    for (; i < 0x9A; i++)
1774      val = Fcons (make_number ((i - 0x8F) << 14), val);
1775    for (i = 0xA0; i < 0xF0; i++)
1776      val = Fcons (make_number ((i - 0x70) << 7), val);
1777    for (; i < 0xFF; i++)
1778      val = Fcons (make_number ((i - 0xE0) << 14), val);
1779    Vgeneric_character_list = Fnreverse (val);
1780  }
1781
1782  nonascii_insert_offset = 0;
1783  Vnonascii_translation_table = Qnil;
1784}
1785
1786#ifdef emacs
1787
1788void
1789syms_of_charset ()
1790{
1791  Qcharset = intern ("charset");
1792  staticpro (&Qcharset);
1793
1794  Qascii = intern ("ascii");
1795  staticpro (&Qascii);
1796
1797  Qeight_bit_control = intern ("eight-bit-control");
1798  staticpro (&Qeight_bit_control);
1799
1800  Qeight_bit_graphic = intern ("eight-bit-graphic");
1801  staticpro (&Qeight_bit_graphic);
1802
1803  /* Define special charsets ascii, eight-bit-control, and
1804     eight-bit-graphic.  */
1805  update_charset_table (make_number (CHARSET_ASCII),
1806			make_number (1), make_number (94),
1807			make_number (1),
1808			make_number (0),
1809			make_number ('B'),
1810			make_number (0),
1811			build_string ("ASCII"),
1812			Qnil,	/* same as above */
1813			build_string ("ASCII (ISO646 IRV)"));
1814  CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1815  Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1816
1817  update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1818			make_number (1), make_number (96),
1819			make_number (4),
1820			make_number (0),
1821			make_number (-1),
1822			make_number (-1),
1823			build_string ("8-bit control code (0x80..0x9F)"),
1824			Qnil,	/* same as above */
1825			Qnil);	/* same as above */
1826  CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1827  Fput (Qeight_bit_control, Qcharset,
1828	CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1829
1830  update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1831			make_number (1), make_number (96),
1832			make_number (4),
1833			make_number (0),
1834			make_number (-1),
1835			make_number (-1),
1836			build_string ("8-bit graphic char (0xA0..0xFF)"),
1837			Qnil,	/* same as above */
1838			Qnil);	/* same as above */
1839  CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1840  Fput (Qeight_bit_graphic, Qcharset,
1841	CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1842
1843  Qauto_fill_chars = intern ("auto-fill-chars");
1844  staticpro (&Qauto_fill_chars);
1845  Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1846
1847  defsubr (&Sdefine_charset);
1848  defsubr (&Sgeneric_character_list);
1849  defsubr (&Sget_unused_iso_final_char);
1850  defsubr (&Sdeclare_equiv_charset);
1851  defsubr (&Sfind_charset_region);
1852  defsubr (&Sfind_charset_string);
1853  defsubr (&Smake_char_internal);
1854  defsubr (&Ssplit_char);
1855  defsubr (&Schar_charset);
1856  defsubr (&Scharset_after);
1857  defsubr (&Siso_charset);
1858  defsubr (&Schar_valid_p);
1859  defsubr (&Sunibyte_char_to_multibyte);
1860  defsubr (&Smultibyte_char_to_unibyte);
1861  defsubr (&Schar_bytes);
1862  defsubr (&Schar_width);
1863  defsubr (&Sstring_width);
1864  defsubr (&Schar_direction);
1865  defsubr (&Sstring);
1866  defsubr (&Ssetup_special_charsets);
1867
1868  DEFVAR_LISP ("charset-list", &Vcharset_list,
1869	       doc: /* List of charsets ever defined.  */);
1870  Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1871					Fcons (Qeight_bit_graphic, Qnil)));
1872
1873  DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1874	       doc: /* Vector of cons cell of a symbol and translation table ever defined.
1875An ID of a translation table is an index of this vector.  */);
1876  Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1877
1878  DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1879	      doc: /* Leading-code of private TYPE9N charset of column-width 1.  */);
1880  leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1881
1882  DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1883	      doc: /* Leading-code of private TYPE9N charset of column-width 2.  */);
1884  leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1885
1886  DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1887	      doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1.  */);
1888  leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1889
1890  DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1891	      doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2.  */);
1892  leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1893
1894  DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1895	      doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1896This is used for converting unibyte text to multibyte,
1897and for inserting character codes specified by number.
1898
1899This serves to convert a Latin-1 or similar 8-bit character code
1900to the corresponding Emacs multibyte character code.
1901Typically the value should be (- (make-char CHARSET 0) 128),
1902for your choice of character set.
1903If `nonascii-translation-table' is non-nil, it overrides this variable.  */);
1904  nonascii_insert_offset = 0;
1905
1906  DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1907	       doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1908This is used for converting unibyte text to multibyte,
1909and for inserting character codes specified by number.
1910
1911Conversion is performed only when multibyte characters are enabled,
1912and it serves to convert a Latin-1 or similar 8-bit character code
1913to the corresponding Emacs character code.
1914
1915If this is nil, `nonascii-insert-offset' is used instead.
1916See also the docstring of `make-translation-table'.  */);
1917  Vnonascii_translation_table = Qnil;
1918
1919  DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1920	       doc: /* A char-table for characters which invoke auto-filling.
1921Such characters have value t in this table.  */);
1922  Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1923  CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1924  CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1925}
1926
1927#endif /* emacs */
1928
1929/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1930   (do not change this comment) */
1931