1/* GNU Emacs routines to deal with category tables.
2   Copyright (C) 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007
3     Free Software Foundation, Inc.
4   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5     2005, 2006, 2007
6     National Institute of Advanced Industrial Science and Technology (AIST)
7     Registration Number H14PRO021
8
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
15
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING.  If not, write to
23the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24Boston, MA 02110-1301, USA.  */
25
26
27/* Here we handle three objects: category, category set, and category
28   table.  Read comments in the file category.h to understand them.  */
29
30#include <config.h>
31#include <ctype.h>
32#include "lisp.h"
33#include "buffer.h"
34#include "charset.h"
35#include "category.h"
36#include "keymap.h"
37
38/* The version number of the latest category table.  Each category
39   table has a unique version number.  It is assigned a new number
40   also when it is modified.  When a regular expression is compiled
41   into the struct re_pattern_buffer, the version number of the
42   category table (of the current buffer) at that moment is also
43   embedded in the structure.
44
45   For the moment, we are not using this feature.  */
46static int category_table_version;
47
48Lisp_Object Qcategory_table, Qcategoryp, Qcategorysetp, Qcategory_table_p;
49
50/* Variables to determine word boundary.  */
51Lisp_Object Vword_combining_categories, Vword_separating_categories;
52
53/* Temporary internal variable used in macro CHAR_HAS_CATEGORY.  */
54Lisp_Object _temp_category_set;
55
56
57/* Category set staff.  */
58
59DEFUN ("make-category-set", Fmake_category_set, Smake_category_set, 1, 1, 0,
60       doc: /* Return a newly created category-set which contains CATEGORIES.
61CATEGORIES is a string of category mnemonics.
62The value is a bool-vector which has t at the indices corresponding to
63those categories.  */)
64     (categories)
65     Lisp_Object categories;
66{
67  Lisp_Object val;
68  int len;
69
70  CHECK_STRING (categories);
71  val = MAKE_CATEGORY_SET;
72
73  if (STRING_MULTIBYTE (categories))
74    error ("Multibyte string in `make-category-set'");
75
76  len = SCHARS (categories);
77  while (--len >= 0)
78    {
79      Lisp_Object category;
80
81      XSETFASTINT (category, SREF (categories, len));
82      CHECK_CATEGORY (category);
83      SET_CATEGORY_SET (val, category, Qt);
84    }
85  return val;
86}
87
88
89/* Category staff.  */
90
91Lisp_Object check_category_table ();
92
93DEFUN ("define-category", Fdefine_category, Sdefine_category, 2, 3, 0,
94       doc: /* Define CATEGORY as a category which is described by DOCSTRING.
95CATEGORY should be an ASCII printing character in the range ` ' to `~'.
96DOCSTRING is the documentation string of the category.
97The category is defined only in category table TABLE, which defaults to
98the current buffer's category table.  */)
99     (category, docstring, table)
100     Lisp_Object category, docstring, table;
101{
102  CHECK_CATEGORY (category);
103  CHECK_STRING (docstring);
104  table = check_category_table (table);
105
106  if (!NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
107    error ("Category `%c' is already defined", XFASTINT (category));
108  CATEGORY_DOCSTRING (table, XFASTINT (category)) = docstring;
109
110  return Qnil;
111}
112
113DEFUN ("category-docstring", Fcategory_docstring, Scategory_docstring, 1, 2, 0,
114       doc: /* Return the documentation string of CATEGORY, as defined in TABLE.
115TABLE should be a category table and defaults to the current buffer's
116category table.  */)
117     (category, table)
118     Lisp_Object category, table;
119{
120  CHECK_CATEGORY (category);
121  table = check_category_table (table);
122
123  return CATEGORY_DOCSTRING (table, XFASTINT (category));
124}
125
126DEFUN ("get-unused-category", Fget_unused_category, Sget_unused_category,
127       0, 1, 0,
128       doc: /* Return a category which is not yet defined in TABLE.
129If no category remains available, return nil.
130The optional argument TABLE specifies which category table to modify;
131it defaults to the current buffer's category table.  */)
132     (table)
133     Lisp_Object table;
134{
135  int i;
136
137  table = check_category_table (table);
138
139  for (i = ' '; i <= '~'; i++)
140    if (NILP (CATEGORY_DOCSTRING (table, i)))
141      return make_number (i);
142
143  return Qnil;
144}
145
146
147/* Category-table staff.  */
148
149DEFUN ("category-table-p", Fcategory_table_p, Scategory_table_p, 1, 1, 0,
150       doc: /* Return t if ARG is a category table.  */)
151     (arg)
152     Lisp_Object arg;
153{
154  if (CHAR_TABLE_P (arg)
155      && EQ (XCHAR_TABLE (arg)->purpose, Qcategory_table))
156    return Qt;
157  return Qnil;
158}
159
160/* If TABLE is nil, return the current category table.  If TABLE is
161   not nil, check the validity of TABLE as a category table.  If
162   valid, return TABLE itself, but if not valid, signal an error of
163   wrong-type-argument.  */
164
165Lisp_Object
166check_category_table (table)
167     Lisp_Object table;
168{
169  if (NILP (table))
170    return current_buffer->category_table;
171  CHECK_TYPE (!NILP (Fcategory_table_p (table)), Qcategory_table_p, table);
172  return table;
173}
174
175DEFUN ("category-table", Fcategory_table, Scategory_table, 0, 0, 0,
176       doc: /* Return the current category table.
177This is the one specified by the current buffer.  */)
178     ()
179{
180  return current_buffer->category_table;
181}
182
183DEFUN ("standard-category-table", Fstandard_category_table,
184   Sstandard_category_table, 0, 0, 0,
185       doc: /* Return the standard category table.
186This is the one used for new buffers.  */)
187     ()
188{
189  return Vstandard_category_table;
190}
191
192/* Return a copy of category table TABLE.  We can't simply use the
193   function copy-sequence because no contents should be shared between
194   the original and the copy.  This function is called recursively by
195   binding TABLE to a sub char table.  */
196
197Lisp_Object
198copy_category_table (table)
199     Lisp_Object table;
200{
201  Lisp_Object tmp;
202  int i, to;
203
204  if (!NILP (XCHAR_TABLE (table)->top))
205    {
206      /* TABLE is a top level char table.
207	 At first, make a copy of tree structure of the table.  */
208      table = Fcopy_sequence (table);
209
210      /* Then, copy elements for single byte characters one by one.  */
211      for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++)
212	if (!NILP (tmp = XCHAR_TABLE (table)->contents[i]))
213	  XCHAR_TABLE (table)->contents[i] = Fcopy_sequence (tmp);
214      to = CHAR_TABLE_ORDINARY_SLOTS;
215
216      /* Also copy the first (and sole) extra slot.  It is a vector
217         containing docstring of each category.  */
218      Fset_char_table_extra_slot
219	(table, make_number (0),
220	 Fcopy_sequence (Fchar_table_extra_slot (table, make_number (0))));
221    }
222  else
223    {
224      i  = 32;
225      to = SUB_CHAR_TABLE_ORDINARY_SLOTS;
226    }
227
228  /* If the table has non-nil default value, copy it.  */
229  if (!NILP (tmp = XCHAR_TABLE (table)->defalt))
230    XCHAR_TABLE (table)->defalt = Fcopy_sequence (tmp);
231
232  /* At last, copy the remaining elements while paying attention to a
233     sub char table.  */
234  for (; i < to; i++)
235    if (!NILP (tmp = XCHAR_TABLE (table)->contents[i]))
236      XCHAR_TABLE (table)->contents[i]
237	= (SUB_CHAR_TABLE_P (tmp)
238	   ? copy_category_table (tmp) : Fcopy_sequence (tmp));
239
240  return table;
241}
242
243DEFUN ("copy-category-table", Fcopy_category_table, Scopy_category_table,
244       0, 1, 0,
245       doc: /* Construct a new category table and return it.
246It is a copy of the TABLE, which defaults to the standard category table.  */)
247     (table)
248     Lisp_Object table;
249{
250  if (!NILP (table))
251    check_category_table (table);
252  else
253    table = Vstandard_category_table;
254
255  return copy_category_table (table);
256}
257
258DEFUN ("make-category-table", Fmake_category_table, Smake_category_table,
259       0, 0, 0,
260       doc: /* Construct a new and empty category table and return it.  */)
261     ()
262{
263  Lisp_Object val;
264
265  val = Fmake_char_table (Qcategory_table, Qnil);
266  XCHAR_TABLE (val)->defalt = MAKE_CATEGORY_SET;
267  Fset_char_table_extra_slot (val, make_number (0),
268			      Fmake_vector (make_number (95), Qnil));
269  return val;
270}
271
272DEFUN ("set-category-table", Fset_category_table, Sset_category_table, 1, 1, 0,
273       doc: /* Specify TABLE as the category table for the current buffer.
274Return TABLE.  */)
275     (table)
276     Lisp_Object table;
277{
278  int idx;
279  table = check_category_table (table);
280  current_buffer->category_table = table;
281  /* Indicate that this buffer now has a specified category table.  */
282  idx = PER_BUFFER_VAR_IDX (category_table);
283  SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
284  return table;
285}
286
287
288DEFUN ("char-category-set", Fchar_category_set, Schar_category_set, 1, 1, 0,
289       doc: /* Return the category set of CHAR.
290usage: (char-category-set CHAR)  */)
291     (ch)
292     Lisp_Object ch;
293{
294  CHECK_NUMBER (ch);
295  return CATEGORY_SET (XFASTINT (ch));
296}
297
298DEFUN ("category-set-mnemonics", Fcategory_set_mnemonics,
299       Scategory_set_mnemonics, 1, 1, 0,
300       doc: /* Return a string containing mnemonics of the categories in CATEGORY-SET.
301CATEGORY-SET is a bool-vector, and the categories \"in\" it are those
302that are indexes where t occurs in the bool-vector.
303The return value is a string containing those same categories.  */)
304     (category_set)
305     Lisp_Object category_set;
306{
307  int i, j;
308  char str[96];
309
310  CHECK_CATEGORY_SET (category_set);
311
312  j = 0;
313  for (i = 32; i < 127; i++)
314    if (CATEGORY_MEMBER (i, category_set))
315      str[j++] = i;
316  str[j] = '\0';
317
318  return build_string (str);
319}
320
321/* Modify all category sets stored under sub char-table TABLE so that
322   they contain (SET_VALUE is t) or don't contain (SET_VALUE is nil)
323   CATEGORY.  */
324
325void
326modify_lower_category_set (table, category, set_value)
327     Lisp_Object table, category, set_value;
328{
329  Lisp_Object val;
330  int i;
331
332  val = XCHAR_TABLE (table)->defalt;
333  if (!CATEGORY_SET_P (val))
334    val = MAKE_CATEGORY_SET;
335  SET_CATEGORY_SET (val, category, set_value);
336  XCHAR_TABLE (table)->defalt = val;
337
338  for (i = 32; i < SUB_CHAR_TABLE_ORDINARY_SLOTS; i++)
339    {
340      val = XCHAR_TABLE (table)->contents[i];
341
342      if (CATEGORY_SET_P (val))
343	SET_CATEGORY_SET (val, category, set_value);
344      else if (SUB_CHAR_TABLE_P (val))
345	modify_lower_category_set (val, category, set_value);
346    }
347}
348
349void
350set_category_set (category_set, category, val)
351     Lisp_Object category_set, category, val;
352{
353  do {
354    int idx = XINT (category) / 8;
355    unsigned char bits = 1 << (XINT (category) % 8);
356
357    if (NILP (val))
358      XCATEGORY_SET (category_set)->data[idx] &= ~bits;
359    else
360      XCATEGORY_SET (category_set)->data[idx] |= bits;
361  } while (0);
362}
363
364DEFUN ("modify-category-entry", Fmodify_category_entry,
365       Smodify_category_entry, 2, 4, 0,
366       doc: /* Modify the category set of CHARACTER by adding CATEGORY to it.
367The category is changed only for table TABLE, which defaults to
368 the current buffer's category table.
369If optional fourth argument RESET is non-nil,
370then delete CATEGORY from the category set instead of adding it.  */)
371     (character, category, table, reset)
372     Lisp_Object character, category, table, reset;
373{
374  int c, charset, c1, c2;
375  Lisp_Object set_value;	/* Actual value to be set in category sets.  */
376  Lisp_Object val, category_set;
377
378  CHECK_NUMBER (character);
379  c = XINT (character);
380  CHECK_CATEGORY (category);
381  table = check_category_table (table);
382
383  if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category))))
384    error ("Undefined category: %c", XFASTINT (category));
385
386  set_value = NILP (reset) ? Qt : Qnil;
387
388  if (c < CHAR_TABLE_SINGLE_BYTE_SLOTS)
389    {
390      val = XCHAR_TABLE (table)->contents[c];
391      if (!CATEGORY_SET_P (val))
392	XCHAR_TABLE (table)->contents[c] = (val = MAKE_CATEGORY_SET);
393      SET_CATEGORY_SET (val, category, set_value);
394      return Qnil;
395    }
396
397  SPLIT_CHAR (c, charset, c1, c2);
398
399  /* The top level table.  */
400  val = XCHAR_TABLE (table)->contents[charset + 128];
401  if (CATEGORY_SET_P (val))
402    category_set = val;
403  else if (!SUB_CHAR_TABLE_P (val))
404    {
405      category_set = val = MAKE_CATEGORY_SET;
406      XCHAR_TABLE (table)->contents[charset + 128] = category_set;
407    }
408
409  if (c1 <= 0)
410    {
411      /* Only a charset is specified.  */
412      if (SUB_CHAR_TABLE_P (val))
413	/* All characters in CHARSET should be the same as for having
414           CATEGORY or not.  */
415	modify_lower_category_set (val, category, set_value);
416      else
417	SET_CATEGORY_SET (category_set, category, set_value);
418      return Qnil;
419    }
420
421  /* The second level table.  */
422  if (!SUB_CHAR_TABLE_P (val))
423    {
424      val = make_sub_char_table (Qnil);
425      XCHAR_TABLE (table)->contents[charset + 128] = val;
426      /* We must set default category set of CHARSET in `defalt' slot.  */
427      XCHAR_TABLE (val)->defalt = category_set;
428    }
429  table = val;
430
431  val = XCHAR_TABLE (table)->contents[c1];
432  if (CATEGORY_SET_P (val))
433    category_set = val;
434  else if (!SUB_CHAR_TABLE_P (val))
435    {
436      category_set = val = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
437      XCHAR_TABLE (table)->contents[c1] = category_set;
438    }
439
440  if (c2 <= 0)
441    {
442      if (SUB_CHAR_TABLE_P (val))
443	/* All characters in C1 group of CHARSET should be the same as
444           for CATEGORY.  */
445	modify_lower_category_set (val, category, set_value);
446      else
447	SET_CATEGORY_SET (category_set, category, set_value);
448      return Qnil;
449    }
450
451  /* The third (bottom) level table.  */
452  if (!SUB_CHAR_TABLE_P (val))
453    {
454      val = make_sub_char_table (Qnil);
455      XCHAR_TABLE (table)->contents[c1] = val;
456      /* We must set default category set of CHARSET and C1 in
457         `defalt' slot.  */
458      XCHAR_TABLE (val)->defalt = category_set;
459    }
460  table = val;
461
462  val = XCHAR_TABLE (table)->contents[c2];
463  if (CATEGORY_SET_P (val))
464    category_set = val;
465  else if (!SUB_CHAR_TABLE_P (val))
466    {
467      category_set = Fcopy_sequence (XCHAR_TABLE (table)->defalt);
468      XCHAR_TABLE (table)->contents[c2] = category_set;
469    }
470  else
471    /* This should never happen.  */
472    error ("Invalid category table");
473
474  SET_CATEGORY_SET (category_set, category, set_value);
475
476  return Qnil;
477}
478
479/* Return 1 if there is a word boundary between two word-constituent
480   characters C1 and C2 if they appear in this order, else return 0.
481   Use the macro WORD_BOUNDARY_P instead of calling this function
482   directly.  */
483
484int
485word_boundary_p (c1, c2)
486     int c1, c2;
487{
488  Lisp_Object category_set1, category_set2;
489  Lisp_Object tail;
490  int default_result;
491
492  if (CHAR_CHARSET (c1) == CHAR_CHARSET (c2))
493    {
494      tail = Vword_separating_categories;
495      default_result = 0;
496    }
497  else
498    {
499      tail = Vword_combining_categories;
500      default_result = 1;
501    }
502
503  category_set1 = CATEGORY_SET (c1);
504  if (NILP (category_set1))
505    return default_result;
506  category_set2 = CATEGORY_SET (c2);
507  if (NILP (category_set2))
508    return default_result;
509
510  for (; CONSP (tail); tail = XCDR (tail))
511    {
512      Lisp_Object elt = XCAR (tail);
513
514      if (CONSP (elt)
515	  && CATEGORYP (XCAR (elt))
516	  && CATEGORYP (XCDR (elt))
517	  && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)
518	  && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))
519	return !default_result;
520    }
521  return default_result;
522}
523
524
525void
526init_category_once ()
527{
528  /* This has to be done here, before we call Fmake_char_table.  */
529  Qcategory_table = intern ("category-table");
530  staticpro (&Qcategory_table);
531
532  /* Intern this now in case it isn't already done.
533     Setting this variable twice is harmless.
534     But don't staticpro it here--that is done in alloc.c.  */
535  Qchar_table_extra_slots = intern ("char-table-extra-slots");
536
537  /* Now we are ready to set up this property, so we can
538     create category tables.  */
539  Fput (Qcategory_table, Qchar_table_extra_slots, make_number (2));
540
541  Vstandard_category_table = Fmake_char_table (Qcategory_table, Qnil);
542  /* Set a category set which contains nothing to the default.  */
543  XCHAR_TABLE (Vstandard_category_table)->defalt = MAKE_CATEGORY_SET;
544  Fset_char_table_extra_slot (Vstandard_category_table, make_number (0),
545			      Fmake_vector (make_number (95), Qnil));
546}
547
548void
549syms_of_category ()
550{
551  Qcategoryp = intern ("categoryp");
552  staticpro (&Qcategoryp);
553  Qcategorysetp = intern ("categorysetp");
554  staticpro (&Qcategorysetp);
555  Qcategory_table_p = intern ("category-table-p");
556  staticpro (&Qcategory_table_p);
557
558  DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories,
559	       doc: /* List of pair (cons) of categories to determine word boundary.
560
561Emacs treats a sequence of word constituent characters as a single
562word (i.e. finds no word boundary between them) iff they belongs to
563the same charset.  But, exceptions are allowed in the following cases.
564
565\(1) The case that characters are in different charsets is controlled
566by the variable `word-combining-categories'.
567
568Emacs finds no word boundary between characters of different charsets
569if they have categories matching some element of this list.
570
571More precisely, if an element of this list is a cons of category CAT1
572and CAT2, and a multibyte character C1 which has CAT1 is followed by
573C2 which has CAT2, there's no word boundary between C1 and C2.
574
575For instance, to tell that ASCII characters and Latin-1 characters can
576form a single word, the element `(?l . ?l)' should be in this list
577because both characters have the category `l' (Latin characters).
578
579\(2) The case that character are in the same charset is controlled by
580the variable `word-separating-categories'.
581
582Emacs find a word boundary between characters of the same charset
583if they have categories matching some element of this list.
584
585More precisely, if an element of this list is a cons of category CAT1
586and CAT2, and a multibyte character C1 which has CAT1 is followed by
587C2 which has CAT2, there's a word boundary between C1 and C2.
588
589For instance, to tell that there's a word boundary between Japanese
590Hiragana and Japanese Kanji (both are in the same charset), the
591element `(?H . ?C) should be in this list.  */);
592
593  Vword_combining_categories = Qnil;
594
595  DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories,
596	       doc: /* List of pair (cons) of categories to determine word boundary.
597See the documentation of the variable `word-combining-categories'.  */);
598
599  Vword_separating_categories = Qnil;
600
601  defsubr (&Smake_category_set);
602  defsubr (&Sdefine_category);
603  defsubr (&Scategory_docstring);
604  defsubr (&Sget_unused_category);
605  defsubr (&Scategory_table_p);
606  defsubr (&Scategory_table);
607  defsubr (&Sstandard_category_table);
608  defsubr (&Scopy_category_table);
609  defsubr (&Smake_category_table);
610  defsubr (&Sset_category_table);
611  defsubr (&Schar_category_set);
612  defsubr (&Scategory_set_mnemonics);
613  defsubr (&Smodify_category_entry);
614
615  category_table_version = 0;
616}
617
618/* arch-tag: 74ebf524-121b-4d9c-bd68-07f8d708b211
619   (do not change this comment) */
620