1// -*- C++ -*-
2/* Copyright (C) 1994, 2000, 2001, 2003, 2004 Free Software Foundation, Inc.
3     Written by James Clark (jjc@jclark.com)
4
5This file is part of groff.
6
7groff is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12groff is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License along
18with groff; see the file COPYING.  If not, write to the Free Software
19Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
20
21/*
22TODO
23devise new names for useful characters
24option to specify symbol sets to look in
25put filename in error messages (or fix lib)
26*/
27
28#include "lib.h"
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <ctype.h>
34#include <math.h>
35#include <errno.h>
36#include "assert.h"
37#include "posix.h"
38#include "errarg.h"
39#include "error.h"
40#include "cset.h"
41#include "nonposix.h"
42#include "unicode.h"
43
44extern "C" const char *Version_string;
45extern const char *hp_msl_to_unicode_code(const char *);
46
// Function-like helper macros.  Every parameter use and every expansion
// is parenthesized so that the macros keep their meaning for any
// argument expression and inside any surrounding expression.

// number of elements of the array v (v must be an array, not a pointer)
#define SIZEOF(v) (sizeof(v) / sizeof((v)[0]))
// nonzero if the NUL-terminated strings a and b have the same contents
#define equal(a, b) (strcmp((a), (b)) == 0)
// only valid if is_uname(c) has returned true; yields a pointer to the
// first '_' in c, or a null pointer if c contains none
#define is_decomposed(c) (strchr((c), '_'))

#define NO       0
#define YES      1

// values of the TFM type tag, as distinguished by check_type()
#define MSL      0
#define SYMSET   1
#define UNICODE  2

// glyph name that the output routines skip when comparing with equal()
#define UNNAMED "---"

// Factor applied by scale().  The default makes the Agfa-based unitwidth
// an integer; main() recomputes it for TFM files of type UNICODE.
static double multiplier = 3.0;
62
63inline
64int scale(int n)
65{
66  return int(n * multiplier + 0.5);
67}
68
// Tag numbers used in the TFM file's tag directory.
//
// min_tag and max_tag delimit the range of tags that is stored; tags
// outside of it are read and discarded by read_tags().  Note that the
// value 416 is not assigned.

enum tag_type {
  min_tag                      = 400,

  type_tag                     = 400,
  copyright_tag                = 401,
  comment_tag                  = 402,
  charcode_tag                 = 403,   // MSL for Intellifont,
                                        // Unicode for TrueType
  symbol_set_tag               = 404,
  unique_identifier_tag        = 405,
  inches_per_point_tag         = 406,
  nominal_point_size_tag       = 407,
  design_units_per_em_tag      = 408,
  posture_tag                  = 409,
  type_structure_tag           = 410,
  stroke_weight_tag            = 411,
  spacing_tag                  = 412,
  slant_tag                    = 413,
  appearance_width_tag         = 414,
  serif_style_tag              = 415,
  font_name_tag                = 417,
  typeface_source_tag          = 418,
  average_width_tag            = 419,
  max_width_tag                = 420,
  word_spacing_tag             = 421,
  recommended_line_spacing_tag = 422,
  cap_height_tag               = 423,
  x_height_tag                 = 424,
  max_ascent_tag               = 425,
  max_descent_tag              = 426,
  lower_ascent_tag             = 427,
  lower_descent_tag            = 428,
  underscore_depth_tag         = 429,
  underscore_thickness_tag     = 430,
  uppercase_accent_height_tag  = 431,
  lowercase_accent_height_tag  = 432,
  width_tag                    = 433,
  vertical_escapement_tag      = 434,
  left_extent_tag              = 435,
  right_extent_tag             = 436,
  ascent_tag                   = 437,
  descent_tag                  = 438,
  pair_kern_tag                = 439,
  sector_kern_tag              = 440,
  track_kern_tag               = 441,
  typeface_tag                 = 442,
  panose_tag                   = 443,

  max_tag                      = 443
};
118
// Two descriptive strings.
// NOTE(review): no use of this table is visible in this part of the
// file -- confirm whether it is still needed.
const char *tag_name[] = {
  "Symbol Set",
  "Font Type"           // MSL for Intellifont, Unicode for TrueType
};
123
// Data type codes stored in the `type' field of a TFM tag entry.
enum {
  BYTE_TYPE         = 1,
  ASCII_TYPE        = 2,        // NUL-terminated string
  USHORT_TYPE       = 3,
  LONG_TYPE         = 4,        // unused
  RATIONAL_TYPE     = 5,        // read by this program as two 32-bit
                                // values: numerator, then denominator
  SIGNED_BYTE_TYPE  = 16,       // unused
  SIGNED_SHORT_TYPE = 17,
  SIGNED_LONG_TYPE  = 18        // unused
};
135
// Integer types used for the fields read from the TFM file.
typedef unsigned char  byte;
typedef unsigned short uint16;
typedef short          int16;
typedef unsigned int   uint32;
140
141class File {
142public:
143  File(const char *);
144  void skip(int n);
145  byte get_byte();
146  uint16 get_uint16();
147  uint32 get_uint32();
148  uint32 get_uint32(char *orig);
149  void seek(uint32 n);
150private:
151  unsigned char *buf_;
152  const unsigned char *ptr_;
153  const unsigned char *end_;
154};
155
156struct entry {
157  char present;
158  uint16 type;
159  uint32 count;
160  uint32 value;
161  char orig_value[4];
162  entry() : present(0) { }
163};
164
165struct char_info {
166  uint16 charcode;
167  uint16 width;
168  int16 ascent;
169  int16 descent;
170  int16 left_extent;
171  uint16 right_extent;
172  uint16 symbol_set;
173  unsigned char code;
174};
175
176const uint16 NO_GLYPH = 0xffff;
177const uint16 NO_SYMBOL_SET = 0;
178
179struct name_list {
180  char *name;
181  name_list *next;
182  name_list(const char *s, name_list *p) : name(strsave(s)), next(p) { }
183  ~name_list() { a_delete name; }
184};
185
186struct symbol_set {
187  uint16 select;
188  uint16 index[256];
189};
190
// Selector value for the symbol set written as number n followed by
// letter c, e.g. SYMBOL_SET(19, 'U') for "19U".
#define SYMBOL_SET(n, c) (32 * (n) + ((c) - 64))
192
193uint16 text_symbol_sets[] = {
194  SYMBOL_SET(19, 'U'),		// Windows Latin 1 ("ANSI", code page 1252)
195  SYMBOL_SET(9, 'E'),		// Windows Latin 2, Code Page 1250
196  SYMBOL_SET(5, 'T'),		// Code Page 1254
197  SYMBOL_SET(7, 'J'),		// Desktop
198  SYMBOL_SET(6, 'J'),		// Microsoft Publishing
199  SYMBOL_SET(0, 'N'),		// Latin 1 (subset of 19U,
200				// so we should never get here)
201  SYMBOL_SET(2, 'N'),		// Latin 2 (subset of 9E,
202				// so we should never get here)
203  SYMBOL_SET(8, 'U'),		// HP Roman 8
204  SYMBOL_SET(10, 'J'),		// PS Standard
205  SYMBOL_SET(9, 'U'),		// Windows 3.0 "ANSI"
206  SYMBOL_SET(1, 'U'),		// U.S. Legal
207
208  SYMBOL_SET(12, 'J'),		// MC Text
209  SYMBOL_SET(10, 'U'),		// PC Code Page 437
210  SYMBOL_SET(11, 'U'),		// PC Code Page 437N
211  SYMBOL_SET(17, 'U'),		// PC Code Page 852
212  SYMBOL_SET(12, 'U'),		// PC Code Page 850
213  SYMBOL_SET(9, 'T'),		// PC Code Page 437T
214  0
215};
216
217uint16 special_symbol_sets[] = {
218  SYMBOL_SET(8, 'M'),		// Math 8
219  SYMBOL_SET(5, 'M'),		// PS Math
220  SYMBOL_SET(15, 'U'),		// Pi font
221  SYMBOL_SET(13, 'J'),		// Ventura International
222  SYMBOL_SET(19, 'M'),		// Symbol font
223  SYMBOL_SET(579, 'L'),		// Wingdings
224  0
225};
226
227entry tags[max_tag + 1 - min_tag];
228
229char_info *char_table;
230uint32 nchars = 0;
231
232unsigned int charcode_name_table_size = 0;
233name_list **charcode_name_table = NULL;
234
235symbol_set *symbol_set_table;
236unsigned int n_symbol_sets;
237
238static int debug_flag = NO;
239static int special_flag = NO;	// not a special font
240static int italic_flag = NO;	// don't add italic correction
241static int italic_sep;
242static int all_flag = NO;	// don't include glyphs not in mapfile
243static int quiet_flag = NO;	// don't suppress warnings about symbols not found
244
245static char *hp_msl_to_ucode_name(int);
246static char *unicode_to_ucode_name(int);
247static int is_uname(char *);
248static char *show_symset(unsigned int);
249static void usage(FILE *);
250static void usage();
251static const char *xbasename(const char *);
252static void read_tags(File &);
253static int check_type();
254static void check_units(File &, const int, double *, double *);
255static int read_map(const char *, const int);
256static void require_tag(tag_type);
257static void dump_ascii(File &, tag_type);
258static void dump_tags(File &);
259static void dump_symbol_sets(File &);
260static void dump_symbols(int);
261static void output_font_name(File &);
262static void output_spacewidth();
263static void output_pclweight();
264static void output_pclproportional();
265static void read_and_output_pcltypeface(File &);
266static void output_pclstyle();
267static void output_slant();
268static void output_ligatures();
269static void read_symbol_sets(File &);
270static void read_and_output_kernpairs(File &);
271static void output_charset(const int);
272static void read_char_table(File &);
273
274inline
275entry &tag_info(tag_type t)
276{
277  return tags[t - min_tag];
278}
279
280int
281main(int argc, char **argv)
282{
283  program_name = argv[0];
284
285  int opt;
286  int res = 1200;		// PCL unit of measure for cursor moves
287  int scalesize = 4;		// LaserJet 4 only allows 1/4 point increments
288  int unitwidth = 6350;
289  double ppi;			// points per inch
290  double upem;			// design units per em
291
292  static const struct option long_options[] = {
293    { "help", no_argument, 0, CHAR_MAX + 1 },
294    { "version", no_argument, 0, 'v' },
295    { NULL, 0, 0, 0 }
296  };
297  while ((opt = getopt_long(argc, argv, "adsqvi:", long_options, NULL)) != EOF) {
298    switch (opt) {
299    case 'a':
300      all_flag = YES;
301      break;
302    case 'd':
303      debug_flag = YES;
304      break;
305    case 's':
306      special_flag = YES;
307      break;
308    case 'i':
309      italic_flag = YES;
310      italic_sep = atoi(optarg);	// design units
311      break;
312    case 'q':
313      quiet_flag = YES;		// suppress warnings about symbols not found
314      break;
315    case 'v':
316      printf("GNU hpftodit (groff) version %s\n", Version_string);
317      exit(0);
318      break;
319    case CHAR_MAX + 1: // --help
320      usage(stdout);
321      exit(0);
322      break;
323    case '?':
324      usage();
325      break;
326    default:
327      assert(0);
328    }
329  }
330
331  if (debug_flag && argc - optind < 1)
332    usage();
333  else if (!debug_flag && argc - optind != 3)
334    usage();
335  File f(argv[optind]);
336  read_tags(f);
337  int tfm_type = check_type();
338  if (debug_flag)
339    dump_tags(f);
340  if (!debug_flag && !read_map(argv[optind + 1], tfm_type))
341    exit(1);
342  else if (debug_flag && argc - optind > 1)
343    read_map(argv[optind + 1], tfm_type);
344  current_filename = NULL;
345  current_lineno = -1;		// no line numbers
346  if (!debug_flag && !equal(argv[optind + 2], "-"))
347    if (freopen(argv[optind + 2], "w", stdout) == NULL)
348      fatal("cannot open `%1': %2", argv[optind + 2], strerror(errno));
349  current_filename = argv[optind];
350
351  check_units(f, tfm_type, &ppi, &upem);
352  if (tfm_type == UNICODE)	// don't calculate for Intellifont TFMs
353    multiplier = double(res) / upem / ppi * unitwidth / scalesize;
354  if (italic_flag)
355    // convert from thousandths of an em to design units
356    italic_sep = int(italic_sep * upem / 1000 + 0.5);
357
358  read_char_table(f);
359  if (nchars == 0)
360    fatal("no characters");
361
362  if (!debug_flag) {
363    output_font_name(f);
364    printf("name %s\n", xbasename(argv[optind + 2]));
365    if (special_flag)
366      printf("special\n");
367    output_spacewidth();
368    output_slant();
369    read_and_output_pcltypeface(f);
370    output_pclproportional();
371    output_pclweight();
372    output_pclstyle();
373  }
374  read_symbol_sets(f);
375  if (debug_flag)
376    dump_symbols(tfm_type);
377  else {
378    output_ligatures();
379    read_and_output_kernpairs(f);
380    output_charset(tfm_type);
381  }
382  return 0;
383}
384
385static void
386usage(FILE *stream)
387{
388  fprintf(stream,
389	  "usage: %s [-s] [-a] [-q] [-i n] tfm_file map_file output_font\n"
390	  "       %s -d tfm_file [map_file]\n",
391	  program_name, program_name);
392}
393
394static void
395usage()
396{
397  usage(stderr);
398  exit(1);
399}
400
401File::File(const char *s)
402{
403  // We need to read the file in binary mode because hpftodit relies
404  // on byte counts.
405  int fd = open(s, O_RDONLY | O_BINARY);
406  if (fd < 0)
407    fatal("cannot open `%1': %2", s, strerror(errno));
408  current_filename = s;
409  struct stat sb;
410  if (fstat(fd, &sb) < 0)
411    fatal("cannot stat: %1", strerror(errno));
412  if (!S_ISREG(sb.st_mode))
413    fatal("not a regular file");
414  buf_ = new unsigned char[sb.st_size];
415  long nread = read(fd, buf_, sb.st_size);
416  if (nread < 0)
417    fatal("read error: %1", strerror(errno));
418  if (nread != sb.st_size)
419    fatal("read unexpected number of bytes");
420  ptr_ = buf_;
421  end_ = buf_ + sb.st_size;
422}
423
424void
425File::skip(int n)
426{
427  if (end_ - ptr_ < n)
428    fatal("unexpected end of file");
429  ptr_ += n;
430}
431
432void
433File::seek(uint32 n)
434{
435  if (uint32(end_ - buf_) < n)
436    fatal("unexpected end of file");
437  ptr_ = buf_ + n;
438}
439
440byte
441File::get_byte()
442{
443  if (ptr_ >= end_)
444    fatal("unexpected end of file");
445  return *ptr_++;
446}
447
448uint16
449File::get_uint16()
450{
451  if (end_ - ptr_ < 2)
452    fatal("unexpected end of file");
453  uint16 n = *ptr_++;
454  return n + (*ptr_++ << 8);
455}
456
457uint32
458File::get_uint32()
459{
460  if (end_ - ptr_ < 4)
461    fatal("unexpected end of file");
462  uint32 n = *ptr_++;
463  for (int i = 0; i < 3; i++)
464    n += *ptr_++ << (i + 1)*8;
465  return n;
466}
467
468uint32
469File::get_uint32(char *orig)
470{
471  if (end_ - ptr_ < 4)
472    fatal("unexpected end of file");
473  unsigned char v = *ptr_++;
474  uint32 n = v;
475  orig[0] = v;
476  for (int i = 1; i < 4; i++) {
477    v = *ptr_++;
478    orig[i] = v;
479    n += v << i*8;
480  }
481  return n;
482}
483
484static void
485read_tags(File &f)
486{
487  if (f.get_byte() != 'I' || f.get_byte() != 'I')
488    fatal("not an Intel format TFM file");
489  f.skip(6);
490  uint16 ntags = f.get_uint16();
491  entry dummy;
492  for (uint16 i = 0; i < ntags; i++) {
493    uint16 tag = f.get_uint16();
494    entry *p;
495    if (min_tag <= tag && tag <= max_tag)
496      p = tags + (tag - min_tag);
497    else
498      p = &dummy;
499    p->present = 1;
500    p->type = f.get_uint16();
501    p->count = f.get_uint32();
502    p->value = f.get_uint32(p->orig_value);
503  }
504}
505
506static int
507check_type()
508{
509  require_tag(type_tag);
510  int tfm_type = tag_info(type_tag).value;
511  switch (tfm_type) {
512    case MSL:
513    case UNICODE:
514      break;
515    case SYMSET:
516      fatal("cannot handle Symbol Set TFM files");
517      break;
518    default:
519      fatal("unknown type tag %1", tfm_type);
520  }
521  return tfm_type;
522}
523
524static void
525check_units(File &f, const int tfm_type, double *ppi, double *upem)
526{
527  require_tag(design_units_per_em_tag);
528  f.seek(tag_info(design_units_per_em_tag).value);
529  uint32 num = f.get_uint32();
530  uint32 den = f.get_uint32();
531  if (tfm_type == MSL && (num != 8782 || den != 1))
532    fatal("design units per em != 8782/1");
533  *upem = double(num) / den;
534  require_tag(inches_per_point_tag);
535  f.seek(tag_info(inches_per_point_tag).value);
536  num = f.get_uint32();
537  den = f.get_uint32();
538  if (tfm_type == MSL && (num != 100 || den != 7231))
539    fatal("inches per point not 100/7231");
540  *ppi = double(den) / num;
541}
542
543static void
544require_tag(tag_type t)
545{
546  if (!tag_info(t).present)
547    fatal("tag %1 missing", int(t));
548}
549
550// put a human-readable font name in the file
551static void
552output_font_name(File &f)
553{
554  char *p;
555
556  if (!tag_info(font_name_tag).present)
557    return;
558  int count = tag_info(font_name_tag).count;
559  char *font_name = new char[count];
560
561  if (count > 4) {	// value is a file offset to the string
562    f.seek(tag_info(font_name_tag).value);
563    int n = count;
564    p = font_name;
565    while (--n)
566      *p++ = f.get_byte();
567  }
568  else			// orig_value contains the string
569    sprintf(font_name, "%.*s",
570	    count, tag_info(font_name_tag).orig_value);
571
572  // remove any trailing space
573  p = font_name + count - 1;
574  while (csspace(*--p))
575    ;
576  *(p + 1) = '\0';
577  printf("# %s\n", font_name);
578  delete font_name;
579}
580
581static void
582output_spacewidth()
583{
584  require_tag(word_spacing_tag);
585  printf("spacewidth %d\n", scale(tag_info(word_spacing_tag).value));
586}
587
588static void
589read_symbol_sets(File &f)
590{
591  uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
592  uint16 *symbol_set_selectors;
593  n_symbol_sets = symbol_set_dir_length/14;
594  symbol_set_table = new symbol_set[n_symbol_sets];
595  unsigned int i;
596
597  for (i = 0; i < nchars; i++)
598    char_table[i].symbol_set = NO_SYMBOL_SET;
599
600  for (i = 0; i < n_symbol_sets; i++) {
601    f.seek(tag_info(symbol_set_tag).value + i*14);
602    (void)f.get_uint32();		// offset to symbol set name
603    uint32 off1 = f.get_uint32();	// offset to selection string
604    uint32 off2 = f.get_uint32();	// offset to symbol set index array
605
606    f.seek(off1);
607    uint16 kind = 0;			// HP-GL "Kind 1" symbol set value
608    unsigned int j;
609    for (j = 0; j < off2 - off1; j++) {
610      unsigned char c = f.get_byte();
611      if ('0' <= c && c <= '9')		// value
612	kind = kind*10 + (c - '0');
613      else if ('A' <= c && c <= 'Z')	// terminator
614	kind = kind*32 + (c - 64);
615    }
616    symbol_set_table[i].select = kind;
617    for (j = 0; j < 256; j++)
618      symbol_set_table[i].index[j] = f.get_uint16();
619  }
620
621  symbol_set_selectors = (special_flag ? special_symbol_sets
622				       : text_symbol_sets);
623  for (i = 0; symbol_set_selectors[i] != 0; i++) {
624    unsigned int j;
625    for (j = 0; j < n_symbol_sets; j++)
626      if (symbol_set_table[j].select == symbol_set_selectors[i])
627	break;
628    if (j < n_symbol_sets) {
629      for (int k = 0; k < 256; k++) {
630	uint16 idx = symbol_set_table[j].index[k];
631	if (idx != NO_GLYPH
632	    && char_table[idx].symbol_set == NO_SYMBOL_SET) {
633	  char_table[idx].symbol_set = symbol_set_table[j].select;
634	  char_table[idx].code = k;
635	}
636      }
637    }
638  }
639
640  if (all_flag)
641    return;
642
643  symbol_set_selectors = (special_flag ? text_symbol_sets
644				       : special_symbol_sets);
645  for (i = 0; symbol_set_selectors[i] != 0; i++) {
646    unsigned int j;
647    for (j = 0; j < n_symbol_sets; j++)
648      if (symbol_set_table[j].select == symbol_set_selectors[i])
649	break;
650    if (j < n_symbol_sets) {
651      for (int k = 0; k < 256; k++) {
652	uint16 idx = symbol_set_table[j].index[k];
653	if (idx != NO_GLYPH
654	    && char_table[idx].symbol_set == NO_SYMBOL_SET) {
655	  char_table[idx].symbol_set = symbol_set_table[j].select;
656	  char_table[idx].code = k;
657	}
658      }
659    }
660  }
661  return;
662}
663
664static void
665read_char_table(File &f)
666{
667  require_tag(charcode_tag);
668  nchars = tag_info(charcode_tag).count;
669  char_table = new char_info[nchars];
670
671  f.seek(tag_info(charcode_tag).value);
672  uint32 i;
673  for (i = 0; i < nchars; i++)
674    char_table[i].charcode = f.get_uint16();
675
676  require_tag(width_tag);
677  f.seek(tag_info(width_tag).value);
678  for (i = 0; i < nchars; i++)
679    char_table[i].width = f.get_uint16();
680
681  require_tag(ascent_tag);
682  f.seek(tag_info(ascent_tag).value);
683  for (i = 0; i < nchars; i++) {
684    char_table[i].ascent = f.get_uint16();
685    if (char_table[i].ascent < 0)
686      char_table[i].ascent = 0;
687  }
688
689  require_tag(descent_tag);
690  f.seek(tag_info(descent_tag).value);
691  for (i = 0; i < nchars; i++) {
692    char_table[i].descent = f.get_uint16();
693    if (char_table[i].descent > 0)
694      char_table[i].descent = 0;
695  }
696
697  require_tag(left_extent_tag);
698  f.seek(tag_info(left_extent_tag).value);
699  for (i = 0; i < nchars; i++)
700    char_table[i].left_extent = int16(f.get_uint16());
701
702  require_tag(right_extent_tag);
703  f.seek(tag_info(right_extent_tag).value);
704  for (i = 0; i < nchars; i++)
705    char_table[i].right_extent = f.get_uint16();
706}
707
708static void
709output_pclweight()
710{
711  require_tag(stroke_weight_tag);
712  int stroke_weight = tag_info(stroke_weight_tag).value;
713  int pcl_stroke_weight;
714  if (stroke_weight < 128)
715    pcl_stroke_weight = -3;
716  else if (stroke_weight == 128)
717    pcl_stroke_weight = 0;
718  else if (stroke_weight <= 145)
719    pcl_stroke_weight = 1;
720  else if (stroke_weight <= 179)
721    pcl_stroke_weight = 3;
722  else
723    pcl_stroke_weight = 4;
724  printf("pclweight %d\n", pcl_stroke_weight);
725}
726
727static void
728output_pclproportional()
729{
730  require_tag(spacing_tag);
731  printf("pclproportional %d\n", tag_info(spacing_tag).value == 0);
732}
733
734static void
735read_and_output_pcltypeface(File &f)
736{
737  printf("pcltypeface ");
738  require_tag(typeface_tag);
739  if (tag_info(typeface_tag).count > 4) {
740    f.seek(tag_info(typeface_tag).value);
741    for (uint32 i = 0; i < tag_info(typeface_tag).count; i++) {
742      unsigned char c = f.get_byte();
743      if (c == '\0')
744	break;
745      putchar(c);
746    }
747  }
748  else
749    printf("%.4s", tag_info(typeface_tag).orig_value);
750  printf("\n");
751}
752
753static void
754output_pclstyle()
755{
756  unsigned pcl_style = 0;
757  // older tfms don't have the posture tag
758  if (tag_info(posture_tag).present) {
759    if (tag_info(posture_tag).value)
760      pcl_style |= 1;
761  }
762  else {
763    require_tag(slant_tag);
764    if (tag_info(slant_tag).value != 0)
765      pcl_style |= 1;
766  }
767  require_tag(appearance_width_tag);
768  if (tag_info(appearance_width_tag).value < 100) // guess
769    pcl_style |= 4;
770  printf("pclstyle %d\n", pcl_style);
771}
772
773static void
774output_slant()
775{
776  require_tag(slant_tag);
777  int slant = int16(tag_info(slant_tag).value);
778  if (slant != 0)
779    printf("slant %f\n", slant/100.0);
780}
781
782static void
783output_ligatures()
784{
785  // don't use ligatures for fixed space font
786  require_tag(spacing_tag);
787  if (tag_info(spacing_tag).value != 0)
788    return;
789  static const char *ligature_names[] = {
790    "fi", "fl", "ff", "ffi", "ffl"
791    };
792
793  static const char *ligature_chars[] = {
794    "fi", "fl", "ff", "Fi", "Fl"
795    };
796
797  unsigned ligature_mask = 0;
798  unsigned int i;
799  for (i = 0; i < nchars; i++) {
800    uint16 charcode = char_table[i].charcode;
801    if (charcode < charcode_name_table_size
802	&& char_table[i].symbol_set != NO_SYMBOL_SET) {
803      for (name_list *p = charcode_name_table[charcode]; p; p = p->next)
804	for (unsigned int j = 0; j < SIZEOF(ligature_chars); j++)
805	  if (strcmp(p->name, ligature_chars[j]) == 0) {
806	    ligature_mask |= 1 << j;
807	    break;
808	  }
809      }
810    }
811  if (ligature_mask) {
812    printf("ligatures");
813    for (i = 0; i < SIZEOF(ligature_names); i++)
814      if (ligature_mask & (1 << i))
815	printf(" %s", ligature_names[i]);
816    printf(" 0\n");
817  }
818}
819
820static void
821read_and_output_kernpairs(File &f)
822{
823  if (tag_info(pair_kern_tag).present) {
824    printf("kernpairs\n");
825    f.seek(tag_info(pair_kern_tag).value);
826    uint16 n_pairs = f.get_uint16();
827    for (int i = 0; i < n_pairs; i++) {
828      uint16 i1 = f.get_uint16();
829      uint16 i2 = f.get_uint16();
830      int16 val = int16(f.get_uint16());
831      if (char_table[i1].symbol_set != NO_SYMBOL_SET
832	  && char_table[i2].symbol_set != NO_SYMBOL_SET
833	  && char_table[i1].charcode < charcode_name_table_size
834	  && char_table[i2].charcode < charcode_name_table_size) {
835	for (name_list *p = charcode_name_table[char_table[i1].charcode];
836	     p;
837	     p = p->next)
838	  for (name_list *q = charcode_name_table[char_table[i2].charcode];
839	       q;
840	       q = q->next)
841	    if (!equal(p->name, UNNAMED) && !equal(q->name, UNNAMED))
842		printf("%s %s %d\n", p->name, q->name, scale(val));
843      }
844    }
845  }
846}
847
848static void
849output_charset(const int tfm_type)
850{
851  require_tag(slant_tag);
852  double slant_angle = int16(tag_info(slant_tag).value)*PI/18000.0;
853  double slant = sin(slant_angle)/cos(slant_angle);
854
855  if (italic_flag)
856    require_tag(x_height_tag);
857  require_tag(lower_ascent_tag);
858  require_tag(lower_descent_tag);
859
860  printf("charset\n");
861  unsigned int i;
862  for (i = 0; i < nchars; i++) {
863    uint16 charcode = char_table[i].charcode;
864
865    // the glyph is bound to one of the searched symbol sets
866    if (char_table[i].symbol_set != NO_SYMBOL_SET) {
867      // the character was in the map file
868      if (charcode < charcode_name_table_size && charcode_name_table[charcode])
869	printf("%s", charcode_name_table[charcode]->name);
870      else if (!all_flag)
871	continue;
872      else if (tfm_type == MSL)
873	fputs(hp_msl_to_ucode_name(charcode), stdout);
874      else
875	fputs(unicode_to_ucode_name(charcode), stdout);
876
877      printf("\t%d,%d",
878	     scale(char_table[i].width), scale(char_table[i].ascent));
879
880      int depth = scale(-char_table[i].descent);
881      if (depth < 0)
882	depth = 0;
883      int italic_correction = 0;
884      int left_italic_correction = 0;
885      int subscript_correction = 0;
886
887      if (italic_flag) {
888	italic_correction = scale(char_table[i].right_extent
889				  - char_table[i].width
890				  + italic_sep);
891	if (italic_correction < 0)
892	  italic_correction = 0;
893	subscript_correction = int((tag_info(x_height_tag).value
894				    * slant * .8) + .5);
895	if (subscript_correction > italic_correction)
896	  subscript_correction = italic_correction;
897	left_italic_correction = scale(italic_sep
898				       - char_table[i].left_extent);
899      }
900
901      if (subscript_correction != 0)
902	printf(",%d,%d,%d,%d",
903	       depth, italic_correction, left_italic_correction,
904	       subscript_correction);
905      else if (left_italic_correction != 0)
906	printf(",%d,%d,%d", depth, italic_correction, left_italic_correction);
907      else if (italic_correction != 0)
908	printf(",%d,%d", depth, italic_correction);
909      else if (depth != 0)
910	printf(",%d", depth);
911      // This is fairly arbitrary.  Fortunately it doesn't much matter.
912      unsigned type = 0;
913      if (char_table[i].ascent > int16(tag_info(lower_ascent_tag).value)*9/10)
914	type |= 2;
915      if (char_table[i].descent < int16(tag_info(lower_descent_tag).value)*9/10)
916	type |= 1;
917      printf("\t%d\t%d", type,
918	     char_table[i].symbol_set*256 + char_table[i].code);
919
920      if (tfm_type == UNICODE) {
921	if (charcode >= 0xE000 && charcode <= 0xF8FF)
922	  printf("\t-- HP PUA U+%04X", charcode);
923	else
924	  printf("\t-- U+%04X", charcode);
925      }
926      else
927	printf("\t-- MSL %4d", charcode);
928      printf(" (%3s %3d)\n",
929	     show_symset(char_table[i].symbol_set), char_table[i].code);
930
931      if (charcode < charcode_name_table_size
932	  && charcode_name_table[charcode])
933	for (name_list *p = charcode_name_table[charcode]->next;
934	     p; p = p->next)
935	  printf("%s\t\"\n", p->name);
936    }
937    // warnings about characters in mapfile not found in TFM
938    else if (charcode < charcode_name_table_size
939	     && charcode_name_table[charcode]) {
940      char *name = charcode_name_table[charcode]->name;
941      // don't warn about Unicode or unnamed glyphs
942      //  that aren't in the the TFM file
943      if (tfm_type == UNICODE && !quiet_flag && !equal(name, UNNAMED)
944	  && !is_uname(name)) {
945	fprintf(stderr, "%s: warning: symbol U+%04X (%s",
946		program_name, charcode, name);
947	for (name_list *p = charcode_name_table[charcode]->next;
948	     p; p = p->next)
949	  fprintf(stderr, ", %s", p->name);
950	fprintf(stderr, ") not in any searched symbol set\n");
951      }
952      else if (!quiet_flag && !equal(name, UNNAMED) && !is_uname(name)) {
953	fprintf(stderr, "%s: warning: symbol MSL %d (%s",
954		program_name, charcode, name);
955	for (name_list *p = charcode_name_table[charcode]->next;
956	     p; p = p->next)
957	  fprintf(stderr, ", %s", p->name);
958	fprintf(stderr, ") not in any searched symbol set\n");
959      }
960    }
961  }
962}
963
// Express `a' (in design units) as a fraction of an em, using the
// variable `upem' of the enclosing scope.  Yields 0 while upem is not
// positive, i.e. while design units per em is unknown or unusable, so
// that no division by zero takes place.
#define em_fract(a) (upem > 0 ? double(a)/upem : 0.0)
965
966static void
967dump_tags(File &f)
968{
969  double upem = -1.0;
970
971  printf("TFM tags\n"
972	 "\n"
973	 "tag# type count value\n"
974	 "---------------------\n");
975
976  for (int i = min_tag; i <= max_tag; i++) {
977    enum tag_type t = tag_type(i);
978    if (tag_info(t).present) {
979      printf("%4d %4d %5d", i, tag_info(t).type, tag_info(t).count);
980      switch (tag_info(t).type) {
981      case BYTE_TYPE:
982      case USHORT_TYPE:
983	printf(" %5u", tag_info(t).value);
984	switch (i) {
985	case type_tag:
986	  printf(" Font Type ");
987	  switch (tag_info(t).value) {
988	  case MSL:
989	  case SYMSET:
990	    printf("(Intellifont)");
991	    break;
992	  case UNICODE:
993	    printf("(TrueType)");
994	  }
995	  break;
996	case charcode_tag:
997	  printf(" Number of Symbols (%u)", tag_info(t).count);
998	  break;
999	case symbol_set_tag:
1000	  printf(" Symbol Sets (%u): ",
1001		 tag_info(symbol_set_tag).count / 14);
1002	  dump_symbol_sets(f);
1003	  break;
1004	case type_structure_tag:
1005	  printf(" Type Structure (%u)", tag_info(t).value);
1006	  break;
1007	case stroke_weight_tag:
1008	  printf(" Stroke Weight (%u)", tag_info(t).value);
1009	  break;
1010	case spacing_tag:
1011	  printf(" Spacing ");
1012	  switch (tag_info(t).value) {
1013	  case 0:
1014	    printf("(Proportional)");
1015	    break;
1016	  case 1:
1017	    printf("(Fixed Pitch: %u DU: %.2f em)", tag_info(t).value,
1018		   em_fract(tag_info(t).value));
1019	    break;
1020	  }
1021	  break;
1022	case appearance_width_tag:
1023	  printf(" Appearance Width (%u)", tag_info(t).value);
1024	  break;
1025	case serif_style_tag:
1026	  printf(" Serif Style (%u)", tag_info(t).value);
1027	  break;
1028	case posture_tag:
1029	  printf(" Posture (%s)", tag_info(t).value == 0
1030				  ? "Upright"
1031				  : tag_info(t).value == 1
1032				    ? "Italic"
1033				    : "Alternate Italic");
1034	  break;
1035	case max_width_tag:
1036	  printf(" Maximum Width (%u DU: %.2f em)", tag_info(t).value,
1037		 em_fract(tag_info(t).value));
1038	  break;
1039	case word_spacing_tag:
1040	  printf(" Interword Spacing (%u DU: %.2f em)", tag_info(t).value,
1041		 em_fract(tag_info(t).value));
1042	  break;
1043	case recommended_line_spacing_tag:
1044	  printf(" Recommended Line Spacing (%u DU: %.2f em)", tag_info(t).value,
1045		 em_fract(tag_info(t).value));
1046	  break;
1047	case x_height_tag:
1048	  printf(" x-Height (%u DU: %.2f em)", tag_info(t).value,
1049		 em_fract(tag_info(t).value));
1050	  break;
1051	case cap_height_tag:
1052	  printf(" Cap Height (%u DU: %.2f em)", tag_info(t).value,
1053		 em_fract(tag_info(t).value));
1054	  break;
1055	case max_ascent_tag:
1056	  printf(" Maximum Ascent (%u DU: %.2f em)", tag_info(t).value,
1057		 em_fract(tag_info(t).value));
1058	  break;
1059	case lower_ascent_tag:
1060	  printf(" Lowercase Ascent (%u DU: %.2f em)", tag_info(t).value,
1061		 em_fract(tag_info(t).value));
1062	  break;
1063	case underscore_thickness_tag:
1064	  printf(" Underscore Thickness (%u DU: %.2f em)", tag_info(t).value,
1065		 em_fract(tag_info(t).value));
1066	  break;
1067	case uppercase_accent_height_tag:
1068	  printf(" Uppercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
1069		 em_fract(tag_info(t).value));
1070	  break;
1071	case lowercase_accent_height_tag:
1072	  printf(" Lowercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
1073		 em_fract(tag_info(t).value));
1074	  break;
1075	case width_tag:
1076	  printf(" Horizontal Escapement array");
1077	  break;
1078	case vertical_escapement_tag:
1079	  printf(" Vertical Escapement array");
1080	  break;
1081	case right_extent_tag:
1082	  printf(" Right Extent array");
1083	  break;
1084	case ascent_tag:
1085	  printf(" Character Ascent array");
1086	  break;
1087	case pair_kern_tag:
1088	  f.seek(tag_info(t).value);
1089	  printf(" Kern Pairs (%u)", f.get_uint16());
1090	  break;
1091	case panose_tag:
1092	  printf(" PANOSE Classification array");
1093	  break;
1094	}
1095	break;
1096      case SIGNED_SHORT_TYPE:
1097	printf(" %5d", int16(tag_info(t).value));
1098	switch (i) {
1099	case slant_tag:
1100	  printf(" Slant (%.2f degrees)", double(tag_info(t).value) / 100);
1101	  break;
1102	case max_descent_tag:
1103	  printf(" Maximum Descent (%d DU: %.2f em)", int16(tag_info(t).value),
1104		 em_fract(int16(tag_info(t).value)));
1105	  break;
1106	case lower_descent_tag:
1107	  printf(" Lowercase Descent (%d DU: %.2f em)", int16(tag_info(t).value),
1108		 em_fract(int16(tag_info(t).value)));
1109	  break;
1110	case underscore_depth_tag:
1111	  printf(" Underscore Depth (%d DU: %.2f em)", int16(tag_info(t).value),
1112		 em_fract(int16(tag_info(t).value)));
1113	  break;
1114	case left_extent_tag:
1115	  printf(" Left Extent array");
1116	  break;
1117	// The type of this tag has changed from SHORT to SIGNED SHORT
1118	// in TFM version 1.3.0.
1119	case ascent_tag:
1120	  printf(" Character Ascent array");
1121	  break;
1122	case descent_tag:
1123	  printf(" Character Descent array");
1124	  break;
1125	}
1126	break;
1127      case RATIONAL_TYPE:
1128	printf(" %5u", tag_info(t).value);
1129	switch (i) {
1130	case inches_per_point_tag:
1131	  printf(" Inches per Point");
1132	  break;
1133	case nominal_point_size_tag:
1134	  printf(" Nominal Point Size");
1135	  break;
1136	case design_units_per_em_tag:
1137	  printf(" Design Units per Em");
1138	  break;
1139	case average_width_tag:
1140	  printf(" Average Width");
1141	  break;
1142	}
1143	if (tag_info(t).count == 1) {
1144	  f.seek(tag_info(t).value);
1145	  uint32 num = f.get_uint32();
1146	  uint32 den = f.get_uint32();
1147	  if (i == design_units_per_em_tag)
1148	    upem = double(num) / den;
1149	  printf(" (%u/%u = %g)", num, den, double(num)/den);
1150	}
1151	break;
1152      case ASCII_TYPE:
1153	printf(" %5u ", tag_info(t).value);
1154	switch (i) {
1155	case comment_tag:
1156	  printf("Comment ");
1157	  break;
1158	case copyright_tag:
1159	  printf("Copyright ");
1160	  break;
1161	case unique_identifier_tag:
1162	  printf("Unique ID ");
1163	  break;
1164	case font_name_tag:
1165	  printf("Typeface Name ");
1166	  break;
1167	case typeface_source_tag:
1168	  printf("Typeface Source ");
1169	  break;
1170	case typeface_tag:
1171	  printf("PCL Typeface ");
1172	  break;
1173	}
1174	dump_ascii(f, t);
1175      }
1176      putchar('\n');
1177    }
1178  }
1179  putchar('\n');
1180}
1181#undef em_fract
1182
1183static void
1184dump_ascii(File &f, tag_type t)
1185{
1186  putchar('"');
1187  if (tag_info(t).count > 4) {
1188    int count = tag_info(t).count;
1189    f.seek(tag_info(t).value);
1190    while (--count)
1191      printf("%c", f.get_byte());
1192  }
1193  else
1194    printf("%.4s", tag_info(t).orig_value);
1195  putchar('"');
1196}
1197
1198static void
1199dump_symbol_sets(File &f)
1200{
1201  uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
1202  uint32 num_symbol_sets = symbol_set_dir_length / 14;
1203
1204  for (uint32 i = 0; i < num_symbol_sets; i++) {
1205    f.seek(tag_info(symbol_set_tag).value + i * 14);
1206    (void)f.get_uint32();		// offset to symbol set name
1207    uint32 off1 = f.get_uint32();	// offset to selection string
1208    uint32 off2 = f.get_uint32();	// offset to symbol set index array
1209    f.seek(off1);
1210    for (uint32 j = 0; j < off2 - off1; j++) {
1211      unsigned char c = f.get_byte();
1212      if ('0' <= c && c <= '9')
1213	putchar(c);
1214      else if ('A' <= c && c <= 'Z')
1215	printf(i < num_symbol_sets - 1 ? "%c," : "%c", c);
1216    }
1217  }
1218}
1219
1220static void
1221dump_symbols(int tfm_type)
1222{
1223  printf("Symbols:\n"
1224	 "\n"
1225	 " glyph id#     symbol set  name(s)\n"
1226	 "----------------------------------\n");
1227  for (uint32 i = 0; i < nchars; i++) {
1228    uint16 charcode = char_table[i].charcode;
1229    if (charcode < charcode_name_table_size
1230	&& charcode_name_table[charcode]) {
1231      if (char_table[i].symbol_set != NO_SYMBOL_SET) {
1232	printf(tfm_type == UNICODE ? "%4d (U+%04X)   (%3s %3d)  %s"
1233				   : "%4d (MSL %4d) (%3s %3d)  %s",
1234	       i, charcode,
1235	       show_symset(char_table[i].symbol_set),
1236	       char_table[i].code,
1237	       charcode_name_table[charcode]->name);
1238	for (name_list *p = charcode_name_table[charcode]->next;
1239	      p; p = p->next)
1240	  printf(", %s", p->name);
1241	putchar('\n');
1242      }
1243    }
1244    else {
1245      printf(tfm_type == UNICODE ? "%4d (U+%04X)   "
1246				 : "%4d (MSL %4d) ",
1247	     i, charcode);
1248      if (char_table[i].symbol_set != NO_SYMBOL_SET)
1249	printf("(%3s %3d)",
1250	       show_symset(char_table[i].symbol_set), char_table[i].code);
1251      putchar('\n');
1252    }
1253  }
1254  putchar('\n');
1255}
1256
// Format a symbol set number as text: the value divided by 32 in
// decimal, followed by the character whose code is 64 plus the low
// five bits (e.g. 277 yields "8U").
//
// The result is stored in a static buffer that is overwritten by the
// next call; callers must not free it.  The buffer is large enough
// for any 32-bit `symset', and the write is bounded by snprintf.
static char *
show_symset(unsigned int symset)
{
   static char symset_str[16];

   snprintf(symset_str, sizeof(symset_str), "%u%c",
	    symset / 32, (int)((symset & 31) + 64));
   return symset_str;
}
1265
1266static char *
1267hp_msl_to_ucode_name(int msl)
1268{
1269  char codestr[8];
1270
1271  sprintf(codestr, "%d", msl);
1272  const char *ustr = hp_msl_to_unicode_code(codestr);
1273  if (ustr == NULL)
1274    ustr = UNNAMED;
1275  else {
1276    char *nonum;
1277    int ucode = int(strtol(ustr, &nonum, 16));
1278    // don't allow PUA code points as Unicode names
1279    if (ucode >= 0xE000 && ucode <= 0xF8FF)
1280      ustr = UNNAMED;
1281  }
1282  if (!equal(ustr, UNNAMED)) {
1283    const char *uname_decomposed = decompose_unicode(ustr);
1284    if (uname_decomposed)
1285      // 1st char is the number of components
1286      ustr = uname_decomposed + 1;
1287  }
1288  char *value = new char[strlen(ustr) + 1];
1289  sprintf(value, equal(ustr, UNNAMED) ? ustr : "u%s", ustr);
1290  return value;
1291}
1292
1293static char *
1294unicode_to_ucode_name(int ucode)
1295{
1296  const char *ustr;
1297  char codestr[8];
1298
1299  // don't allow PUA code points as Unicode names
1300  if (ucode >= 0xE000 && ucode <= 0xF8FF)
1301    ustr = UNNAMED;
1302  else {
1303    sprintf(codestr, "%04X", ucode);
1304    ustr = codestr;
1305  }
1306  if (!equal(ustr, UNNAMED)) {
1307    const char *uname_decomposed = decompose_unicode(ustr);
1308    if (uname_decomposed)
1309      // 1st char is the number of components
1310      ustr = uname_decomposed + 1;
1311  }
1312  char *value = new char[strlen(ustr) + 1];
1313  sprintf(value, equal(ustr, UNNAMED) ? ustr : "u%s", ustr);
1314  return value;
1315}
1316
1317static int
1318is_uname(char *name)
1319{
1320  size_t i;
1321  size_t len = strlen(name);
1322  if (len % 5)
1323    return 0;
1324
1325  if (name[0] != 'u')
1326    return 0;
1327  for (i = 1; i < 4; i++)
1328    if (!csxdigit(name[i]))
1329      return 0;
1330  for (i = 5; i < len; i++)
1331    if (i % 5 ? !csxdigit(name[i]) : name[i] != '_')
1332      return 0;
1333
1334  return 1;
1335}
1336
1337static int
1338read_map(const char *file, const int tfm_type)
1339{
1340  errno = 0;
1341  FILE *fp = fopen(file, "r");
1342  if (!fp) {
1343    error("can't open `%1': %2", file, strerror(errno));
1344    return 0;
1345  }
1346  current_filename = file;
1347  char buf[512];
1348  current_lineno = 0;
1349  char *nonum;
1350  while (fgets(buf, int(sizeof(buf)), fp)) {
1351    current_lineno++;
1352    char *ptr = buf;
1353    while (csspace(*ptr))
1354      ptr++;
1355    if (*ptr == '\0' || *ptr == '#')
1356      continue;
1357    ptr = strtok(ptr, " \n\t");
1358    if (!ptr)
1359      continue;
1360
1361    int msl_code = int(strtol(ptr, &nonum, 10));
1362    if (*nonum != '\0') {
1363      if (csxdigit(*nonum))
1364	error("bad MSL map: got hex code (%1)", ptr);
1365      else if (ptr == nonum)
1366	error("bad MSL map: bad MSL code (%1)", ptr);
1367      else
1368	error("bad MSL map");
1369      fclose(fp);
1370      return 0;
1371    }
1372
1373    ptr = strtok(NULL, " \n\t");
1374    if (!ptr)
1375      continue;
1376    int unicode = int(strtol(ptr, &nonum, 16));
1377    if (*nonum != '\0') {
1378      if (ptr == nonum)
1379	error("bad Unicode value (%1)", ptr);
1380      else
1381	error("bad Unicode map");
1382      fclose(fp);
1383      return 0;
1384    }
1385    if (strlen(ptr) != 4) {
1386      error("bad Unicode value (%1)", ptr);
1387      return 0;
1388    }
1389
1390    int n = tfm_type == MSL ? msl_code : unicode;
1391    if (tfm_type == UNICODE && n > 0xFFFF) {
1392      // greatest value supported by TFM files
1393      error("bad Unicode value (%1): greatest value is 0xFFFF", ptr);
1394      fclose(fp);
1395      return 0;
1396    }
1397    else if (n < 0) {
1398      error("negative code value (%1)", ptr);
1399      fclose(fp);
1400      return 0;
1401    }
1402
1403    ptr = strtok(NULL, " \n\t");
1404    if (!ptr) {					// groff name
1405      error("missing name(s)");
1406      fclose(fp);
1407      return 0;
1408    }
1409    // leave decomposed Unicode values alone
1410    else if (is_uname(ptr) && !is_decomposed(ptr))
1411      ptr = unicode_to_ucode_name(strtol(ptr + 1, &nonum, 16));
1412
1413    if (size_t(n) >= charcode_name_table_size) {
1414      size_t old_size = charcode_name_table_size;
1415      name_list **old_table = charcode_name_table;
1416      charcode_name_table_size = n + 256;
1417      charcode_name_table = new name_list *[charcode_name_table_size];
1418      if (old_table) {
1419	memcpy(charcode_name_table, old_table, old_size*sizeof(name_list *));
1420	a_delete old_table;
1421      }
1422      for (size_t i = old_size; i < charcode_name_table_size; i++)
1423	charcode_name_table[i] = NULL;
1424    }
1425
1426    // a '#' that isn't the first groff name begins a comment
1427    for (int names = 1; ptr; ptr = strtok(NULL, " \n\t")) {
1428      if (names++ > 1 && *ptr == '#')
1429	break;
1430      charcode_name_table[n] = new name_list(ptr, charcode_name_table[n]);
1431    }
1432  }
1433  fclose(fp);
1434  return 1;
1435}
1436
1437static const char *
1438xbasename(const char *s)
1439{
1440  // DIR_SEPS[] are possible directory separator characters, see
1441  // nonposix.h.  We want the rightmost separator of all possible
1442  // ones.  Example: d:/foo\\bar.
1443  const char *b = strrchr(s, DIR_SEPS[0]), *b1;
1444  const char *sep = &DIR_SEPS[1];
1445
1446  while (*sep)
1447    {
1448      b1 = strrchr(s, *sep);
1449      if (b1 && (!b || b1 > b))
1450	b = b1;
1451      sep++;
1452    }
1453  return b ? b + 1 : s;
1454}
1455