1139790Simp/*	$NetBSD: hpftodit.cpp,v 1.2 2016/01/13 19:01:59 christos Exp $	*/
2215140Sjkim
33263Sdg// -*- C++ -*-
4215140Sjkim/* Copyright (C) 1994, 2000, 2001, 2003, 2004 Free Software Foundation, Inc.
53263Sdg     Written by James Clark (jjc@jclark.com)
6215140Sjkim
7This file is part of groff.
8
9groff is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 2, or (at your option) any later
12version.
13
14groff is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License along
20with groff; see the file COPYING.  If not, write to the Free Software
21Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
22
23/*
24TODO
25devise new names for useful characters
26option to specify symbol sets to look in
27put filename in error messages (or fix lib)
28*/
29
30#include "lib.h"
31
32#include <stdio.h>
33#include <stdlib.h>
34#include <string.h>
35#include <ctype.h>
36#include <math.h>
37#include <errno.h>
38#include "assert.h"
39#include "posix.h"
40#include "errarg.h"
41#include "error.h"
42#include "cset.h"
43#include "nonposix.h"
44#include "unicode.h"
45
46extern "C" const char *Version_string;
47extern const char *hp_msl_to_unicode_code(const char *);
48
// Number of elements in a true array (must not be used on a pointer).
#define SIZEOF(v) (sizeof(v)/sizeof((v)[0]))
// Non-zero if the NUL-terminated strings a and b compare equal.
#define equal(a, b) (strcmp((a), (b)) == 0)
// only valid if is_uname(c) has returned true
// Yields a pointer to the first '_' in c, or a null pointer if there is none.
#define is_decomposed(c) (strchr((c), '_'))
53
54#define NO       0
55#define YES      1
56
57#define MSL      0
58#define SYMSET   1
59#define UNICODE  2
60
61#define UNNAMED "---"
62
// Factor applied by scale() to values taken from the TFM file.  The
// default of 3.0 makes the Agfa-based unitwidth an integer; main()
// recomputes it for TrueType (Unicode) TFM files.
static double multiplier = 3.0;

// Scale n by the current multiplier; the result is obtained by adding
// 0.5 and truncating toward zero.
inline
int scale(int n)
{
  return static_cast<int>(0.5 + multiplier * n);
}
70
// tags in TFM file
//
// Numeric identifiers of the directory entries this program understands.
// min_tag and max_tag bound the range; the global `tags' table holds one
// slot per value in [min_tag, max_tag], and read_tags() ignores tags
// outside that range.  Note that no enumerator is defined for 416.

enum tag_type {
  min_tag = 400,
  type_tag = 400,
  copyright_tag = 401,
  comment_tag = 402,
  charcode_tag = 403,		// MSL for Intellifont, Unicode for TrueType
  symbol_set_tag = 404,
  unique_identifier_tag = 405,
  inches_per_point_tag = 406,
  nominal_point_size_tag = 407,
  design_units_per_em_tag = 408,
  posture_tag = 409,
  type_structure_tag = 410,
  stroke_weight_tag = 411,
  spacing_tag = 412,
  slant_tag = 413,
  appearance_width_tag = 414,
  serif_style_tag = 415,
  font_name_tag = 417,
  typeface_source_tag = 418,
  average_width_tag = 419,
  max_width_tag = 420,
  word_spacing_tag = 421,
  recommended_line_spacing_tag = 422,
  cap_height_tag = 423,
  x_height_tag = 424,
  max_ascent_tag = 425,
  max_descent_tag = 426,
  lower_ascent_tag = 427,
  lower_descent_tag = 428,
  underscore_depth_tag = 429,
  underscore_thickness_tag = 430,
  uppercase_accent_height_tag = 431,
  lowercase_accent_height_tag = 432,
  width_tag = 433,
  vertical_escapement_tag = 434,
  left_extent_tag = 435,
  right_extent_tag = 436,
  ascent_tag = 437,
  descent_tag = 438,
  pair_kern_tag = 439,
  sector_kern_tag = 440,
  track_kern_tag = 441,
  typeface_tag = 442,
  panose_tag = 443,
  max_tag = 443
};
120
// Human-readable labels for two tags.
// NOTE(review): not referenced in the part of the file reviewed here, and
// the order does not follow the tag numbering -- confirm whether it is
// still used before relying on the indices.
const char *tag_name[] = {
  "Symbol Set",
  "Font Type"		// MSL for Intellifont, Unicode for TrueType
};
125
// types in TFM file
// Values of the `type' field of a tag directory entry (see struct entry).
enum {
  BYTE_TYPE = 1,
  ASCII_TYPE = 2,		// NUL-terminated string
  USHORT_TYPE = 3,
  LONG_TYPE = 4,		// unused
  RATIONAL_TYPE = 5,		// numerator then denominator; this program
				// reads each as a 32-bit value
  SIGNED_BYTE_TYPE = 16,	// unused
  SIGNED_SHORT_TYPE = 17,
  SIGNED_LONG_TYPE = 18		// unused
};
137
// Fixed-size integer aliases used when decoding the TFM file.
// NOTE(review): these assume short is 16 bits and int is 32 bits on the
// target platform -- confirm for unusual ABIs.
typedef unsigned char byte;
typedef unsigned short uint16;
typedef short int16;
typedef unsigned int uint32;
142
// In-memory reader for a TFM file.  The constructor loads the whole file
// into a buffer; the accessors then decode little-endian values from a
// moving read position.  Every accessor calls fatal() rather than read
// past the end of the buffer.
class File {
public:
  // Open and read the named file; fatal() on any failure.
  File(const char *);
  // Advance the read position by n bytes.
  void skip(int n);
  // Read one byte and advance.
  byte get_byte();
  // Read a 16-bit value, least significant byte first.
  uint16 get_uint16();
  // Read a 32-bit value, least significant byte first.
  uint32 get_uint32();
  // As get_uint32(), additionally storing the four raw bytes in orig[0..3].
  uint32 get_uint32(char *orig);
  // Set the read position to byte offset n from the start of the file.
  void seek(uint32 n);
private:
  unsigned char *buf_;		// start of the file contents
  const unsigned char *ptr_;	// current read position
  const unsigned char *end_;	// one past the last byte of the contents
};
157
// One tag directory entry as read by read_tags().
struct entry {
  char present;		// set to 1 once the tag has been seen in the file
  uint16 type;		// one of the *_TYPE constants
  uint32 count;		// number of items of that type
  uint32 value;		// the value itself, or a file offset to the data
  char orig_value[4];	// the raw bytes of `value' (short strings are
			// stored inline here instead of at an offset)
  entry() : present(0) { }
};
166
// Per-glyph data.  The metrics are filled in by read_char_table();
// symbol_set and code are filled in by read_symbol_sets().
struct char_info {
  uint16 charcode;	// MSL number or Unicode value, depending on TFM type
  uint16 width;
  int16 ascent;		// clamped to be >= 0 when read
  int16 descent;	// clamped to be <= 0 when read
  int16 left_extent;
  uint16 right_extent;
  uint16 symbol_set;	// selector of the symbol set the glyph was bound
			// to, or NO_SYMBOL_SET
  unsigned char code;	// position of the glyph within that symbol set
};

// Value in a symbol set index array marking a position with no glyph.
const uint16 NO_GLYPH = 0xffff;
// char_info::symbol_set value for a glyph not bound to any symbol set.
const uint16 NO_SYMBOL_SET = 0;
180
// Singly linked list of glyph names sharing one character code.  Each
// node owns a copy of its name (made with strsave) and releases it in the
// destructor; the destructor does not delete the following nodes.
struct name_list {
  char *name;
  name_list *next;
  name_list(const char *s, name_list *p) : name(strsave(s)), next(p) { }
  ~name_list() { a_delete name; }
};
187
// One symbol set found in the TFM file (see read_symbol_sets()).
struct symbol_set {
  uint16 select;	// selector value decoded from the selection string
  uint16 index[256];	// for each code 0..255, the index of the glyph in
			// char_table, or NO_GLYPH
};
192
// Encode a symbol set designation such as "19U" as a single number:
// the numeric part times 32 plus the letter's offset from '@' (64).
// read_symbol_sets() decodes selection strings with the same formula.
#define SYMBOL_SET(n, c) ((n) * 32 + ((c) - 64))

// Symbol sets searched for text fonts, in order of preference.  A glyph
// is bound to the first listed set that contains it.  The list is
// terminated by 0.
uint16 text_symbol_sets[] = {
  SYMBOL_SET(19, 'U'),		// Windows Latin 1 ("ANSI", code page 1252)
  SYMBOL_SET(9, 'E'),		// Windows Latin 2, Code Page 1250
  SYMBOL_SET(5, 'T'),		// Code Page 1254
  SYMBOL_SET(7, 'J'),		// Desktop
  SYMBOL_SET(6, 'J'),		// Microsoft Publishing
  SYMBOL_SET(0, 'N'),		// Latin 1 (subset of 19U,
				// so we should never get here)
  SYMBOL_SET(2, 'N'),		// Latin 2 (subset of 9E,
				// so we should never get here)
  SYMBOL_SET(8, 'U'),		// HP Roman 8
  SYMBOL_SET(10, 'J'),		// PS Standard
  SYMBOL_SET(9, 'U'),		// Windows 3.0 "ANSI"
  SYMBOL_SET(1, 'U'),		// U.S. Legal

  SYMBOL_SET(12, 'J'),		// MC Text
  SYMBOL_SET(10, 'U'),		// PC Code Page 437
  SYMBOL_SET(11, 'U'),		// PC Code Page 437N
  SYMBOL_SET(17, 'U'),		// PC Code Page 852
  SYMBOL_SET(12, 'U'),		// PC Code Page 850
  SYMBOL_SET(9, 'T'),		// PC Code Page 437T
  0
};
218
// Symbol sets searched first for special fonts (-s), in order of
// preference; for text fonts they are searched after text_symbol_sets
// unless -a was given.  The list is terminated by 0.
uint16 special_symbol_sets[] = {
  SYMBOL_SET(8, 'M'),		// Math 8
  SYMBOL_SET(5, 'M'),		// PS Math
  SYMBOL_SET(15, 'U'),		// Pi font
  SYMBOL_SET(13, 'J'),		// Ventura International
  SYMBOL_SET(19, 'M'),		// Symbol font
  SYMBOL_SET(579, 'L'),		// Wingdings
  0
};
228
// Directory entries indexed by (tag - min_tag); see tag_info().
entry tags[max_tag + 1 - min_tag];

// Glyph table allocated by read_char_table(); nchars is its length.
char_info *char_table;
uint32 nchars = 0;

// Glyph names indexed by character code; entries may be null.
// NOTE(review): populated outside the part of the file reviewed here
// (presumably by read_map) -- confirm.
unsigned int charcode_name_table_size = 0;
name_list **charcode_name_table = NULL;

// Symbol sets found in the TFM file, allocated by read_symbol_sets().
symbol_set *symbol_set_table;
unsigned int n_symbol_sets;

static int debug_flag = NO;	// -d: dump information instead of writing a font
static int special_flag = NO;	// not a special font
static int italic_flag = NO;	// don't add italic correction
static int italic_sep;		// -i argument; main() converts it from
				// thousandths of an em to design units
static int all_flag = NO;	// don't include glyphs not in mapfile
static int quiet_flag = NO;	// don't suppress warnings about symbols not found
246
247static char *hp_msl_to_ucode_name(int);
248static char *unicode_to_ucode_name(int);
249static int is_uname(char *);
250static char *show_symset(unsigned int);
251static void usage(FILE *);
252static void usage();
253static const char *xbasename(const char *);
254static void read_tags(File &);
255static int check_type();
256static void check_units(File &, const int, double *, double *);
257static int read_map(const char *, const int);
258static void require_tag(tag_type);
259static void dump_ascii(File &, tag_type);
260static void dump_tags(File &);
261static void dump_symbol_sets(File &);
262static void dump_symbols(int);
263static void output_font_name(File &);
264static void output_spacewidth();
265static void output_pclweight();
266static void output_pclproportional();
267static void read_and_output_pcltypeface(File &);
268static void output_pclstyle();
269static void output_slant();
270static void output_ligatures();
271static void read_symbol_sets(File &);
272static void read_and_output_kernpairs(File &);
273static void output_charset(const int);
274static void read_char_table(File &);
275
276inline
277entry &tag_info(tag_type t)
278{
279  return tags[t - min_tag];
280}
281
// Program entry point.
//
// Normal mode:  hpftodit [-s] [-a] [-q] [-i n] tfm_file map_file output_font
// Debug mode:   hpftodit -d tfm_file [map_file]
//
// Reads the TFM file and the map file, then writes a font description
// to output_font ("-" means standard output).  In debug mode the tags
// and symbols are dumped to standard output instead.
int
main(int argc, char **argv)
{
  program_name = argv[0];

  int opt;
  int res = 1200;		// PCL unit of measure for cursor moves
  int scalesize = 4;		// LaserJet 4 only allows 1/4 point increments
  int unitwidth = 6350;
  double ppi;			// points per inch
  double upem;			// design units per em

  static const struct option long_options[] = {
    { "help", no_argument, 0, CHAR_MAX + 1 },
    { "version", no_argument, 0, 'v' },
    { NULL, 0, 0, 0 }
  };
  while ((opt = getopt_long(argc, argv, "adsqvi:", long_options, NULL)) != EOF) {
    switch (opt) {
    case 'a':
      all_flag = YES;
      break;
    case 'd':
      debug_flag = YES;
      break;
    case 's':
      special_flag = YES;
      break;
    case 'i':
      italic_flag = YES;
      // stored as given here; converted to design units further down,
      // once upem is known
      italic_sep = atoi(optarg);	// design units
      break;
    case 'q':
      quiet_flag = YES;		// suppress warnings about symbols not found
      break;
    case 'v':
      printf("GNU hpftodit (groff) version %s\n", Version_string);
      exit(0);
      break;
    case CHAR_MAX + 1: // --help
      usage(stdout);
      exit(0);
      break;
    case '?':
      usage();			// does not return
      break;
    default:
      assert(0);
    }
  }

  // debug mode needs at least the TFM file; normal mode needs exactly
  // three operands
  if (debug_flag && argc - optind < 1)
    usage();
  else if (!debug_flag && argc - optind != 3)
    usage();
  File f(argv[optind]);
  read_tags(f);
  int tfm_type = check_type();
  if (debug_flag)
    dump_tags(f);
  // the map file is mandatory in normal mode and optional in debug mode
  if (!debug_flag && !read_map(argv[optind + 1], tfm_type))
    exit(1);
  else if (debug_flag && argc - optind > 1)
    read_map(argv[optind + 1], tfm_type);
  current_filename = NULL;
  current_lineno = -1;		// no line numbers
  if (!debug_flag && !equal(argv[optind + 2], "-"))
    if (freopen(argv[optind + 2], "w", stdout) == NULL)
      fatal("cannot open `%1': %2", argv[optind + 2], strerror(errno));
  // from here on, diagnostics refer to the TFM file
  current_filename = argv[optind];

  check_units(f, tfm_type, &ppi, &upem);
  if (tfm_type == UNICODE)	// don't calculate for Intellifont TFMs
    multiplier = double(res) / upem / ppi * unitwidth / scalesize;
  if (italic_flag)
    // convert from thousandths of an em to design units
    italic_sep = int(italic_sep * upem / 1000 + 0.5);

  read_char_table(f);
  if (nchars == 0)
    fatal("no characters");

  if (!debug_flag) {
    // header part of the font description
    output_font_name(f);
    printf("name %s\n", xbasename(argv[optind + 2]));
    if (special_flag)
      printf("special\n");
    output_spacewidth();
    output_slant();
    read_and_output_pcltypeface(f);
    output_pclproportional();
    output_pclweight();
    output_pclstyle();
  }
  // must follow read_char_table(): it fills in char_table[].symbol_set
  read_symbol_sets(f);
  if (debug_flag)
    dump_symbols(tfm_type);
  else {
    output_ligatures();
    read_and_output_kernpairs(f);
    output_charset(tfm_type);
  }
  return 0;
}
386
387static void
388usage(FILE *stream)
389{
390  fprintf(stream,
391	  "usage: %s [-s] [-a] [-q] [-i n] tfm_file map_file output_font\n"
392	  "       %s -d tfm_file [map_file]\n",
393	  program_name, program_name);
394}
395
// Report a command-line error: write the usage synopsis to standard
// error and exit with status 1.  Does not return.
static void
usage()
{
  usage(stderr);
  exit(1);
}
402
403File::File(const char *s)
404{
405  // We need to read the file in binary mode because hpftodit relies
406  // on byte counts.
407  int fd = open(s, O_RDONLY | O_BINARY);
408  if (fd < 0)
409    fatal("cannot open `%1': %2", s, strerror(errno));
410  current_filename = s;
411  struct stat sb;
412  if (fstat(fd, &sb) < 0)
413    fatal("cannot stat: %1", strerror(errno));
414  if (!S_ISREG(sb.st_mode))
415    fatal("not a regular file");
416  buf_ = new unsigned char[sb.st_size];
417  long nread = read(fd, buf_, sb.st_size);
418  if (nread < 0)
419    fatal("read error: %1", strerror(errno));
420  if (nread != sb.st_size)
421    fatal("read unexpected number of bytes");
422  ptr_ = buf_;
423  end_ = buf_ + sb.st_size;
424}
425
426void
427File::skip(int n)
428{
429  if (end_ - ptr_ < n)
430    fatal("unexpected end of file");
431  ptr_ += n;
432}
433
434void
435File::seek(uint32 n)
436{
437  if (uint32(end_ - buf_) < n)
438    fatal("unexpected end of file");
439  ptr_ = buf_ + n;
440}
441
442byte
443File::get_byte()
444{
445  if (ptr_ >= end_)
446    fatal("unexpected end of file");
447  return *ptr_++;
448}
449
450uint16
451File::get_uint16()
452{
453  if (end_ - ptr_ < 2)
454    fatal("unexpected end of file");
455  uint16 n = *ptr_++;
456  return n + (*ptr_++ << 8);
457}
458
459uint32
460File::get_uint32()
461{
462  if (end_ - ptr_ < 4)
463    fatal("unexpected end of file");
464  uint32 n = *ptr_++;
465  for (int i = 0; i < 3; i++)
466    n += *ptr_++ << (i + 1)*8;
467  return n;
468}
469
470uint32
471File::get_uint32(char *orig)
472{
473  if (end_ - ptr_ < 4)
474    fatal("unexpected end of file");
475  unsigned char v = *ptr_++;
476  uint32 n = v;
477  orig[0] = v;
478  for (int i = 1; i < 4; i++) {
479    v = *ptr_++;
480    orig[i] = v;
481    n += v << i*8;
482  }
483  return n;
484}
485
486static void
487read_tags(File &f)
488{
489  if (f.get_byte() != 'I' || f.get_byte() != 'I')
490    fatal("not an Intel format TFM file");
491  f.skip(6);
492  uint16 ntags = f.get_uint16();
493  entry dummy;
494  for (uint16 i = 0; i < ntags; i++) {
495    uint16 tag = f.get_uint16();
496    entry *p;
497    if (min_tag <= tag && tag <= max_tag)
498      p = tags + (tag - min_tag);
499    else
500      p = &dummy;
501    p->present = 1;
502    p->type = f.get_uint16();
503    p->count = f.get_uint32();
504    p->value = f.get_uint32(p->orig_value);
505  }
506}
507
508static int
509check_type()
510{
511  require_tag(type_tag);
512  int tfm_type = tag_info(type_tag).value;
513  switch (tfm_type) {
514    case MSL:
515    case UNICODE:
516      break;
517    case SYMSET:
518      fatal("cannot handle Symbol Set TFM files");
519      break;
520    default:
521      fatal("unknown type tag %1", tfm_type);
522  }
523  return tfm_type;
524}
525
526static void
527check_units(File &f, const int tfm_type, double *ppi, double *upem)
528{
529  require_tag(design_units_per_em_tag);
530  f.seek(tag_info(design_units_per_em_tag).value);
531  uint32 num = f.get_uint32();
532  uint32 den = f.get_uint32();
533  if (tfm_type == MSL && (num != 8782 || den != 1))
534    fatal("design units per em != 8782/1");
535  *upem = double(num) / den;
536  require_tag(inches_per_point_tag);
537  f.seek(tag_info(inches_per_point_tag).value);
538  num = f.get_uint32();
539  den = f.get_uint32();
540  if (tfm_type == MSL && (num != 100 || den != 7231))
541    fatal("inches per point not 100/7231");
542  *ppi = double(den) / num;
543}
544
545static void
546require_tag(tag_type t)
547{
548  if (!tag_info(t).present)
549    fatal("tag %1 missing", int(t));
550}
551
552// put a human-readable font name in the file
553static void
554output_font_name(File &f)
555{
556  char *p;
557
558  if (!tag_info(font_name_tag).present)
559    return;
560  int count = tag_info(font_name_tag).count;
561  char *font_name = new char[count];
562
563  if (count > 4) {	// value is a file offset to the string
564    f.seek(tag_info(font_name_tag).value);
565    int n = count;
566    p = font_name;
567    while (--n)
568      *p++ = f.get_byte();
569  }
570  else			// orig_value contains the string
571    sprintf(font_name, "%.*s",
572	    count, tag_info(font_name_tag).orig_value);
573
574  // remove any trailing space
575  p = font_name + count - 1;
576  while (csspace(*--p))
577    ;
578  *(p + 1) = '\0';
579  printf("# %s\n", font_name);
580  delete[] font_name;
581}
582
583static void
584output_spacewidth()
585{
586  require_tag(word_spacing_tag);
587  printf("spacewidth %d\n", scale(tag_info(word_spacing_tag).value));
588}
589
590static void
591read_symbol_sets(File &f)
592{
593  uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
594  uint16 *symbol_set_selectors;
595  n_symbol_sets = symbol_set_dir_length/14;
596  symbol_set_table = new symbol_set[n_symbol_sets];
597  unsigned int i;
598
599  for (i = 0; i < nchars; i++)
600    char_table[i].symbol_set = NO_SYMBOL_SET;
601
602  for (i = 0; i < n_symbol_sets; i++) {
603    f.seek(tag_info(symbol_set_tag).value + i*14);
604    (void)f.get_uint32();		// offset to symbol set name
605    uint32 off1 = f.get_uint32();	// offset to selection string
606    uint32 off2 = f.get_uint32();	// offset to symbol set index array
607
608    f.seek(off1);
609    uint16 kind = 0;			// HP-GL "Kind 1" symbol set value
610    unsigned int j;
611    for (j = 0; j < off2 - off1; j++) {
612      unsigned char c = f.get_byte();
613      if ('0' <= c && c <= '9')		// value
614	kind = kind*10 + (c - '0');
615      else if ('A' <= c && c <= 'Z')	// terminator
616	kind = kind*32 + (c - 64);
617    }
618    symbol_set_table[i].select = kind;
619    for (j = 0; j < 256; j++)
620      symbol_set_table[i].index[j] = f.get_uint16();
621  }
622
623  symbol_set_selectors = (special_flag ? special_symbol_sets
624				       : text_symbol_sets);
625  for (i = 0; symbol_set_selectors[i] != 0; i++) {
626    unsigned int j;
627    for (j = 0; j < n_symbol_sets; j++)
628      if (symbol_set_table[j].select == symbol_set_selectors[i])
629	break;
630    if (j < n_symbol_sets) {
631      for (int k = 0; k < 256; k++) {
632	uint16 idx = symbol_set_table[j].index[k];
633	if (idx != NO_GLYPH
634	    && char_table[idx].symbol_set == NO_SYMBOL_SET) {
635	  char_table[idx].symbol_set = symbol_set_table[j].select;
636	  char_table[idx].code = k;
637	}
638      }
639    }
640  }
641
642  if (all_flag)
643    return;
644
645  symbol_set_selectors = (special_flag ? text_symbol_sets
646				       : special_symbol_sets);
647  for (i = 0; symbol_set_selectors[i] != 0; i++) {
648    unsigned int j;
649    for (j = 0; j < n_symbol_sets; j++)
650      if (symbol_set_table[j].select == symbol_set_selectors[i])
651	break;
652    if (j < n_symbol_sets) {
653      for (int k = 0; k < 256; k++) {
654	uint16 idx = symbol_set_table[j].index[k];
655	if (idx != NO_GLYPH
656	    && char_table[idx].symbol_set == NO_SYMBOL_SET) {
657	  char_table[idx].symbol_set = symbol_set_table[j].select;
658	  char_table[idx].code = k;
659	}
660      }
661    }
662  }
663  return;
664}
665
666static void
667read_char_table(File &f)
668{
669  require_tag(charcode_tag);
670  nchars = tag_info(charcode_tag).count;
671  char_table = new char_info[nchars];
672
673  f.seek(tag_info(charcode_tag).value);
674  uint32 i;
675  for (i = 0; i < nchars; i++)
676    char_table[i].charcode = f.get_uint16();
677
678  require_tag(width_tag);
679  f.seek(tag_info(width_tag).value);
680  for (i = 0; i < nchars; i++)
681    char_table[i].width = f.get_uint16();
682
683  require_tag(ascent_tag);
684  f.seek(tag_info(ascent_tag).value);
685  for (i = 0; i < nchars; i++) {
686    char_table[i].ascent = f.get_uint16();
687    if (char_table[i].ascent < 0)
688      char_table[i].ascent = 0;
689  }
690
691  require_tag(descent_tag);
692  f.seek(tag_info(descent_tag).value);
693  for (i = 0; i < nchars; i++) {
694    char_table[i].descent = f.get_uint16();
695    if (char_table[i].descent > 0)
696      char_table[i].descent = 0;
697  }
698
699  require_tag(left_extent_tag);
700  f.seek(tag_info(left_extent_tag).value);
701  for (i = 0; i < nchars; i++)
702    char_table[i].left_extent = int16(f.get_uint16());
703
704  require_tag(right_extent_tag);
705  f.seek(tag_info(right_extent_tag).value);
706  for (i = 0; i < nchars; i++)
707    char_table[i].right_extent = f.get_uint16();
708}
709
710static void
711output_pclweight()
712{
713  require_tag(stroke_weight_tag);
714  int stroke_weight = tag_info(stroke_weight_tag).value;
715  int pcl_stroke_weight;
716  if (stroke_weight < 128)
717    pcl_stroke_weight = -3;
718  else if (stroke_weight == 128)
719    pcl_stroke_weight = 0;
720  else if (stroke_weight <= 145)
721    pcl_stroke_weight = 1;
722  else if (stroke_weight <= 179)
723    pcl_stroke_weight = 3;
724  else
725    pcl_stroke_weight = 4;
726  printf("pclweight %d\n", pcl_stroke_weight);
727}
728
729static void
730output_pclproportional()
731{
732  require_tag(spacing_tag);
733  printf("pclproportional %d\n", tag_info(spacing_tag).value == 0);
734}
735
736static void
737read_and_output_pcltypeface(File &f)
738{
739  printf("pcltypeface ");
740  require_tag(typeface_tag);
741  if (tag_info(typeface_tag).count > 4) {
742    f.seek(tag_info(typeface_tag).value);
743    for (uint32 i = 0; i < tag_info(typeface_tag).count; i++) {
744      unsigned char c = f.get_byte();
745      if (c == '\0')
746	break;
747      putchar(c);
748    }
749  }
750  else
751    printf("%.4s", tag_info(typeface_tag).orig_value);
752  printf("\n");
753}
754
755static void
756output_pclstyle()
757{
758  unsigned pcl_style = 0;
759  // older tfms don't have the posture tag
760  if (tag_info(posture_tag).present) {
761    if (tag_info(posture_tag).value)
762      pcl_style |= 1;
763  }
764  else {
765    require_tag(slant_tag);
766    if (tag_info(slant_tag).value != 0)
767      pcl_style |= 1;
768  }
769  require_tag(appearance_width_tag);
770  if (tag_info(appearance_width_tag).value < 100) // guess
771    pcl_style |= 4;
772  printf("pclstyle %d\n", pcl_style);
773}
774
775static void
776output_slant()
777{
778  require_tag(slant_tag);
779  int slant = int16(tag_info(slant_tag).value);
780  if (slant != 0)
781    printf("slant %f\n", slant/100.0);
782}
783
784static void
785output_ligatures()
786{
787  // don't use ligatures for fixed space font
788  require_tag(spacing_tag);
789  if (tag_info(spacing_tag).value != 0)
790    return;
791  static const char *ligature_names[] = {
792    "fi", "fl", "ff", "ffi", "ffl"
793    };
794
795  static const char *ligature_chars[] = {
796    "fi", "fl", "ff", "Fi", "Fl"
797    };
798
799  unsigned ligature_mask = 0;
800  unsigned int i;
801  for (i = 0; i < nchars; i++) {
802    uint16 charcode = char_table[i].charcode;
803    if (charcode < charcode_name_table_size
804	&& char_table[i].symbol_set != NO_SYMBOL_SET) {
805      for (name_list *p = charcode_name_table[charcode]; p; p = p->next)
806	for (unsigned int j = 0; j < SIZEOF(ligature_chars); j++)
807	  if (strcmp(p->name, ligature_chars[j]) == 0) {
808	    ligature_mask |= 1 << j;
809	    break;
810	  }
811      }
812    }
813  if (ligature_mask) {
814    printf("ligatures");
815    for (i = 0; i < SIZEOF(ligature_names); i++)
816      if (ligature_mask & (1 << i))
817	printf(" %s", ligature_names[i]);
818    printf(" 0\n");
819  }
820}
821
822static void
823read_and_output_kernpairs(File &f)
824{
825  if (tag_info(pair_kern_tag).present) {
826    printf("kernpairs\n");
827    f.seek(tag_info(pair_kern_tag).value);
828    uint16 n_pairs = f.get_uint16();
829    for (int i = 0; i < n_pairs; i++) {
830      uint16 i1 = f.get_uint16();
831      uint16 i2 = f.get_uint16();
832      int16 val = int16(f.get_uint16());
833      if (char_table[i1].symbol_set != NO_SYMBOL_SET
834	  && char_table[i2].symbol_set != NO_SYMBOL_SET
835	  && char_table[i1].charcode < charcode_name_table_size
836	  && char_table[i2].charcode < charcode_name_table_size) {
837	for (name_list *p = charcode_name_table[char_table[i1].charcode];
838	     p;
839	     p = p->next)
840	  for (name_list *q = charcode_name_table[char_table[i2].charcode];
841	       q;
842	       q = q->next)
843	    if (!equal(p->name, UNNAMED) && !equal(q->name, UNNAMED))
844		printf("%s %s %d\n", p->name, q->name, scale(val));
845      }
846    }
847  }
848}
849
850static void
851output_charset(const int tfm_type)
852{
853  require_tag(slant_tag);
854  double slant_angle = int16(tag_info(slant_tag).value)*PI/18000.0;
855  double slant = sin(slant_angle)/cos(slant_angle);
856
857  if (italic_flag)
858    require_tag(x_height_tag);
859  require_tag(lower_ascent_tag);
860  require_tag(lower_descent_tag);
861
862  printf("charset\n");
863  unsigned int i;
864  for (i = 0; i < nchars; i++) {
865    uint16 charcode = char_table[i].charcode;
866
867    // the glyph is bound to one of the searched symbol sets
868    if (char_table[i].symbol_set != NO_SYMBOL_SET) {
869      // the character was in the map file
870      if (charcode < charcode_name_table_size && charcode_name_table[charcode])
871	printf("%s", charcode_name_table[charcode]->name);
872      else if (!all_flag)
873	continue;
874      else if (tfm_type == MSL)
875	printf(hp_msl_to_ucode_name(charcode));
876      else
877	printf(unicode_to_ucode_name(charcode));
878
879      printf("\t%d,%d",
880	     scale(char_table[i].width), scale(char_table[i].ascent));
881
882      int depth = scale(-char_table[i].descent);
883      if (depth < 0)
884	depth = 0;
885      int italic_correction = 0;
886      int left_italic_correction = 0;
887      int subscript_correction = 0;
888
889      if (italic_flag) {
890	italic_correction = scale(char_table[i].right_extent
891				  - char_table[i].width
892				  + italic_sep);
893	if (italic_correction < 0)
894	  italic_correction = 0;
895	subscript_correction = int((tag_info(x_height_tag).value
896				    * slant * .8) + .5);
897	if (subscript_correction > italic_correction)
898	  subscript_correction = italic_correction;
899	left_italic_correction = scale(italic_sep
900				       - char_table[i].left_extent);
901      }
902
903      if (subscript_correction != 0)
904	printf(",%d,%d,%d,%d",
905	       depth, italic_correction, left_italic_correction,
906	       subscript_correction);
907      else if (left_italic_correction != 0)
908	printf(",%d,%d,%d", depth, italic_correction, left_italic_correction);
909      else if (italic_correction != 0)
910	printf(",%d,%d", depth, italic_correction);
911      else if (depth != 0)
912	printf(",%d", depth);
913      // This is fairly arbitrary.  Fortunately it doesn't much matter.
914      unsigned type = 0;
915      if (char_table[i].ascent > int16(tag_info(lower_ascent_tag).value)*9/10)
916	type |= 2;
917      if (char_table[i].descent < int16(tag_info(lower_descent_tag).value)*9/10)
918	type |= 1;
919      printf("\t%d\t%d", type,
920	     char_table[i].symbol_set*256 + char_table[i].code);
921
922      if (tfm_type == UNICODE) {
923	if (charcode >= 0xE000 && charcode <= 0xF8FF)
924	  printf("\t-- HP PUA U+%04X", charcode);
925	else
926	  printf("\t-- U+%04X", charcode);
927      }
928      else
929	printf("\t-- MSL %4d", charcode);
930      printf(" (%3s %3d)\n",
931	     show_symset(char_table[i].symbol_set), char_table[i].code);
932
933      if (charcode < charcode_name_table_size
934	  && charcode_name_table[charcode])
935	for (name_list *p = charcode_name_table[charcode]->next;
936	     p; p = p->next)
937	  printf("%s\t\"\n", p->name);
938    }
939    // warnings about characters in mapfile not found in TFM
940    else if (charcode < charcode_name_table_size
941	     && charcode_name_table[charcode]) {
942      char *name = charcode_name_table[charcode]->name;
943      // don't warn about Unicode or unnamed glyphs
944      //  that aren't in the the TFM file
945      if (tfm_type == UNICODE && !quiet_flag && !equal(name, UNNAMED)
946	  && !is_uname(name)) {
947	fprintf(stderr, "%s: warning: symbol U+%04X (%s",
948		program_name, charcode, name);
949	for (name_list *p = charcode_name_table[charcode]->next;
950	     p; p = p->next)
951	  fprintf(stderr, ", %s", p->name);
952	fprintf(stderr, ") not in any searched symbol set\n");
953      }
954      else if (!quiet_flag && !equal(name, UNNAMED) && !is_uname(name)) {
955	fprintf(stderr, "%s: warning: symbol MSL %d (%s",
956		program_name, charcode, name);
957	for (name_list *p = charcode_name_table[charcode]->next;
958	     p; p = p->next)
959	  fprintf(stderr, ", %s", p->name);
960	fprintf(stderr, ") not in any searched symbol set\n");
961      }
962    }
963  }
964}
965
966#define em_fract(a) (upem >= 0 ? double(a)/upem : 0)
967
968static void
969dump_tags(File &f)
970{
971  double upem = -1.0;
972
973  printf("TFM tags\n"
974	 "\n"
975	 "tag# type count value\n"
976	 "---------------------\n");
977
978  for (int i = min_tag; i <= max_tag; i++) {
979    enum tag_type t = tag_type(i);
980    if (tag_info(t).present) {
981      printf("%4d %4d %5d", i, tag_info(t).type, tag_info(t).count);
982      switch (tag_info(t).type) {
983      case BYTE_TYPE:
984      case USHORT_TYPE:
985	printf(" %5u", tag_info(t).value);
986	switch (i) {
987	case type_tag:
988	  printf(" Font Type ");
989	  switch (tag_info(t).value) {
990	  case MSL:
991	  case SYMSET:
992	    printf("(Intellifont)");
993	    break;
994	  case UNICODE:
995	    printf("(TrueType)");
996	  }
997	  break;
998	case charcode_tag:
999	  printf(" Number of Symbols (%u)", tag_info(t).count);
1000	  break;
1001	case symbol_set_tag:
1002	  printf(" Symbol Sets (%u): ",
1003		 tag_info(symbol_set_tag).count / 14);
1004	  dump_symbol_sets(f);
1005	  break;
1006	case type_structure_tag:
1007	  printf(" Type Structure (%u)", tag_info(t).value);
1008	  break;
1009	case stroke_weight_tag:
1010	  printf(" Stroke Weight (%u)", tag_info(t).value);
1011	  break;
1012	case spacing_tag:
1013	  printf(" Spacing ");
1014	  switch (tag_info(t).value) {
1015	  case 0:
1016	    printf("(Proportional)");
1017	    break;
1018	  case 1:
1019	    printf("(Fixed Pitch: %u DU: %.2f em)", tag_info(t).value,
1020		   em_fract(tag_info(t).value));
1021	    break;
1022	  }
1023	  break;
1024	case appearance_width_tag:
1025	  printf(" Appearance Width (%u)", tag_info(t).value);
1026	  break;
1027	case serif_style_tag:
1028	  printf(" Serif Style (%u)", tag_info(t).value);
1029	  break;
1030	case posture_tag:
1031	  printf(" Posture (%s)", tag_info(t).value == 0
1032				  ? "Upright"
1033				  : tag_info(t).value == 1
1034				    ? "Italic"
1035				    : "Alternate Italic");
1036	  break;
1037	case max_width_tag:
1038	  printf(" Maximum Width (%u DU: %.2f em)", tag_info(t).value,
1039		 em_fract(tag_info(t).value));
1040	  break;
1041	case word_spacing_tag:
1042	  printf(" Interword Spacing (%u DU: %.2f em)", tag_info(t).value,
1043		 em_fract(tag_info(t).value));
1044	  break;
1045	case recommended_line_spacing_tag:
1046	  printf(" Recommended Line Spacing (%u DU: %.2f em)", tag_info(t).value,
1047		 em_fract(tag_info(t).value));
1048	  break;
1049	case x_height_tag:
1050	  printf(" x-Height (%u DU: %.2f em)", tag_info(t).value,
1051		 em_fract(tag_info(t).value));
1052	  break;
1053	case cap_height_tag:
1054	  printf(" Cap Height (%u DU: %.2f em)", tag_info(t).value,
1055		 em_fract(tag_info(t).value));
1056	  break;
1057	case max_ascent_tag:
1058	  printf(" Maximum Ascent (%u DU: %.2f em)", tag_info(t).value,
1059		 em_fract(tag_info(t).value));
1060	  break;
1061	case lower_ascent_tag:
1062	  printf(" Lowercase Ascent (%u DU: %.2f em)", tag_info(t).value,
1063		 em_fract(tag_info(t).value));
1064	  break;
1065	case underscore_thickness_tag:
1066	  printf(" Underscore Thickness (%u DU: %.2f em)", tag_info(t).value,
1067		 em_fract(tag_info(t).value));
1068	  break;
1069	case uppercase_accent_height_tag:
1070	  printf(" Uppercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
1071		 em_fract(tag_info(t).value));
1072	  break;
1073	case lowercase_accent_height_tag:
1074	  printf(" Lowercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
1075		 em_fract(tag_info(t).value));
1076	  break;
1077	case width_tag:
1078	  printf(" Horizontal Escapement array");
1079	  break;
1080	case vertical_escapement_tag:
1081	  printf(" Vertical Escapement array");
1082	  break;
1083	case right_extent_tag:
1084	  printf(" Right Extent array");
1085	  break;
1086	case ascent_tag:
1087	  printf(" Character Ascent array");
1088	  break;
1089	case pair_kern_tag:
1090	  f.seek(tag_info(t).value);
1091	  printf(" Kern Pairs (%u)", f.get_uint16());
1092	  break;
1093	case panose_tag:
1094	  printf(" PANOSE Classification array");
1095	  break;
1096	}
1097	break;
1098      case SIGNED_SHORT_TYPE:
1099	printf(" %5d", int16(tag_info(t).value));
1100	switch (i) {
1101	case slant_tag:
1102	  printf(" Slant (%.2f degrees)", double(tag_info(t).value) / 100);
1103	  break;
1104	case max_descent_tag:
1105	  printf(" Maximum Descent (%d DU: %.2f em)", int16(tag_info(t).value),
1106		 em_fract(int16(tag_info(t).value)));
1107	  break;
1108	case lower_descent_tag:
1109	  printf(" Lowercase Descent (%d DU: %.2f em)", int16(tag_info(t).value),
1110		 em_fract(int16(tag_info(t).value)));
1111	  break;
1112	case underscore_depth_tag:
1113	  printf(" Underscore Depth (%d DU: %.2f em)", int16(tag_info(t).value),
1114		 em_fract(int16(tag_info(t).value)));
1115	  break;
1116	case left_extent_tag:
1117	  printf(" Left Extent array");
1118	  break;
1119	// The type of this tag has changed from SHORT to SIGNED SHORT
1120	// in TFM version 1.3.0.
1121	case ascent_tag:
1122	  printf(" Character Ascent array");
1123	  break;
1124	case descent_tag:
1125	  printf(" Character Descent array");
1126	  break;
1127	}
1128	break;
1129      case RATIONAL_TYPE:
1130	printf(" %5u", tag_info(t).value);
1131	switch (i) {
1132	case inches_per_point_tag:
1133	  printf(" Inches per Point");
1134	  break;
1135	case nominal_point_size_tag:
1136	  printf(" Nominal Point Size");
1137	  break;
1138	case design_units_per_em_tag:
1139	  printf(" Design Units per Em");
1140	  break;
1141	case average_width_tag:
1142	  printf(" Average Width");
1143	  break;
1144	}
1145	if (tag_info(t).count == 1) {
1146	  f.seek(tag_info(t).value);
1147	  uint32 num = f.get_uint32();
1148	  uint32 den = f.get_uint32();
1149	  if (i == design_units_per_em_tag)
1150	    upem = double(num) / den;
1151	  printf(" (%u/%u = %g)", num, den, double(num)/den);
1152	}
1153	break;
1154      case ASCII_TYPE:
1155	printf(" %5u ", tag_info(t).value);
1156	switch (i) {
1157	case comment_tag:
1158	  printf("Comment ");
1159	  break;
1160	case copyright_tag:
1161	  printf("Copyright ");
1162	  break;
1163	case unique_identifier_tag:
1164	  printf("Unique ID ");
1165	  break;
1166	case font_name_tag:
1167	  printf("Typeface Name ");
1168	  break;
1169	case typeface_source_tag:
1170	  printf("Typeface Source ");
1171	  break;
1172	case typeface_tag:
1173	  printf("PCL Typeface ");
1174	  break;
1175	}
1176	dump_ascii(f, t);
1177      }
1178      putchar('\n');
1179    }
1180  }
1181  putchar('\n');
1182}
1183#undef em_fract
1184
1185static void
1186dump_ascii(File &f, tag_type t)
1187{
1188  putchar('"');
1189  if (tag_info(t).count > 4) {
1190    int count = tag_info(t).count;
1191    f.seek(tag_info(t).value);
1192    while (--count)
1193      printf("%c", f.get_byte());
1194  }
1195  else
1196    printf("%.4s", tag_info(t).orig_value);
1197  putchar('"');
1198}
1199
1200static void
1201dump_symbol_sets(File &f)
1202{
1203  uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
1204  uint32 num_symbol_sets = symbol_set_dir_length / 14;
1205
1206  for (uint32 i = 0; i < num_symbol_sets; i++) {
1207    f.seek(tag_info(symbol_set_tag).value + i * 14);
1208    (void)f.get_uint32();		// offset to symbol set name
1209    uint32 off1 = f.get_uint32();	// offset to selection string
1210    uint32 off2 = f.get_uint32();	// offset to symbol set index array
1211    f.seek(off1);
1212    for (uint32 j = 0; j < off2 - off1; j++) {
1213      unsigned char c = f.get_byte();
1214      if ('0' <= c && c <= '9')
1215	putchar(c);
1216      else if ('A' <= c && c <= 'Z')
1217	printf(i < num_symbol_sets - 1 ? "%c," : "%c", c);
1218    }
1219  }
1220}
1221
1222static void
1223dump_symbols(int tfm_type)
1224{
1225  printf("Symbols:\n"
1226	 "\n"
1227	 " glyph id#     symbol set  name(s)\n"
1228	 "----------------------------------\n");
1229  for (uint32 i = 0; i < nchars; i++) {
1230    uint16 charcode = char_table[i].charcode;
1231    if (charcode < charcode_name_table_size
1232	&& charcode_name_table[charcode]) {
1233      if (char_table[i].symbol_set != NO_SYMBOL_SET) {
1234	printf(tfm_type == UNICODE ? "%4d (U+%04X)   (%3s %3d)  %s"
1235				   : "%4d (MSL %4d) (%3s %3d)  %s",
1236	       i, charcode,
1237	       show_symset(char_table[i].symbol_set),
1238	       char_table[i].code,
1239	       charcode_name_table[charcode]->name);
1240	for (name_list *p = charcode_name_table[charcode]->next;
1241	      p; p = p->next)
1242	  printf(", %s", p->name);
1243	putchar('\n');
1244      }
1245    }
1246    else {
1247      printf(tfm_type == UNICODE ? "%4d (U+%04X)   "
1248				 : "%4d (MSL %4d) ",
1249	     i, charcode);
1250      if (char_table[i].symbol_set != NO_SYMBOL_SET)
1251	printf("(%3s %3d)",
1252	       show_symset(char_table[i].symbol_set), char_table[i].code);
1253      putchar('\n');
1254    }
1255  }
1256  putchar('\n');
1257}
1258
// Format a numeric symbol set value as its selection string: the
// decimal number `symset / 32' followed by the character whose code is
// `(symset & 31) + 64' (e.g. 277 yields "8U").
//
// The result lives in a static buffer that is overwritten by the next
// call, so the caller must use or copy it before calling again.
static char *
show_symset(unsigned int symset)
{
   // Large enough for the biggest 32-bit quotient (9 digits), the
   // letter, and the terminating NUL; snprintf() guards wider ints.
   static char symset_str[16];

   snprintf(symset_str, sizeof(symset_str), "%u%c",
	    symset / 32, (int)((symset & 31) + 64));
   return symset_str;
}
1267
1268static char *
1269hp_msl_to_ucode_name(int msl)
1270{
1271  char codestr[8];
1272
1273  sprintf(codestr, "%d", msl);
1274  const char *ustr = hp_msl_to_unicode_code(codestr);
1275  if (ustr == NULL)
1276    ustr = UNNAMED;
1277  else {
1278    char *nonum;
1279    int ucode = int(strtol(ustr, &nonum, 16));
1280    // don't allow PUA code points as Unicode names
1281    if (ucode >= 0xE000 && ucode <= 0xF8FF)
1282      ustr = UNNAMED;
1283  }
1284  if (!equal(ustr, UNNAMED)) {
1285    const char *uname_decomposed = decompose_unicode(ustr);
1286    if (uname_decomposed)
1287      // 1st char is the number of components
1288      ustr = uname_decomposed + 1;
1289  }
1290  char *value = new char[strlen(ustr) + 1];
1291  sprintf(value, equal(ustr, UNNAMED) ? ustr : "u%s", ustr);
1292  return value;
1293}
1294
1295static char *
1296unicode_to_ucode_name(int ucode)
1297{
1298  const char *ustr;
1299  char codestr[8];
1300
1301  // don't allow PUA code points as Unicode names
1302  if (ucode >= 0xE000 && ucode <= 0xF8FF)
1303    ustr = UNNAMED;
1304  else {
1305    sprintf(codestr, "%04X", ucode);
1306    ustr = codestr;
1307  }
1308  if (!equal(ustr, UNNAMED)) {
1309    const char *uname_decomposed = decompose_unicode(ustr);
1310    if (uname_decomposed)
1311      // 1st char is the number of components
1312      ustr = uname_decomposed + 1;
1313  }
1314  char *value = new char[strlen(ustr) + 1];
1315  sprintf(value, equal(ustr, UNNAMED) ? ustr : "u%s", ustr);
1316  return value;
1317}
1318
1319static int
1320is_uname(char *name)
1321{
1322  size_t i;
1323  size_t len = strlen(name);
1324  if (len % 5)
1325    return 0;
1326
1327  if (name[0] != 'u')
1328    return 0;
1329  for (i = 1; i < 4; i++)
1330    if (!csxdigit(name[i]))
1331      return 0;
1332  for (i = 5; i < len; i++)
1333    if (i % 5 ? !csxdigit(name[i]) : name[i] != '_')
1334      return 0;
1335
1336  return 1;
1337}
1338
1339static int
1340read_map(const char *file, const int tfm_type)
1341{
1342  errno = 0;
1343  FILE *fp = fopen(file, "r");
1344  if (!fp) {
1345    error("can't open `%1': %2", file, strerror(errno));
1346    return 0;
1347  }
1348  current_filename = file;
1349  char buf[512];
1350  current_lineno = 0;
1351  char *nonum;
1352  while (fgets(buf, int(sizeof(buf)), fp)) {
1353    current_lineno++;
1354    char *ptr = buf;
1355    while (csspace(*ptr))
1356      ptr++;
1357    if (*ptr == '\0' || *ptr == '#')
1358      continue;
1359    ptr = strtok(ptr, " \n\t");
1360    if (!ptr)
1361      continue;
1362
1363    int msl_code = int(strtol(ptr, &nonum, 10));
1364    if (*nonum != '\0') {
1365      if (csxdigit(*nonum))
1366	error("bad MSL map: got hex code (%1)", ptr);
1367      else if (ptr == nonum)
1368	error("bad MSL map: bad MSL code (%1)", ptr);
1369      else
1370	error("bad MSL map");
1371      fclose(fp);
1372      return 0;
1373    }
1374
1375    ptr = strtok(NULL, " \n\t");
1376    if (!ptr)
1377      continue;
1378    int unicode = int(strtol(ptr, &nonum, 16));
1379    if (*nonum != '\0') {
1380      if (ptr == nonum)
1381	error("bad Unicode value (%1)", ptr);
1382      else
1383	error("bad Unicode map");
1384      fclose(fp);
1385      return 0;
1386    }
1387    if (strlen(ptr) != 4) {
1388      error("bad Unicode value (%1)", ptr);
1389      return 0;
1390    }
1391
1392    int n = tfm_type == MSL ? msl_code : unicode;
1393    if (tfm_type == UNICODE && n > 0xFFFF) {
1394      // greatest value supported by TFM files
1395      error("bad Unicode value (%1): greatest value is 0xFFFF", ptr);
1396      fclose(fp);
1397      return 0;
1398    }
1399    else if (n < 0) {
1400      error("negative code value (%1)", ptr);
1401      fclose(fp);
1402      return 0;
1403    }
1404
1405    ptr = strtok(NULL, " \n\t");
1406    if (!ptr) {					// groff name
1407      error("missing name(s)");
1408      fclose(fp);
1409      return 0;
1410    }
1411    // leave decomposed Unicode values alone
1412    else if (is_uname(ptr) && !is_decomposed(ptr))
1413      ptr = unicode_to_ucode_name(strtol(ptr + 1, &nonum, 16));
1414
1415    if (size_t(n) >= charcode_name_table_size) {
1416      size_t old_size = charcode_name_table_size;
1417      name_list **old_table = charcode_name_table;
1418      charcode_name_table_size = n + 256;
1419      charcode_name_table = new name_list *[charcode_name_table_size];
1420      if (old_table) {
1421	memcpy(charcode_name_table, old_table, old_size*sizeof(name_list *));
1422	a_delete old_table;
1423      }
1424      for (size_t i = old_size; i < charcode_name_table_size; i++)
1425	charcode_name_table[i] = NULL;
1426    }
1427
1428    // a '#' that isn't the first groff name begins a comment
1429    for (int names = 1; ptr; ptr = strtok(NULL, " \n\t")) {
1430      if (names++ > 1 && *ptr == '#')
1431	break;
1432      charcode_name_table[n] = new name_list(ptr, charcode_name_table[n]);
1433    }
1434  }
1435  fclose(fp);
1436  return 1;
1437}
1438
1439static const char *
1440xbasename(const char *s)
1441{
1442  // DIR_SEPS[] are possible directory separator characters, see
1443  // nonposix.h.  We want the rightmost separator of all possible
1444  // ones.  Example: d:/foo\\bar.
1445  const char *b = strrchr(s, DIR_SEPS[0]), *b1;
1446  const char *sep = &DIR_SEPS[1];
1447
1448  while (*sep)
1449    {
1450      b1 = strrchr(s, *sep);
1451      if (b1 && (!b || b1 > b))
1452	b = b1;
1453      sep++;
1454    }
1455  return b ? b + 1 : s;
1456}
1457