1#!/usr/bin/env ruby
2
3# example:
4# ./enc-emoji4unicode.rb emoji4unicode.xml > ../enc/trans/emoji-exchange-tbl.rb
5
6require 'rexml/document'
7require File.expand_path("../transcode-tblgen", __FILE__)
8
# Reads an emoji4unicode XML document and writes carrier-to-carrier emoji
# mapping tables as Ruby source (entries of EMOJI_EXCHANGE_TBL).
#
# Code points are handled in two forms throughout this class:
# * "ucs"  - an Integer code point
# * "utf8" - the lowercase hex dump of that code point's UTF-8 bytes
class EmojiTable
  VERBOSE_MODE = false

  # Parses the XML file at +xml_path+ and builds the KDDI
  # documented -> undocumented code point map.
  #
  # The file is opened with the block form of File.open so the handle is
  # closed as soon as REXML has finished building the document.
  def initialize(xml_path)
    @doc = File.open(xml_path) { |xml| REXML::Document.new(xml) }
    @kddi_undoc = make_kddi_undoc_map
  end

  # Walks every <e> element of the document and calls +block+ once for each
  # element that has a usable source code point for +from_carrier+.
  #
  # +from_carrier+ / +to_carrier+ are carrier names; their downcased form is
  # used as the XML attribute name to read.
  #
  # The block receives a Hash with these keys:
  # :from     - source code point as UTF-8 hex
  # :to       - target sequence as UTF-8 hex (the geta mark U+3013 or the
  #             element's text_fallback when there is no target)
  # :comment  - human-readable description of the mapping
  # :fallback - true when :to is a fallback rather than a real mapping
  # :proposal - true when the target value started with "+"
  #
  # Elements whose source attribute is missing or is not a single run of
  # uppercase hex digits are skipped. Raises RuntimeError when the target
  # attribute is non-empty but not in a recognised form.
  def conversion(from_carrier, to_carrier, &block)
    from_attr = from_carrier.downcase
    to_attr = to_carrier.downcase

    REXML::XPath.each(@doc.root, '//e') do |e|
      from = e.attribute(from_attr).to_s
      from = $1 if from =~ /\A[*+](.+)\z/ # proposed or unified
      # \A/\z anchor the whole attribute value; ^/$ would accept any single
      # matching line of a multi-line value.
      next unless from =~ /\A[0-9A-F]+\z/

      to = e.attribute(to_attr).to_s
      text_fallback = e.attribute('text_fallback').to_s
      name = e.attribute('name').to_s
      source_cp = from.hex

      if to =~ /\A[>*]?([0-9A-F+]+)\z/
        # An optional leading ">" or "*" marker is dropped; the remainder is
        # one or more hex code points joined by "+".
        str_to = $1
        proposal = str_to.start_with?('+') # unicode "proposed" begins at "+"
        tos = str_to.sub(/\A\+/, '').split('+')
        to_utf8 = tos.map(&:hex).pack("U*").unpack("H*").first
        targets = tos.map { |c| "U+%X" % c.hex }.join(' ')
        comment = "[%s] U+%X -> %s" % [name, source_cp, targets]
        fallback = false
      elsif to.empty?
        proposal = false
        fallback = true
        if text_fallback.empty?
          # No mapping and no textual substitute: fall back to the geta mark.
          to_utf8 = "\u{3013}".unpack("H*").first
          comment = "[%s] U+%X -> U+3013 (GETA)" % [name, source_cp]
        else
          to_utf8 = text_fallback.unpack("H*").first
          comment = %([%s] U+%X -> "%s") % [name, source_cp, text_fallback]
        end
      else
        raise "something wrong: %s -> %s" % [from, to]
      end

      block.call(:from => ucs_to_utf8(source_cp),
                 :to => to_utf8,
                 :comment => comment,
                 :fallback => fallback,
                 :proposal => proposal)
    end
  end

  # Writes the table for +from_carrier+ -> +to_carrier+ to +io+ as one
  # "EMOJI_EXCHANGE_TBL[from][to] = [ ... ]" assignment followed by a blank
  # line.
  #
  # Fallback and proposal mappings are written with :undef as the target.
  # When +from_carrier+ is "KDDI" the rows are written twice: first keyed by
  # the documented code points, then keyed by the corresponding undocumented
  # code points. Raises RuntimeError if a KDDI code point has no
  # undocumented counterpart in the map.
  def generate(io, from_carrier, to_carrier)
    kddi_source = (from_carrier == "KDDI")

    io.puts "EMOJI_EXCHANGE_TBL['#{encoding_name(from_carrier)}']['#{encoding_name(to_carrier)}'] = ["
    io.puts "  # for documented codepoints" if kddi_source
    conversion(from_carrier, to_carrier) do |params|
      io.puts table_row(params[:from], params)
    end
    if kddi_source
      io.puts "  # for undocumented codepoints"
      conversion(from_carrier, to_carrier) do |params|
        io.puts table_row(undocumented_kddi_utf8(params[:from]), params)
      end
    end
    io.puts "]"
    io.puts
  end

  private

  # Encoding name used as a table key: "UTF-8" for the "Unicode" carrier,
  # otherwise "UTF8-" followed by the carrier name.
  def encoding_name(carrier)
    carrier == "Unicode" ? "UTF-8" : "UTF8-" + carrier
  end

  # Formats one table row. +from_utf8+ is the key to print; +params+ is the
  # Hash yielded by #conversion.
  def table_row(from_utf8, params)
    to = (params[:fallback] || params[:proposal]) ? ":undef" : %Q{"#{params[:to]}"}
    %{  ["#{from_utf8}", #{to}], # #{params[:comment]}}
  end

  # Translates a documented KDDI code point (UTF-8 hex) to its undocumented
  # counterpart (UTF-8 hex), failing loudly instead of handing nil to pack.
  def undocumented_kddi_utf8(from_utf8)
    unicode = utf8_to_ucs(from_utf8)
    undoc = @kddi_undoc[unicode]
    raise "no undocumented KDDI codepoint for U+%X" % unicode unless undoc
    ucs_to_utf8(undoc)
  end

  # UTF-8 hex string -> Integer code point (first code point only).
  def utf8_to_ucs(cp)
    [cp].pack("H*").unpack("U*").first
  end

  # Integer code point -> UTF-8 hex string.
  def ucs_to_utf8(cp)
    [cp].pack("U*").unpack("H*").first
  end

  # Builds a Hash from the first element of each "UCS/EMOJI_SHIFT_JIS-KDDI"
  # record to the second element of the matching
  # "EMOJI_SHIFT_JIS-KDDI-UNDOC/UCS" record.
  #
  # Both lists come from citrus_decode_mapsrc (defined outside this file;
  # presumably [ucs, sjis] and [sjis, ucs] pairs - confirm against
  # transcode-tblgen). They are sorted by their Shift_JIS element and paired
  # positionally; a pair whose Shift_JIS elements differ, or a record with
  # no partner, raises RuntimeError.
  def make_kddi_undoc_map
    pub_to_sjis = citrus_decode_mapsrc(
      "mskanji", 2, "UCS/EMOJI_SHIFT_JIS-KDDI").sort_by { |_u, s| s }
    sjis_to_undoc = citrus_decode_mapsrc(
      "mskanji", 2, "EMOJI_SHIFT_JIS-KDDI-UNDOC/UCS").sort_by { |s, _u| s }

    pub_to_sjis.zip(sjis_to_undoc).each_with_object({}) do |(pub, undoc), map|
      # zip pads with nil when the second list is shorter.
      raise "no match sjis codepoint" if undoc.nil? || pub[1] != undoc[0]
      map[pub[0]] = undoc[1]
    end
  end
end
116
# Command-line driver: takes the path of the emoji4unicode XML file as the
# first argument and prints every carrier-to-carrier table to standard output.
xml_path = ARGV.first
if xml_path.nil?
  puts "usage: #$0 [emoji4unicode.xml]"
  exit(1)
end

# Must be assigned before the table is built; it is a global, presumably read
# by the map-source helpers loaded from transcode-tblgen (confirm there).
$srcdir = File.expand_path("../../enc/trans", __FILE__)
emoji_table = EmojiTable.new(xml_path)

companies = %w(DoCoMo KDDI SoftBank Unicode)

io = STDOUT
io.puts "EMOJI_EXCHANGE_TBL = Hash.new{|h,k| h[k] = {}}"
# Every ordered pair of distinct carriers, in the same order nested loops
# over the list would visit them.
companies.product(companies).each do |from_company, to_company|
  next if from_company == to_company
  emoji_table.generate(io, from_company, to_company)
end
134