#!/usr/bin/env ruby

# Generates the emoji exchange table (Ruby source) from emoji4unicode.xml.
#
# example:
#   ./enc-emoji4unicode.rb emoji4unicode.xml > ../enc/trans/emoji-exchange-tbl.rb

require 'rexml/document'
require File.expand_path("../transcode-tblgen", __FILE__)

# Reads an emoji4unicode XML document and emits, for each ordered pair of
# carriers, a Ruby array literal of [from, to] UTF-8 hex-string pairs.
class EmojiTable
  # Not referenced anywhere in this file; kept so external references to
  # EmojiTable::VERBOSE_MODE keep resolving.
  VERBOSE_MODE = false

  # xml_path:: path of the emoji4unicode XML file to parse.
  def initialize(xml_path)
    # Block form so the file handle is closed once parsing is done instead of
    # being left open until garbage collection.
    @doc = File.open(xml_path) { |file| REXML::Document.new(file) }
    @kddi_undoc = make_kddi_undoc_map
  end

  # Walks every <e> element of the document and calls the block once for each
  # element that has a usable codepoint for +from_carrier+.
  #
  # from_carrier, to_carrier:: carrier names; they are downcased and used as
  #                            the names of the XML attributes to read.
  #
  # The block receives a Hash with these keys:
  # :from::     hex string of the UTF-8 bytes of the source codepoint
  # :to::       hex string of the UTF-8 bytes of the destination
  # :comment::  human readable description of the mapping
  # :fallback:: true when the destination is a textual (or GETA) fallback
  # :proposal:: true when the destination attribute started with "+"
  #
  # Raises RuntimeError when the destination attribute is neither empty nor
  # in the expected codepoint notation.
  def conversion(from_carrier, to_carrier, &block)
    REXML::XPath.each(@doc.root, '//e') do |e|
      from = e.attribute(from_carrier.downcase).to_s
      to = e.attribute(to_carrier.downcase).to_s
      text_fallback = e.attribute('text_fallback').to_s
      name = e.attribute('name').to_s

      from = $1 if from =~ /^(?:\*|\+)(.+)$/ # proposed or unified
      # Elements without a single hexadecimal source codepoint are skipped.
      next if from.empty? || from !~ /^[0-9A-F]+$/

      from_utf8 = [from.hex].pack("U").unpack("H*").first
      if to =~ /^(?:>|\*)?([0-9A-F\+]+)$/
        str_to = $1
        proposal = str_to.start_with?('+') # unicode "proposed" begins at "+"
        str_to = str_to.sub(/^\+/, '')
        # The remaining "+" characters separate the codepoints of a sequence.
        tos = str_to.split('+')
        to_utf8 = tos.map(&:hex).pack("U*").unpack("H*").first
        comment = "[%s] U+%X -> %s" % [name, from.hex, tos.map{|c| "U+%X"%c.hex}.join(' ')]
        block.call(:from => from_utf8,
                   :to => to_utf8,
                   :comment => comment,
                   :fallback => false,
                   :proposal => proposal)
      elsif to.empty?
        if text_fallback.empty?
          comment = "[%s] U+%X -> U+3013 (GETA)" % [name, from.hex]
          block.call(:from => from_utf8,
                     :to => "\u{3013}".unpack("H*").first, # geta
                     :comment => comment,
                     :fallback => true,
                     :proposal => false)
        else
          to_utf8 = text_fallback.unpack("H*").first
          comment = %([%s] U+%X -> "%s") % [name, from.hex, text_fallback]
          block.call(:from => from_utf8,
                     :to => to_utf8,
                     :comment => comment,
                     :fallback => true,
                     :proposal => false)
        end
      else
        raise "something wrong: %s -> %s" % [from, to]
      end
    end
  end

  # Writes the EMOJI_EXCHANGE_TBL entry for one carrier pair to +io+.
  #
  # io::           object responding to +puts+
  # from_carrier:: source carrier name ("Unicode" maps to encoding "UTF-8",
  #                anything else to "UTF8-<carrier>")
  # to_carrier::   destination carrier name (same naming rule)
  #
  # When the source carrier is "KDDI" the rows are emitted twice: once keyed
  # by the documented codepoints and once keyed by the undocumented ones.
  def generate(io, from_carrier, to_carrier)
    from_encoding = encoding_name(from_carrier)
    to_encoding = encoding_name(to_carrier)
    io.puts "EMOJI_EXCHANGE_TBL['#{from_encoding}']['#{to_encoding}'] = ["
    io.puts " # for documented codepoints" if from_carrier == "KDDI"
    conversion(from_carrier, to_carrier) do |params|
      io.puts table_row(params[:from], params)
    end
    if from_carrier == "KDDI"
      io.puts " # for undocumented codepoints"
      conversion(from_carrier, to_carrier) do |params|
        io.puts table_row(undocumented_kddi_utf8(params[:from]), params)
      end
    end
    io.puts "]"
    io.puts
  end

  private

  # Encoding name used as a key of EMOJI_EXCHANGE_TBL for +carrier+.
  def encoding_name(carrier)
    (carrier == "Unicode") ? "UTF-8" : "UTF8-"+carrier
  end

  # Formats one table row. Fallback and proposed mappings are emitted as
  # :undef instead of the destination bytes.
  def table_row(from_hex, params)
    to = (params[:fallback] || params[:proposal]) ? ":undef" : %Q{"#{params[:to]}"}
    %{ ["#{from_hex}", #{to}], # #{params[:comment]}}
  end

  # Translates the UTF-8 hex string of a documented KDDI codepoint into the
  # UTF-8 hex string of the corresponding undocumented codepoint.
  # Raises RuntimeError when the codepoint has no undocumented counterpart.
  def undocumented_kddi_utf8(from_hex)
    unicode = utf8_to_ucs(from_hex)
    undoc = @kddi_undoc.fetch(unicode) do
      raise "no undocumented KDDI codepoint for U+%X" % unicode
    end
    ucs_to_utf8(undoc)
  end

  # Hex string of UTF-8 bytes -> first codepoint (Integer).
  def utf8_to_ucs(cp)
    [cp].pack("H*").unpack("U*").first
  end

  # Codepoint (Integer) -> hex string of its UTF-8 bytes.
  def ucs_to_utf8(cp)
    [cp].pack("U*").unpack("H*").first
  end

  # Builds a Hash from the first element of each "UCS/EMOJI_SHIFT_JIS-KDDI"
  # record to the second element of the "EMOJI_SHIFT_JIS-KDDI-UNDOC/UCS"
  # record that shares the same Shift_JIS value.
  #
  # citrus_decode_mapsrc is not defined in this file (presumably provided by
  # transcode-tblgen); the record layout [ucs, sjis] / [sjis, ucs] is taken
  # from how the pairs are sorted and compared below -- TODO confirm.
  #
  # Raises RuntimeError when the two tables do not line up record by record.
  def make_kddi_undoc_map
    pub_to_sjis = citrus_decode_mapsrc(
      "mskanji", 2, "UCS/EMOJI_SHIFT_JIS-KDDI").sort_by{|_u, s| s}
    sjis_to_undoc = citrus_decode_mapsrc(
      "mskanji", 2, "EMOJI_SHIFT_JIS-KDDI-UNDOC/UCS").sort_by{|s, _u| s}
    pub_to_sjis.zip(sjis_to_undoc).each_with_object({}) do |(pub, undoc), map|
      # undoc is nil when the undocumented table is shorter than the public one.
      raise "no match sjis codepoint" if undoc.nil? || pub[1] != undoc[0]
      map[pub[0]] = undoc[1]
    end
  end
end

# STDOUT is normally redirected into the generated table file, so the usage
# message goes to STDERR; abort also exits with status 1.
abort "usage: #$0 [emoji4unicode.xml]" if ARGV.empty?

# Global consumed outside this file (presumably by transcode-tblgen when it
# locates the map sources -- verify); must be set before EmojiTable.new.
$srcdir = File.expand_path("../../enc/trans", __FILE__)
emoji_table = EmojiTable.new(ARGV[0])

companies = %w(DoCoMo KDDI SoftBank Unicode)

io = STDOUT
io.puts "EMOJI_EXCHANGE_TBL = Hash.new{|h,k| h[k] = {}}"
companies.each do |from_company|
  companies.each do |to_company|
    next if from_company == to_company
    emoji_table.generate(io, from_company, to_company)
  end
end