1# coding: utf-8
2
3##
4# For RDoc::Text#to_html
5
6require 'strscan'
7
8##
9# For RDoc::Text#snippet
10
11begin
12  gem 'json'
13rescue Gem::LoadError
14end
15
16require 'json'
17
18##
19# Methods for manipulating comment text
20
21module RDoc::Text
22
23  ##
24  # Maps markup formats to classes that can parse them.  If the format is
25  # unknown, "rdoc" format is used.
26
27  MARKUP_FORMAT = {
28    'markdown' => RDoc::Markdown,
29    'rdoc'     => RDoc::Markup,
30    'rd'       => RDoc::RD,
31    'tomdoc'   => RDoc::TomDoc,
32  }
33
34  MARKUP_FORMAT.default = RDoc::Markup
35
36  ##
37  # Maps an encoding to a Hash of characters properly transcoded for that
38  # encoding.
39  #
40  # See also encode_fallback.
41
42  TO_HTML_CHARACTERS = Hash.new do |h, encoding|
43    h[encoding] = {
44      :close_dquote => encode_fallback('”', encoding, '"'),
45      :close_squote => encode_fallback('’', encoding, '\''),
46      :copyright    => encode_fallback('©', encoding, '(c)'),
47      :ellipsis     => encode_fallback('…', encoding, '...'),
48      :em_dash      => encode_fallback('—', encoding, '---'),
49      :en_dash      => encode_fallback('–', encoding, '--'),
50      :open_dquote  => encode_fallback('“', encoding, '"'),
51      :open_squote  => encode_fallback('‘', encoding, '\''),
52      :trademark    => encode_fallback('®', encoding, '(r)'),
53    }
54  end if Object.const_defined? :Encoding
55
56  ##
57  # Transcodes +character+ to +encoding+ with a +fallback+ character.
58
59  def self.encode_fallback character, encoding, fallback
60    character.encode(encoding, :fallback => { character => fallback },
61                     :undef => :replace, :replace => fallback)
62  end
63
64  ##
65  # Expands tab characters in +text+ to eight spaces
66
67  def expand_tabs text
68    expanded = []
69
70    text.each_line do |line|
71      line.gsub!(/^((?:.{8})*?)([^\t\r\n]{0,7})\t/) do
72        r = "#{$1}#{$2}#{' ' * (8 - $2.size)}"
73        r.force_encoding text.encoding if Object.const_defined? :Encoding
74        r
75      end until line !~ /\t/
76
77      expanded << line
78    end
79
80    expanded.join
81  end
82
83  ##
84  # Flush +text+ left based on the shortest line
85
86  def flush_left text
87    indent = 9999
88
89    text.each_line do |line|
90      line_indent = line =~ /\S/ || 9999
91      indent = line_indent if indent > line_indent
92    end
93
94    empty = ''
95    empty.force_encoding text.encoding if Object.const_defined? :Encoding
96
97    text.gsub(/^ {0,#{indent}}/, empty)
98  end
99
100  ##
101  # Convert a string in markup format into HTML.
102  #
103  # Requires the including class to implement #formatter
104
105  def markup text
106    parse(text).accept formatter
107  end
108
109  ##
110  # Strips hashes, expands tabs then flushes +text+ to the left
111
112  def normalize_comment text
113    return text if text.empty?
114
115    text = strip_stars    text
116    text = strip_hashes   text
117    text = expand_tabs    text
118    text = flush_left     text
119    text = strip_newlines text
120    text
121  end
122
123  ##
124  # Normalizes +text+ then builds a RDoc::Markup::Document from it
125
126  def parse text, format = 'rdoc'
127    return text if RDoc::Markup::Document === text
128    return text.parse if RDoc::Comment === text
129
130    text = normalize_comment text # TODO remove, should not be necessary
131
132    return RDoc::Markup::Document.new if text =~ /\A\n*\z/
133
134    MARKUP_FORMAT[format].parse text
135  end
136
137  ##
138  # The first +limit+ characters of +text+ as HTML
139
140  def snippet text, limit = 100
141    document = parse text
142
143    RDoc::Markup::ToHtmlSnippet.new(limit).convert document
144  end
145
146  ##
147  # Strips leading # characters from +text+
148
149  def strip_hashes text
150    return text if text =~ /^(?>\s*)[^\#]/
151
152    empty = ''
153    empty.force_encoding text.encoding if Object.const_defined? :Encoding
154
155    text.gsub(/^\s*(#+)/) { $1.tr '#', ' ' }.gsub(/^\s+$/, empty)
156  end
157
158  ##
159  # Strips leading and trailing \n characters from +text+
160
161  def strip_newlines text
162    text.gsub(/\A\n*(.*?)\n*\z/m) do $1 end # block preserves String encoding
163  end
164
165  ##
166  # Strips /* */ style comments
167
168  def strip_stars text
169    return text unless text =~ %r%/\*.*\*/%m
170
171    encoding = text.encoding if Object.const_defined? :Encoding
172
173    text = text.gsub %r%Document-method:\s+[\w:.#=!?]+%, ''
174
175    space = ' '
176    space.force_encoding encoding if encoding
177
178    text.sub!  %r%/\*+%       do space * $&.length end
179    text.sub!  %r%\*+/%       do space * $&.length end
180    text.gsub! %r%^[ \t]*\*%m do space * $&.length end
181
182    empty = ''
183    empty.force_encoding encoding if encoding
184    text.gsub(/^\s+$/, empty)
185  end
186
187  ##
188  # Converts ampersand, dashes, ellipsis, quotes, copyright and registered
189  # trademark symbols in +text+ to properly encoded characters.
190
191  def to_html text
192    if Object.const_defined? :Encoding then
193      html = ''.encode text.encoding
194
195      encoded = RDoc::Text::TO_HTML_CHARACTERS[text.encoding]
196    else
197      html = ''
198      encoded = {
199        :close_dquote => '���',
200        :close_squote => '���',
201        :copyright    => '��',
202        :ellipsis     => '���',
203        :em_dash      => '���',
204        :en_dash      => '���',
205        :open_dquote  => '���',
206        :open_squote  => '���',
207        :trademark    => '��',
208      }
209    end
210
211    s = StringScanner.new text
212    insquotes = false
213    indquotes = false
214    after_word = nil
215
216    until s.eos? do
217      case
218      when s.scan(/<(tt|code)>.*?<\/\1>/) then # skip contents of tt
219        html << s.matched.gsub('\\\\', '\\')
220      when s.scan(/<(tt|code)>.*?/) then
221        warn "mismatched <#{s[1]}> tag" # TODO signal file/line
222        html << s.matched
223      when s.scan(/<[^>]+\/?s*>/) then # skip HTML tags
224        html << s.matched
225      when s.scan(/\\(\S)/) then # unhandled suppressed crossref
226        html << s[1]
227        after_word = nil
228      when s.scan(/\.\.\.(\.?)/) then
229        html << s[1] << encoded[:ellipsis]
230        after_word = nil
231      when s.scan(/\(c\)/) then
232        html << encoded[:copyright]
233        after_word = nil
234      when s.scan(/\(r\)/) then
235        html << encoded[:trademark]
236        after_word = nil
237      when s.scan(/---/) then
238        html << encoded[:em_dash]
239        after_word = nil
240      when s.scan(/--/) then
241        html << encoded[:en_dash]
242        after_word = nil
243      when s.scan(/&quot;|"/) then
244        html << encoded[indquotes ? :close_dquote : :open_dquote]
245        indquotes = !indquotes
246        after_word = nil
247      when s.scan(/``/) then # backtick double quote
248        html << encoded[:open_dquote]
249        after_word = nil
250      when s.scan(/''/) then # tick double quote
251        html << encoded[:close_dquote]
252        after_word = nil
253      when s.scan(/'/) then # single quote
254        if insquotes
255          html << encoded[:close_squote]
256          insquotes = false
257        elsif after_word
258          # Mary's dog, my parents' house: do not start paired quotes
259          html << encoded[:close_squote]
260        else
261          html << encoded[:open_squote]
262          insquotes = true
263        end
264
265        after_word = nil
266      else # advance to the next potentially significant character
267        match = s.scan(/.+?(?=[<\\.("'`&-])/) #"
268
269        if match then
270          html << match
271          after_word = match =~ /\w$/
272        else
273          html << s.rest
274          break
275        end
276      end
277    end
278
279    html
280  end
281
282  ##
283  # Wraps +txt+ to +line_len+
284
285  def wrap(txt, line_len = 76)
286    res = []
287    sp = 0
288    ep = txt.length
289
290    while sp < ep
291      # scan back for a space
292      p = sp + line_len - 1
293      if p >= ep
294        p = ep
295      else
296        while p > sp and txt[p] != ?\s
297          p -= 1
298        end
299        if p <= sp
300          p = sp + line_len
301          while p < ep and txt[p] != ?\s
302            p += 1
303          end
304        end
305      end
306      res << txt[sp...p] << "\n"
307      sp = p
308      sp += 1 while sp < ep and txt[sp] == ?\s
309    end
310
311    res.join.strip
312  end
313
314end
315
316