" Vim indent file
" Language:         DTD (Document Type Definition for XML)
" Maintainer:       Nikolai Weibull <now@bitwi.se>
" Latest Revision:  2008-07-18

" Standard indent-plugin guard: only one indent script per buffer.
if exists("b:did_indent")
  finish
endif
let b:did_indent = 1

setlocal indentexpr=GetDTDIndent()
setlocal indentkeys=!^F,o,O,>
setlocal nosmartindent

let b:undo_indent = "setl inde< indk< si<"

" The functions only need to be defined once per Vim session.  This check
" comes BEFORE 'cpoptions' is touched so that finishing early never leaves
" the user's 'cpoptions' modified.
if exists("*GetDTDIndent")
  finish
endif

let s:cpo_save = &cpo
set cpo&vim

" Return the effective shift width.  shiftwidth() honours 'shiftwidth' being
" zero (falling back to 'tabstop'); older Vims without it use &sw directly.
if exists('*shiftwidth')
  function s:sw()
    return shiftwidth()
  endfunction
else
  function s:sw()
    return &sw
  endfunction
endif

" Default token: a run of non-whitespace characters.
" TODO: Needs to be adjusted to stop at [, <, and ].
let s:token_pattern = '^[^[:space:]]\+'

" Read one token from a:input, starting at byte index a:start.
"   a:input  text to tokenize (may contain embedded newlines)
"   a:start  byte index at which to start; leading whitespace is skipped
"   a:1      optional pattern for the token (default s:token_pattern)
" Returns [token, end] where end is the byte index just past the token.  If
" no token matches, returns ["", a:start].
function s:lex1(input, start, ...)
  let pattern = a:0 > 0 ? a:1 : s:token_pattern
  let start = matchend(a:input, '^\_s*', a:start)
  if start == -1
    return ["", a:start]
  endif
  let end = matchend(a:input, pattern, start)
  if end == -1
    return ["", a:start]
  endif
  let token = strpart(a:input, start, end - start)
  return [token, end]
endfunction

" Like s:lex1(), but skips over '--' ... '--' comment runs.  If a comment is
" opened but never closed, the empty-token result is returned.
function s:lex(input, start, ...)
  let pattern = a:0 > 0 ? a:1 : s:token_pattern
  let info = s:lex1(a:input, a:start, pattern)
  while info[0] == '--'
    " Consume tokens up to the closing '--'.
    let info = s:lex1(a:input, info[1], pattern)
    while info[0] != "" && info[0] != '--'
      let info = s:lex1(a:input, info[1], pattern)
    endwhile
    if info[0] == ""
      return info
    endif
    let info = s:lex1(a:input, info[1], pattern)
  endwhile
  return info
endfunction

" Walk a parenthesized group whose opening '(' sits just before a:end.
"   a:line  declaration text gathered so far
"   a:end   byte index just past the opening parenthesis
" Returns [indent, end].  indent is -1 when the outermost parenthesis was
" closed; otherwise it is the offset of the innermost unclosed '(' from the
" preceding newline (or from the start of the text), i.e. the column
" directly after that parenthesis.
function s:indent_to_innermost_parentheses(line, end)
  let token = '('
  let end = a:end
  let parentheses = [end - 1]
  while token != ""
    let [token, end] = s:lex(a:line, end, '^\%([(),|]\|[A-Za-z0-9_-]\+\)[?*+]\=')
    if token[0] == '('
      call add(parentheses, end - 1)
    elseif token[0] == ')'
      if len(parentheses) == 1
        return [-1, end]
      endif
      call remove(parentheses, -1)
    endif
  endwhile
  return [parentheses[-1] - strridx(a:line, "\n", parentheses[-1]), end]
endfunction

" 'indentexpr' callback: compute the indent for line v:lnum.
" Returns a column count, or -1 to keep the current indent.
" TODO: Line and end could be script global (think OO members).
function GetDTDIndent()
  if v:lnum == 1
    return 0
  endif

  " Begin by searching back for a <! that isn't inside a comment.
  " From here, depending on what follows immediately after, parse to
  " where we're at to determine what to do.
  if search('<!', 'bceW') == 0
    return indent(v:lnum - 1)
  endif
  let lnum = line('.')
  let col = col('.')
  let indent = indent('.')
  " If the <! was found on the line being indented there are no preceding
  " lines to join; lex that line itself instead of an empty string.
  let line = lnum == v:lnum
        \ ? getline(lnum)
        \ : join(getline(lnum, v:lnum - 1), "\n")

  let [declaration, end] = s:lex1(line, col)
  if declaration == ""
    return indent + s:sw()
  elseif declaration == '--'
    " We're looking at a comment.  Now, simply determine if the comment is
    " terminated or not.  If it isn't, let Vim take care of that using
    " 'comments' and 'autoindent'.  Otherwise, indent to the first line's
    " level.
    while declaration != ""
      let [declaration, end] = s:lex(line, end)
      if declaration == "-->"
        return indent
      endif
    endwhile
    return -1
  elseif declaration == 'ELEMENT'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + s:sw()
    endif

    " Check for token following element name.  This can be a specification of
    " whether the start or end tag may be omitted.  If nothing is found, indent
    " one level.  The restricted pattern is used from the first token on so
    " that a '(' immediately followed by content, as in '(a,', is still
    " recognized as the start of the content model.
    let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
    let n = 0
    while token =~ '[-O]' && n < 2
      let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
      let n += 1
    endwhile
    if token == ""
      return indent + s:sw()
    endif

    " Next comes the content model.  If the token we've found isn't a
    " parenthesis it must be either ANY or EMPTY.  Either way, we're done
    " indenting this element, so set it to that of the first line so that
    " the terminating '>' winds up having the same indentation.
    if token != '('
      return indent
    endif

    " Now go through the content model.  We need to keep track of the nesting
    " of parentheses.  As soon as we hit 0 we're done.  If that happens we must
    " have a complete content model.  Thus set indentation to be the same as
    " that of the first line so that the terminating '>' winds up having the
    " same indentation.  Otherwise, we'll indent to the innermost parentheses
    " not yet matched.
    let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    if indent_of_innermost != -1
      return indent_of_innermost
    endif

    " Finally, look for any additions and/or exceptions to the content model.
    " This is defined by a '+' or '-' followed by another content model
    " declaration.
    " TODO: Can the '-' be separated by whitespace from the '('?
    let seen = { '+(': 0, '-(': 0 }
    while 1
      let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
      if additions_exceptions != '+(' && additions_exceptions != '-('
        let [token, end] = s:lex(line, end)
        if token == '>'
          return indent
        endif
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + s:sw())
      endif

      " If we've seen an addition or exception already and this is of the same
      " kind, the user is writing a broken DTD.  Time to bail.
      if seen[additions_exceptions]
        return indent
      endif
      let seen[additions_exceptions] = 1

      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
      if indent_of_innermost != -1
        return indent_of_innermost
      endif
    endwhile
  elseif declaration == 'ATTLIST'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + s:sw()
    endif

    " Check for any number of attributes.
    while 1
      " Check for attribute name.  If none exists, indent one level, unless the
      " current line is a lone '>', in which case we indent to the same level
      " as the first line.  Otherwise, if the attribute name is '>', we have
      " actually hit the end of the attribute list, in which case we indent to
      " the same level as the first line.
      let [name, end] = s:lex(line, end)
      if name == ""
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + s:sw())
      elseif name == ">"
        return indent
      endif

      " Check for attribute value declaration.  If none exists, indent two
      " levels.  Otherwise, if it's an enumerated value, check for nested
      " parentheses and indent to the innermost one if we don't reach the end
      " of the list.  Otherwise, just continue with looking for the default
      " attribute value.
      " TODO: Do validation of keywords
      " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
      let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
      if value == ""
        return indent + s:sw() * 2
      elseif value == 'NOTATION'
        " If this is an enumerated value based on notations, read another token
        " for the actual value.  If it doesn't exist, indent three levels.
        " TODO: If validating according to above, value must be equal to '('.
        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
        if value == ""
          return indent + s:sw() * 3
        endif
      endif

      if value == '('
        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
        if indent_of_innermost != -1
          return indent_of_innermost
        endif
      endif

      " Finally look for the attribute's default value.  If none exists, indent
      " two levels.
      " TODO: Do validation of keywords (#REQUIRED|#IMPLIED)?
      let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|[^[:space:]]\+\)')
      if default == ""
        return indent + s:sw() * 2
      elseif default == '#FIXED'
        " We need to look for the fixed value.  If none exists, indent three
        " levels.
        let [default, end] = s:lex(line, end, '^"\_[^"]*"')
        if default == ""
          return indent + s:sw() * 3
        endif
      endif
    endwhile
  elseif declaration == 'ENTITY'
    " Check for entity name.  If none exists, indent one level.  Otherwise, if
    " the name actually turns out to be a percent sign, '%', this is a
    " parameter entity.  Read another token to determine the entity name and,
    " again, if none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + s:sw()
    elseif name == '%'
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + s:sw()
      endif
    endif

    " Now check for the entity value.  If none exists, indent one level.  If it
    " does exist, indent to same level as first line, as we're now done with
    " this entity.
    "
    " The entity value can be a string in single or double quotes (no escapes
    " to worry about, as entities are used instead).  However, it can also be
    " that this is an external unparsed entity.  In that case we have to look
    " further for (possibly) a public ID and a URI followed by the NDATA
    " keyword and the actual notation name.  For the public ID and URI, indent
    " two levels, if they don't exist.  If the NDATA keyword doesn't exist,
    " indent one level.  Otherwise, if the actual notation name doesn't exist,
    " indent two levels.  If it does, indent to same level as first line, as
    " we're now done with this entity.
    let [value, end] = s:lex(line, end)
    if value == ""
      return indent + s:sw()
    elseif value == 'SYSTEM' || value == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + s:sw() * 2
      endif

      if value == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string == ""
          return indent + s:sw() * 2
        endif
      endif

      let [ndata, end] = s:lex(line, end)
      if ndata == ""
        return indent + s:sw()
      endif

      let [name, end] = s:lex(line, end)
      return name == "" ? (indent + s:sw() * 2) : indent
    else
      return indent
    endif
  elseif declaration == 'NOTATION'
    " Check for notation name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + s:sw()
    endif

    " Now check for the external ID.  If none exists, indent one level.
    let [id, end] = s:lex(line, end)
    if id == ""
      return indent + s:sw()
    elseif id == 'SYSTEM' || id == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + s:sw() * 2
      endif

      if id == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
        if quoted_string == ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + s:sw() * 2)
        elseif quoted_string == '>'
          return indent
        endif
      endif
    endif

    return indent
  endif

  " TODO: Processing directives could be indented I suppose.  But perhaps it's
  " just as well to let the user decide how to indent them (perhaps extending
  " this function to include proper support for whatever processing directive
  " language they want to use).

  " Conditional sections are simply passed along to let Vim decide what to do
  " (and hence the user).
  return -1
endfunction

let &cpo = s:cpo_save
unlet s:cpo_save