;;; cc-awk.el --- AWK specific code within cc-mode.

;; Copyright (C) 1988, 1994, 1996, 2000, 2001, 2002, 2003, 2004, 2005,
;; 2006, 2007 Free Software Foundation, Inc.

;; Author: Alan Mackenzie <acm@muc.de> (originally based on awk-mode.el)
;; Maintainer: FSF
;; Keywords: AWK, cc-mode, unix, languages

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING. If not, write to the
;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;; Commentary:

;; This file contains (most of) the adaptations to cc-mode required for the
;; integration of AWK Mode.
;; It is organised thusly, the sections being separated by page breaks:
;; 1. The AWK Mode syntax table.
;; 2. Regular expressions for analysing AWK code.
;; 3. Indentation calculation stuff ("c-awk-NL-prop text-property").
;; 4. Syntax-table property/font-locking stuff, including the
;; font-lock-keywords setting.
;; 5. The AWK Mode before/after-change-functions.
;; 6. AWK Mode specific versions of commands like beginning-of-defun.
;; The AWK Mode keymap, abbreviation table, and the mode function itself are
;; in cc-mode.el.

;;; Code:

;; When byte compiling, make sure cc-bytecomp is loaded from the directory
;; the compiled file is being written to, if that directory is known.
(eval-when-compile
  (let ((load-path
         (if (and (boundp 'byte-compile-dest-file)
                  (stringp byte-compile-dest-file))
             (cons (file-name-directory byte-compile-dest-file) load-path)
           load-path)))
    (load "cc-bytecomp" nil t)))

(cc-require 'cc-defs)

;; Keep the byte compiler quiet about this variable.
(cc-bytecomp-defvar font-lock-mode)     ; Checked with boundp before use.

;; These functions live in cc-engine.  cc-engine can't simply be required
;; here because of a cyclic dependency.  (Moving some functions into
;; cc-engine might be a way to avoid that.)
(cc-bytecomp-defun c-backward-token-1)
(cc-bytecomp-defun c-beginning-of-statement-1)
(cc-bytecomp-defun c-backward-sws)

(defvar awk-mode-syntax-table
  (let ((table (make-syntax-table)))
    ;; \ is the escape character.
    (modify-syntax-entry ?\\ "\\" table)
    ;; # opens a comment; \n, \r and \f each close one.
    (modify-syntax-entry ?\# "< " table)
    (dolist (comment-ender '(?\n ?\r ?\f))
      (modify-syntax-entry comment-ender "> " table))
    ;; Operator characters are punctuation.  This includes /, which can
    ;; either delimit a regexp or be the division operator: by default it is
    ;; taken to be a division sign, and the regexp cases are fixed up with
    ;; `font-lock-syntactic-keywords'.  (ACM 2002/4/27.)
    (dolist (operator '(?/ ?* ?+ ?- ?= ?% ?< ?> ?& ?| ?\'))
      (modify-syntax-entry operator "." table))
    ;; _ is a symbol constituent.
    (modify-syntax-entry ?_ "_" table)
    table)
  "Syntax table in use in AWK Mode buffers.")


;; The regular expressions used for analysing AWK code are defined in this
;; section.

;; N.B. Throughout these regexps an EOL is either \n OR \r.  The reason is
;; that Emacs has in the past used \r to mark hidden lines in some fashion
;; (and maybe still does).

(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
;; An escaped (i.e. preceded by \) character-pair of any kind; an escaped
;; newline counts too.
(defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)")
;; An escaped (i.e. preceded by \) character-pair, apart from an escaped
;; newline.
(defconst c-awk-comment-without-nl "#.*")
;; An AWK comment, stopping short of the terminating NL (if there is one).
;; It is up to the "enclosing" (elisp) regexp to make sure the # is real.
(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
;; A newline, or the end of the buffer.

;; "Space" regular expressions.
(eval-and-compile
  (defconst c-awk-escaped-nl "\\\\[\n\r]")
  ;; An escaped newline.

  (defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*"))
  ;; A possibly empty sequence of escaped newlines.  Used in
  ;; awk-font-lock-keywords.

  ;; An earlier formulation of the next regexp was:
  ;;
  ;; (defconst c-awk-escaped-nls*-with-space*
  ;;   (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*"))
  ;;
  ;; That RE was very slow: its runtime was doubling with each additional
  ;; space :-(  Hence the reformulation below.
  (defconst c-awk-escaped-nls*-with-space*
    (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*")))
;; A possibly empty sequence of escaped newlines with optional interspersed
;; spaces and tabs.  Used in awk-font-lock-keywords.

(defconst c-awk-blank-or-comment-line-re "[ \t]*\\(#\\|\\\\?$\\)")
;; (The tail of) a line holding at most either a comment or an escaped EOL.

;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
(defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]")
;; Any character other than _, #, /, ", \, or a newline.  N.B. _" starts a
;; localisation string in gawk 3.1.
(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
;; An underline which is NOT followed by ".
(defconst c-awk-harmless-string*-re
  (concat "\\("
          c-awk-harmless-char-re
          "\\|" c-awk-esc-pair-re
          "\\|" c-awk-harmless-_
          "\\)*"))
;; A (possibly empty) run of characters containing no unescaped /, ", \, #,
;; or newline.
(defconst c-awk-harmless-string*-here-re
  (concat "\\=" c-awk-harmless-string*-re))
;; The same (possibly empty) run of harmless characters, anchored at point.
(defconst c-awk-harmless-line-re
  (concat c-awk-harmless-string*-re
          "\\(" c-awk-comment-without-nl "\\)?"
          c-awk-nl-or-eob))
;; (The tail of) an AWK "logical" line which has no unescaped " or / in it.
;; "Logical" means "possibly containing escaped newlines".  A trailing
;; comment is matched as part of the line, even when the comment contains a
;; " or a /.  The end of the buffer also counts as an end of line.
(defconst c-awk-harmless-lines+-here-re
  (concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
;; A sequence of one or more "harmless-line"s, anchored at point.


;; REGEXPS FOR AWK STRINGS.
(defconst c-awk-string-ch-re "[^\"\\\n\r]")
;; Any character which may appear unescaped inside a string.
(defconst c-awk-string-innards-re
  (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"))
;; The contents of an AWK string, i.e. without the enclosing quotes.
(defconst c-awk-string-without-end-here-re
  (concat "\\=_?\"" c-awk-string-innards-re))
;; An AWK string at point, up to but not including any terminator.  The
;; optional leading _ is there because a gawk 3.1+ string may look like
;; _"localisable string".
(defconst c-awk-one-line-possibly-open-string-re
  (concat "\"\\("
          c-awk-string-ch-re
          "\\|" c-awk-non-eol-esc-pair-re
          "\\)*"
          "\\(\"\\|\\\\?$\\|\\'\\)"))

;; REGEXPS FOR AWK REGEXPS.
(defconst c-awk-regexp-normal-re "[^[/\\\n\r]")
;; Any AWK regexp character which needs no special analysis.
(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*")
;; A (possibly empty) sequence of escaped newlines.

;; NOTE ON TERMINOLOGY: below, "[asdf]" in a regexp is called a "character
;; list", and "[:alpha:]" inside a character list is called a "character
;; class".  Different regexp descriptions use different terms for these
;; things.
(defconst c-awk-regexp-char-class-re "\\[:[a-z]+:\\]")
;; A character class spec (e.g. [:alpha:]).
(defconst c-awk-regexp-char-list-re
  (concat "\\["
          c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?"
          "\\("
          c-awk-esc-pair-re
          "\\|" c-awk-regexp-char-class-re
          "\\|" "[^]\n\r]"
          "\\)*"
          "\\(]\\|$\\)"))
;; A regexp char list.  When the ] is missing, the match extends up to (but
;; not including) the EOL.
(defconst c-awk-regexp-one-line-possibly-open-char-list-re
  (concat "\\[\\]?\\("
          c-awk-non-eol-esc-pair-re
          "\\|" "[^]\n\r]"
          "\\)*"
          "\\(]\\|\\\\?$\\|\\'\\)"))
;; The head (or the whole) of a regexp char list, up to (but not including)
;; the first EOL.
(defconst c-awk-regexp-innards-re
  (concat "\\("
          c-awk-esc-pair-re
          "\\|" c-awk-regexp-char-list-re
          "\\|" c-awk-regexp-normal-re
          "\\)*"))
;; The contents of an AWK regexp, i.e. without the enclosing /s.
(defconst c-awk-regexp-without-end-re
  (concat "/" c-awk-regexp-innards-re))
;; An AWK regexp up to, but not including, any terminating /.
(defconst c-awk-one-line-possibly-open-regexp-re
  (concat "/\\("
          c-awk-non-eol-esc-pair-re
          "\\|" c-awk-regexp-one-line-possibly-open-char-list-re
          "\\|" c-awk-regexp-normal-re
          "\\)*"
          "\\(/\\|\\\\?$\\|\\'\\)"))
;; As much of the head of an AWK regexp as fits on one line, possibly all of
;; it.

;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
;; REGEXP OPENER OR A DIVISION SIGN.
;; By "state" in the following is meant whether a '/' at the current
;; position would be a regexp opener or a division sign.

;; Until 2003/6/7 the value of the next constant had a trailing "+":
;;   "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+"
(defconst c-awk-neutral-re
  "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)")
;; A "neutral" char(pair), i.e. one which leaves the "state" of a subsequent
;; / unchanged: a space or tab, a brace, an auto-increment/decrement
;; operator, an escaped character, or one of the (illegal) characters @ and
;; `.  An end of line is NOT neutral (not even an escaped one).
(defconst c-awk-neutrals*-re
  (concat "\\(" c-awk-neutral-re "\\)*"))
;; A (possibly empty) string of neutral characters (or character pairs).
(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")
;; One or more characters, each being a constituent of a variable or number,
;; or a ket (i.e. closing bracKET), round or square.  All characters from
;; \x80 to \xff are assumed to be "letters".
(defconst c-awk-div-sign-re
  (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))
;; A piece of AWK buffer ending in a / which is a division sign, found in a
;; context where an immediate / would be a regexp bracket.  The / follows a
;; variable or number (with optional intervening "neutral" characters).
;; This will only work when there won't be a preceding " or / before the
;; sought / to foul things up.
(defconst c-awk-non-arith-op-bra-re
  "[[\(&=:!><,?;'~|]")
;; An opening BRAcket, round or square, or any operator character other than
;; +,-,/,*,%.  For the purpose at hand (detecting a / which is a regexp
;; bracket) these arith ops are unnecessary and a pain, because of "++" and
;; "--".
(defconst c-awk-regexp-sign-re
  (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))
;; A piece of AWK buffer ending in a / which is an opening regexp bracket,
;; found in a context where an immediate / would be a division sign.
;; This will only work when there won't be a preceding " or / before the
;; sought / to foul things up.

;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon"
(defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]")
;;;; NEW VERSION!  (which will be restricted to the current line)
(defconst c-awk-one-line-non-syn-ws*-re
  (concat "\\([ \t]*"
          "\\("
          c-awk-_-harmless-nonws-char-re "\\|"
          c-awk-non-eol-esc-pair-re "\\|"
          c-awk-one-line-possibly-open-string-re "\\|"
          c-awk-one-line-possibly-open-regexp-re
          "\\)"
          "\\)*"))


;; ACM, 2002/5/29:
;;
;; The code in the next section works out whether or not an AWK statement is
;; complete.  The answer is used for indenting the following line.  In C the
;; determination is pretty straightforward: a statement ends with either a ;
;; or a }, and only "while" really gives any trouble, since it might be the
;; end of a do-while.  In AWK, on the other hand, semicolons are rarely
;; used, and EOLs _usually_ act as "virtual semicolons".  Escaped EOLs add
;; further complexity.  The core of this analysis is in the middle of the
;; function c-awk-calculate-NL-prop-prev-line, about 130 lines lower down.
;;
;; So as not to repeat this expensive analysis continually, its result is
;; "cached" in a text-property, c-awk-NL-prop, whose value for a line is set
;; on the EOL (if any) which terminates that line.  Should the property be
;; required for the very last line (which has no EOL), it is calculated as
;; required but not cached.  The c-awk-NL-prop property should be thought of
;; as only really valid immediately after a buffer change, not as a
;; permanently set property.  (By contrast, the syntax-table text properties
;; (set by an after-change function) must be constantly updated for the mode
;; to work properly).
;;
;; This text property is also used for "syntactic whitespace" movement, this
;; being where the distinction between the values '$' and '}' is significant.
;;
;; The valid values for c-awk-NL-prop are:
;;
;; nil The property is not currently set for this line.
;; '#' There is NO statement on this line (at most a comment), and no open
;;     statement from a previous line which could have been completed on this
;;     line.
;; '{' There is an unfinished statement on this (or a previous) line which
;;     doesn't require \s to continue onto another line, e.g. the line ends
;;     with {, or the && operator, or "if (condition)". Note that even if the
;;     newline is redundantly escaped, it remains a '{' line.
;; '\' There is an escaped newline at the end of this line and this '\' is
;;     essential to the syntax of the program. (i.e. if it had been a
;;     frivolous \, it would have been ignored and the line been given one of
;;     the other property values.)
;; '$' A non-empty statement is terminated on the line by an EOL (a "virtual
;;     semicolon"). This might be a content-free line terminating a statement
;;     from the preceding (continued) line (which has property \).
;; '}' A statement, being the last thing (aside from ws/comments) is
;;     explicitly terminated on this line by a closing brace (or sometimes a
;;     semicolon).
;;
;; This set of values has been chosen so that the property's value on a line
;; is completely determined by the contents of the line and the property on
;; the previous line, EXCEPT for where a "while" might be the closing
;; statement of a do-while.

(defun c-awk-after-if-for-while-condition-p (&optional do-lim)
  ;; Are we just after the ) in "if/for/while (<condition>)"?
  ;;
  ;; Note that the end of the ) in a do .... while (<condition>) doesn't
  ;; count, since the purpose of this routine is essentially to decide
  ;; whether to indent the next line.
  ;;
  ;; DO-LIM sets a limit on how far back we search for the "do" of a possible
  ;; do-while.
  ;;
  ;; This function might do hidden buffer changes.
  (and
   (eq (char-before) ?\))
   (save-excursion
     (let ((par-pos (c-safe (scan-lists (point) -1 0))))
       (when par-pos
         (goto-char par-pos)            ; back over "(...)"
         (c-backward-token-1)           ; BOB isn't a problem.
         (or (looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)")
             (and (looking-at "while\\>\\([^_]\\|$\\)") ; Ensure this isn't a do-while.
                  (not (eq (c-beginning-of-statement-1 do-lim)
                           'beginning)))))))))

(defun c-awk-after-function-decl-param-list ()
  ;; Are we just after the ) in "function foo (bar)" ?
  ;;
  ;; This function might do hidden buffer changes.
  (and (eq (char-before) ?\))
       (save-excursion
         (let ((par-pos (c-safe (scan-lists (point) -1 0))))
           (when par-pos
             (goto-char par-pos)        ; back over "(...)"
             (c-backward-token-1)       ; BOB isn't a problem
             (and (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>")
                  (progn (c-backward-token-1)
                         (looking-at "func\\(tion\\)?\\>"))))))))

;; 2002/11/8: FIXME! Check c-backward-token-1/2 for success (0 return code).
(defun c-awk-after-continue-token ()
;; Are we just after a token which can be continued onto the next line without
;; a backslash?
;;
;; This function might do hidden buffer changes.
  (save-excursion
    (c-backward-token-1)        ; FIXME 2002/10/27. What if this fails?
    (if (and (looking-at "[&|]") (not (bobp)))
        (backward-char))        ; c-backward-token-1 doesn't do this :-(
    (looking-at "[,{?:]\\|&&\\|||\\|do\\>\\|else\\>")))

(defun c-awk-after-rbrace-or-statement-semicolon ()
  ;; Are we just after a } or a ; which closes a statement?
  ;; Be careful about ;s in for loop control bits. They don't count!
  ;;
  ;; This function might do hidden buffer changes.
  (or (eq (char-before) ?\})
      (and
       (eq (char-before) ?\;)
       (save-excursion
         (let ((par-pos (c-safe (scan-lists (point) -1 1))))
           (when par-pos
             (goto-char par-pos)        ; go back to containing (
             (not (and (looking-at "(")
                       (c-backward-token-1) ; BOB isn't a problem
                       (looking-at "for\\>")))))))))

(defun c-awk-back-to-contentful-text-or-NL-prop ()
  ;; Move back to just after the first found of either (i) an EOL which has
  ;; the c-awk-NL-prop text-property set; or (ii) non-ws text; or (iii) BOB.
  ;; We return either the value of c-awk-NL-prop (in case (i)) or nil.
  ;; Calling functions can best distinguish cases (ii) and (iii) with (bolp).
  ;;
  ;; Note that an escaped eol counts as whitespace here.
  ;;
  ;; Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
  ;; that the previous line contains an unterminated string (without \). In
  ;; this case, assume that the previous line's c-awk-NL-prop is a $.
  ;;
  ;; POINT MUST BE AT THE START OF A LINE when calling this function. This
  ;; is to ensure that the various backward-comment functions will work
  ;; properly.
  ;;
  ;; This function might do hidden buffer changes.
  (let ((nl-prop nil)
        bol-pos bsws-pos) ; starting pos for a backward-syntactic-ws call.
    (while ;; We are at a BOL here. Go back one line each iteration.
        (and
         (not (bobp))
         (not (setq nl-prop (c-get-char-property (1- (point)) 'c-awk-NL-prop)))
         (progn (setq bol-pos (c-point 'bopl))
                (setq bsws-pos (point))
                ;; N.B. the following function will not go back past an EOL if
                ;; there is an open string (without \) on the previous line.
                ;; If we find such, set the c-awk-NL-prop on it, too
                ;; (2004/3/29).
                (c-backward-syntactic-ws bol-pos)
                (or (/= (point) bsws-pos)
                    (progn (setq nl-prop ?\$)
                           (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop)
                           nil)))
         ;; If we had a backslash at EOL, c-backward-syntactic-ws will
         ;; have gone backwards over it. Check the backslash was "real".
         (progn
           (if (looking-at "[ \t]*\\\\+$")
               (if (progn
                     (end-of-line)
                     (search-backward-regexp
                      "\\(^\\|[^\\]\\)\\(\\\\\\\\\\)*\\\\$" ; ODD number of \s at EOL :-)
                      bol-pos t))
                   (progn (end-of-line) ; escaped EOL.
                          (backward-char)
                          (c-backward-syntactic-ws bol-pos))
                 (end-of-line)))        ; The \ at eol is a fake.
           (bolp))))
    nl-prop))

(defun c-awk-calculate-NL-prop-prev-line (&optional do-lim)
  ;; Calculate and set the value of the c-awk-NL-prop on the immediately
  ;; preceding EOL. This may also involve doing the same for several
  ;; preceding EOLs.
  ;;
  ;; NOTE that if the property was already set, we return it without
  ;; recalculation. (This is by accident rather than design.)
  ;;
  ;; Return the property which got set (or was already set) on the previous
  ;; line. Return nil if we hit BOB.
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (save-match-data
      (beginning-of-line)
      (let* ((pos (point))
             (nl-prop (c-awk-back-to-contentful-text-or-NL-prop)))
        ;; We are either (1) at a BOL (with nl-prop containing the previous
        ;; line's c-awk-NL-prop) or (2) after contentful text on a line. At
        ;; the BOB counts as case (1), so we test next for bolp rather than
        ;; non-nil nl-prop.
        (when (not (bolp))
          (setq nl-prop
                (cond
                 ;; Incomplete statement which doesn't require escaped EOL?
                 ((or (c-awk-after-if-for-while-condition-p do-lim)
                      (c-awk-after-function-decl-param-list)
                      (c-awk-after-continue-token))
                  ?\{)
                 ;; Escaped EOL (where there's also something to continue)?
                 ((and (looking-at "[ \t]*\\\\$")
                       (not (c-awk-after-rbrace-or-statement-semicolon)))
                  ?\\)
                 ;; A statement was completed on this line. How?
                 ((memq (char-before) '(?\; ?\})) ?\}) ; Real ; or }
                 (t ?\$)))              ; A virtual semicolon.
          (end-of-line)
          (c-put-char-property (point) 'c-awk-NL-prop nl-prop)
          (forward-line))

        ;; We are now at a (possibly empty) sequence of content-free lines.
        ;; Set c-awk-NL-prop on each of these lines' EOL.
        (while (< (point) pos)    ; one content-free line each iteration.
          (cond          ; recalculate nl-prop from previous line's value.
           ((memq nl-prop '(?\} ?\$ nil)) (setq nl-prop ?\#))
           ((eq nl-prop ?\\)
            (if (not (looking-at "[ \t]*\\\\$")) (setq nl-prop ?\$)))
           ;; ?\# (empty line) and ?\{ (open stmt) don't change.
           )
          (forward-line)
          (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop))
        nl-prop))))

(defun c-awk-get-NL-prop-prev-line (&optional do-lim)
  ;; Get the c-awk-NL-prop text-property from the previous line, calculating
  ;; it if necessary. Return nil iff we're already at BOB.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (if (bobp)
      nil
    (or (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)
        (c-awk-calculate-NL-prop-prev-line do-lim))))

(defun c-awk-get-NL-prop-cur-line (&optional do-lim)
  ;; Get the c-awk-NL-prop text-property from the current line, calculating it
  ;; if necessary. (As a special case, the property doesn't get set on an
  ;; empty line at EOB (there's no position to set the property on), but the
  ;; function returns the property value an EOL would have got.)
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; On the last line of the buffer a newline is inserted temporarily.  It is
  ;; removed again in the cleanup form of an `unwind-protect', so that an
  ;; error or a quit inside the analysis cannot leave it behind in the
  ;; buffer.
  ;;
  ;; This function might do hidden buffer changes.
  (save-excursion
    (let ((extra-nl nil))
      (end-of-line)            ; Necessary for the following test to work.
      (when (= (forward-line) 1)        ; if we were on the last line....
        (insert-char ?\n 1) ; ...artificial eol is needed for comment detection.
        (setq extra-nl t))
      (unwind-protect
          (c-awk-get-NL-prop-prev-line do-lim)
        ;; Point is still just after the artificial newline here.  Use
        ;; `delete-char' rather than `delete-backward-char', which is meant
        ;; for interactive use only.
        (if extra-nl (delete-char -1))))))

(defsubst c-awk-prev-line-incomplete-p (&optional do-lim)
  ;; Is there an incomplete statement at the end of the previous line?
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (memq (c-awk-get-NL-prop-prev-line do-lim) '(?\\ ?\{)))

(defsubst c-awk-cur-line-incomplete-p (&optional do-lim)
  ;; Is there an incomplete statement at the end of the current line?
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  ;;
  ;; This function might do hidden buffer changes.
  (memq (c-awk-get-NL-prop-cur-line do-lim) '(?\\ ?\{)))

;;;; NOTES ON "VIRTUAL SEMICOLONS"
;;;;
;;;; A "virtual semicolon" is what terminates a statement when there is no ;
;;;; or } to do the job. Like point, it is considered to lie _between_ two
;;;; characters. As from mid-March 2004, it is considered to lie just after
;;;; the last non-syntactic-whitespace character on the line; (previously, it
;;;; was considered an attribute of the EOL on the line). A real semicolon
;;;; never counts as a virtual one.

(defun c-awk-at-vsemi-p (&optional pos)
  ;; Is there a virtual semicolon at POS (or POINT)?
  (save-excursion
    (let (nl-prop
          (pos-or-point (progn (if pos (goto-char pos)) (point))))
      (forward-line 0)
      (search-forward-regexp c-awk-one-line-non-syn-ws*-re)
      (and (eq (point) pos-or-point)
           (progn
             (while (and (eq (setq nl-prop (c-awk-get-NL-prop-cur-line)) ?\\)
                         (eq (forward-line) 0)
                         (looking-at c-awk-blank-or-comment-line-re)))
             (eq nl-prop ?\$))))))

(defun c-awk-vsemi-status-unknown-p ()
  ;; Are we unsure whether there is a virtual semicolon on the current line?
  ;; DO NOT under any circumstances attempt to calculate this; that would
  ;; defeat the (admittedly kludgey) purpose of this function, which is to
  ;; prevent an infinite recursion in c-beginning-of-statement-1 when point
  ;; starts at a `while' token.
  (not (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))

(defun c-awk-clear-NL-props (beg end)
  ;; This function is run from before-change-hooks. It clears the
  ;; c-awk-NL-prop text property from beg to the end of the buffer (The END
  ;; parameter is ignored). This ensures that the indentation engine will
  ;; never use stale values for this property.
  ;;
  ;; This function might do hidden buffer changes.
  (save-restriction
    (widen)
    (c-clear-char-properties beg (point-max) 'c-awk-NL-prop)))

(defun c-awk-unstick-NL-prop ()
  ;; Ensure that the text property c-awk-NL-prop is "non-sticky". Without
  ;; this, a new newline inserted after an old newline (e.g. by C-j) would
  ;; inherit any c-awk-NL-prop from the old newline. This would be a Bad
  ;; Thing. This function's action is required by c-put-char-property.
  (if (and (boundp 'text-property-default-nonsticky) ; doesn't exist in XEmacs
           (not (assoc 'c-awk-NL-prop text-property-default-nonsticky)))
      (setq text-property-default-nonsticky
            (cons '(c-awk-NL-prop . t) text-property-default-nonsticky))))

;; The following is purely a diagnostic command, to be commented out of the
;; final release. ACM, 2002/6/1
;; (defun NL-props ()
;;   (interactive)
;;   (let (pl-prop cl-prop)
;;     (message "Prev-line: %s Cur-line: %s"
;;              (if (setq pl-prop (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop))
;;                  (char-to-string pl-prop)
;;                "nil")
;;              (if (setq cl-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop))
;;                  (char-to-string cl-prop)
;;                "nil"))))
;(define-key awk-mode-map [?\C-c ?\r] 'NL-props) ; commented out, 2002/8/31
;for now. In the byte compiled version, this causes things to crash because
;awk-mode-map isn't yet defined. :-(

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The following section of the code is to do with font-locking. The biggest
;; problem for font-locking is deciding whether a / is a regular expression
;; delimiter or a division sign - determining precisely where strings and
;; regular expressions start and stop is also troublesome. This is the
;; purpose of the function c-awk-set-syntax-table-properties and the myriad
;; elisp regular expressions it uses.
;;
;; Because AWK is a line oriented language, I felt the normal cc-mode strategy
;; for font-locking unterminated strings (i.e. font-locking the buffer up to
;; the next string delimiter as a string) was inappropriate. Instead,
;; unbalanced string/regexp delimiters are given the warning font, being
;; refonted with the string font as soon as the matching delimiter is entered.
;;
;; This requires the region processed by the current font-lock after-change
;; function to have access to the start of the string/regexp, which may be
;; several lines back. The elisp "advice" feature is used on these functions
;; to allow this.

(defun c-awk-beginning-of-logical-line (&optional pos)
;; Move to the start of the (apparent) logical line containing point (or
;; containing POS, when given) and return that buffer position.  That is,
;; keep going back a line for as long as the EOL before the line is escaped.
;;
;; The resulting position is guaranteed to be "safe" for syntactic analysis,
;; i.e. outwith any comment, string or regexp.  IT MAY WELL BE that this
;; function should not be executed on a narrowed buffer.
;;
;; This function might do hidden buffer changes.
  (when pos (goto-char pos))
  (forward-line 0)
  ;; (1- (point)) is the EOL ending the previous line; the character before
  ;; that EOL tells us whether the EOL is escaped.
  (while (and (not (bobp))
              (eq (char-before (1- (point))) ?\\))
    (forward-line -1))
  (point))

(defun c-awk-end-of-logical-line (&optional pos)
;; Move to the end of the (apparent) logical line containing point (or
;; containing POS, when given) and return that buffer position.  That is,
;; keep going forward to the end of the next line for as long as the EOL we
;; are at is escaped.
;;
;; The resulting position is guaranteed to be "safe" for syntactic analysis,
;; i.e. outwith any comment, string or regexp.  IT MAY WELL BE that this
;; function should not be executed on a narrowed buffer.
;;
;; This function might do hidden buffer changes.
  (when pos (goto-char pos))
  (end-of-line)
  (while (and (not (eobp))
              (eq (char-before) ?\\))
    (end-of-line 2))
  (point))

;; ACM, 2002/02/15: The idea of the next function is to put the "Error font"
;; on strings/regexps which are missing their closing delimiter.
;; 2002/4/28. The default syntax for / has been changed from "string" to
;; "punctuation", to reduce hassle when this character appears within a string
;; or comment.

(defun c-awk-set-string-regexp-syntax-table-properties (beg end)
;; BEG and END bracket a (possibly unterminated) string or regexp: the
;; opening delimiter is after BEG, and the closing delimiter, IF ANY, is
;; AFTER END.  Put the appropriate syntax-table properties on the delimiters
;; and on the contents of this string/regexp.
;;
;; "String" includes a gawk 3.1 "localizable" string, which starts with _".
;; For such a string the _ is stepped over and ignored; it will get its font
;; from an entry in awk-font-lock-keywords.
;;
;; When the closing delimiter is missing (i.e., there is an EOL there), the
;; STRING-FENCE property is put on the opening " or / and on the closing EOL.
;;
;; This function does hidden buffer changes.
  (when (eq (char-after beg) ?_)
    (setq beg (1+ beg)))

  ;; The delimiters first.
  (cond
   ;; String/regexp terminated by EOB.
   ((eq end (point-max))
    (c-put-char-property beg 'syntax-table '(15))) ; (15) = "string fence"
   ;; Missing end delimiter.
   ((/= (char-after beg) (char-after end))
    (c-put-char-property beg 'syntax-table '(15))
    (c-put-char-property end 'syntax-table '(15)))
   ;; Properly bracketed regexp.
   ((eq (char-after beg) ?/)
    (c-put-char-property beg 'syntax-table '(7)) ; (7) = "string"
    (c-put-char-property end 'syntax-table '(7)))
   ;; Properly bracketed string: Nothing to do.
   (t))

  ;; Then the contents: any (escaped) " in the string becomes punctuation.
  (save-excursion
    (goto-char (1+ beg))
    (or (eobp)
        (while (search-forward "\"" end t)
          (c-put-char-property (1- (point)) 'syntax-table '(1))))))

(defun c-awk-syntax-tablify-string ()
  ;; Point is at the opening " or _" of a string.  Put the syntax-table
  ;; properties on this string and leave point just after it.
  ;;
  ;; Return t if a / immediately after the string would be a division sign,
  ;; nil if it would be a regexp opener.
  ;;
  ;; This function does hidden buffer changes.

  ;; Step over a (possibly unterminated) string.
  (search-forward-regexp c-awk-string-without-end-here-re nil t)
  (c-awk-set-string-regexp-syntax-table-properties
   (match-beginning 0) (match-end 0))
  (cond
   ;; Properly terminated string.  In AWK, ("15" / 5) gives 3 ;-)
   ((looking-at "\"")
    (forward-char)
    t)
   ;; Unterminated string with EOL: a / on the next line would start a
   ;; regexp.
   ((looking-at "[\n\r]")
    (forward-char)
    nil)
   ;; Unterminated string at EOB.
   (t nil)))

(defun c-awk-syntax-tablify-/ (anchor anchor-state-/div)
  ;; Point is at a /.  Decide whether it is a division sign or a regexp
  ;; opener and, in the latter case, apply syntax-table properties to the
  ;; whole regexp.  Point is left immediately after the division sign or the
  ;; regexp, as the case may be.
  ;;
  ;; ANCHOR-STATE-/DIV says whether a / at ANCHOR would have been a division
  ;; sign (value t) or a regexp opener (value nil).  The line is analysed
  ;; from ANCHOR up to point to find out what the / at point is.
  ;;
  ;; The result is what ANCHOR-STATE-/DIV (see above) is where point is left.
  ;;
  ;; This function might do hidden buffer changes.
  (let* ((slash-pos (point))
         (after-slash (1+ slash-pos))
         ;; Analyse the line to find out what the / is.
         (div-sign-p
          (progn
            (goto-char anchor)
            (if anchor-state-/div
                (not (search-forward-regexp c-awk-regexp-sign-re after-slash t))
              (search-forward-regexp c-awk-div-sign-re after-slash t)))))
    (cond
     ;; A division sign.
     (div-sign-p
      (goto-char after-slash)
      nil)
     ;; A regexp opener.
     (t
      ;; Jump over the regexp innards, setting the match data.
      (goto-char slash-pos)
      (search-forward-regexp c-awk-regexp-without-end-re)
      (c-awk-set-string-regexp-syntax-table-properties
       (match-beginning 0) (match-end 0))
      (cond
       ;; Terminating /.
       ((looking-at "/")
        (forward-char)
        t)
       ;; Incomplete regexp terminated by EOL: a / on the next line would
       ;; start another regexp.
       ((looking-at "[\n\r]")
        (forward-char)
        nil)
       ;; Unterminated regexp at EOB.
       (t nil))))))

(defun c-awk-set-syntax-table-properties (lim)
;; Scan the buffer text between point and LIM, setting (and clearing) the
;; syntax-table property where necessary.
;;
;; This function is designed to be called as the FUNCTION in a MATCHER in
;; font-lock-syntactic-keywords, and it always returns NIL (to inhibit
;; repeated calls from font-lock: See elisp info page "Search-based
;; Fontification"). It also gets called, with a bit of glue, from
;; after-change-functions when font-lock isn't active. Point is left
;; "undefined" after this function exits. THE BUFFER SHOULD HAVE BEEN
;; WIDENED, AND ANY PRECIOUS MATCH-DATA SAVED BEFORE CALLING THIS ROUTINE.
;;
;; We need to set/clear the syntax-table property on:
;; (i) / - It is set to "string" on a / which is the opening or closing
;; delimiter of the properly terminated regexp (and left unset on a
;; division sign).
;; (ii) the opener of an unterminated string/regexp, we set the property
;; "generic string delimiter" on both the opening " or / and the end of the
;; line where the closing delimiter is missing.
;; (iii) "s inside strings/regexps (these will all be escaped "s). They are
;; given the property "punctuation". This will later allow other routines
;; to use the regexp "\\S\"*" to skip over the string innards.
;; (iv) Inside a comment, all syntax-table properties are cleared.
;;
;; This function does hidden buffer changes.
  (let (anchor
        (anchor-state-/div nil)) ; t means a following / would be a div sign.
765 (c-awk-beginning-of-logical-line) ; ACM 2002/7/21. This is probably redundant. 766 (c-clear-char-properties (point) lim 'syntax-table) 767 ;; Once round the next loop for each string, regexp, or div sign 768 (while (progn 769 ;; Skip any "harmless" lines before the next tricky one. 770 (if (search-forward-regexp c-awk-harmless-lines+-here-re nil t) 771 (setq anchor-state-/div nil)) 772 (< (point) lim)) 773 (setq anchor (point)) 774 (search-forward-regexp c-awk-harmless-string*-here-re nil t) 775 ;; We are now looking at either a " or a /. 776 ;; Do our thing on the string, regexp or divsion sign. 777 (setq anchor-state-/div 778 (if (looking-at "_?\"") 779 (c-awk-syntax-tablify-string) 780 (c-awk-syntax-tablify-/ anchor anchor-state-/div)))) 781 nil)) 782 783 784;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set 785;; the syntax-table properties even when font-lock isn't enabled, for the 786;; subsequent use of movement functions, etc. However, it seems that if font 787;; lock _is_ enabled, we can always leave it to do the job. 788(defvar c-awk-old-EOLL 0) 789(make-variable-buffer-local 'c-awk-old-EOLL) 790;; End of logical line following the region which is about to be changed. Set 791;; in c-awk-before-change and used in c-awk-after-change. 792 793(defun c-awk-before-change (beg end) 794;; This function is called exclusively from the before-change-functions hook. 795;; It does two things: Finds the end of the (logical) line on which END lies, 796;; and clears c-awk-NL-prop text properties from this point onwards. 797;; 798;; This function might do hidden buffer changes. 799 (save-restriction 800 (save-excursion 801 (setq c-awk-old-EOLL (c-awk-end-of-logical-line end)) 802 (c-save-buffer-state nil 803 (c-awk-clear-NL-props end (point-max)))))) 804 805(defun c-awk-end-of-change-region (beg end old-len) 806 ;; Find the end of the region which needs to be font-locked after a change. 
807 ;; This is the end of the logical line on which the change happened, either 808 ;; as it was before the change, or as it is now, whichever is later. 809 ;; N.B. point is left undefined. 810 ;; 811 ;; This function might do hidden buffer changes. 812 (max (+ (- c-awk-old-EOLL old-len) (- end beg)) 813 (c-awk-end-of-logical-line end))) 814 815(defun c-awk-after-change (beg end old-len) 816;; This function is called exclusively as an after-change function in 817;; AWK Mode. It ensures that the syntax-table properties get set in the 818;; changed region. However, if font-lock is enabled, this function does 819;; nothing, since an enabled font-lock after-change function will always do 820;; this. 821;; 822;; This function might do hidden buffer changes. 823 (unless (and (boundp 'font-lock-mode) font-lock-mode) 824 (save-restriction 825 (save-excursion 826 (save-match-data 827 (setq end (c-awk-end-of-change-region beg end old-len)) 828 (c-awk-beginning-of-logical-line beg) 829 (c-save-buffer-state nil ; So that read-only status isn't affected. 830 ; (e.g. when first loading the buffer) 831 (c-awk-set-syntax-table-properties end))))))) 832 833;; ACM 2002/5/25. When font-locking is invoked by a buffer change, the region 834;; specified by the font-lock after-change function must be expanded to 835;; include ALL of any string or regexp within the region. The simplest way to 836;; do this in practice is to use the beginning/end-of-logical-line functions. 837;; Don't overlook the possibility of the buffer change being the "recapturing" 838;; of a previously escaped newline. 839(defmacro c-awk-advise-fl-for-awk-region (function) 840 `(defadvice ,function (before get-awk-region activate) 841;; When font-locking an AWK Mode buffer, make sure that any string/regexp is 842;; completely font-locked. 
843 (when (eq major-mode 'awk-mode) 844 (save-excursion 845 (ad-set-arg 1 (c-awk-end-of-change-region 846 (ad-get-arg 0) ; beg 847 (ad-get-arg 1) ; end 848 (ad-get-arg 2))) ; old-len 849 (ad-set-arg 0 (c-awk-beginning-of-logical-line (ad-get-arg 0))))))) 850 851(c-awk-advise-fl-for-awk-region font-lock-after-change-function) 852(c-awk-advise-fl-for-awk-region jit-lock-after-change) 853(c-awk-advise-fl-for-awk-region lazy-lock-defer-rest-after-change) 854(c-awk-advise-fl-for-awk-region lazy-lock-defer-line-after-change) 855 856;; Awk regexps written with help from Peter Galbraith 857;; <galbraith@mixing.qc.dfo.ca>. 858;; Take GNU Emacs's 'words out of the following regexp-opts. They dont work 859;; in Xemacs 21.4.4. acm 2002/9/19. 860(defconst awk-font-lock-keywords 861 (eval-when-compile 862 (list 863 ;; Function names. 864 '("^\\s *\\(func\\(tion\\)?\\)\\>\\s *\\(\\sw+\\)?" 865 (1 font-lock-keyword-face) (3 font-lock-function-name-face nil t)) 866 ;; 867 ;; Variable names. 868 (cons 869 (concat "\\<" 870 (regexp-opt 871 '("ARGC" "ARGIND" "ARGV" "BINMODE" "CONVFMT" "ENVIRON" 872 "ERRNO" "FIELDWIDTHS" "FILENAME" "FNR" "FS" "IGNORECASE" 873 "LINT" "NF" "NR" "OFMT" "OFS" "ORS" "PROCINFO" "RLENGTH" 874 "RS" "RSTART" "RT" "SUBSEP" "TEXTDOMAIN") t) "\\>") 875 'font-lock-variable-name-face) 876 877 ;; Special file names. (acm, 2002/7/22) 878 ;; The following regexp was created by first evaluating this in GNU Emacs 21.1: 879 ;; (regexp-opt '("/dev/stdin" "/dev/stdout" "/dev/stderr" "/dev/fd/n" "/dev/pid" 880 ;; "/dev/ppid" "/dev/pgrpid" "/dev/user") 'words) 881 ;; , removing the "?:" from each "\\(?:" (for backward compatibility with older Emacsen) 882 ;; , replacing the "n" in "dev/fd/n" with "[0-9]+" 883 ;; , removing the unwanted \\< at the beginning, and finally filling out the 884 ;; regexp so that a " must come before, and either a " or heuristic stuff after. 
885 ;; The surrounding quotes are fontified along with the filename, since, semantically, 886 ;; they are an indivisible unit. 887 '("\\(\"/dev/\\(fd/[0-9]+\\|p\\(\\(\\(gr\\)?p\\)?id\\)\\|\ 888std\\(err\\|in\\|out\\)\\|user\\)\\)\\>\ 889\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)" 890 (1 font-lock-variable-name-face t) 891 (8 font-lock-variable-name-face t t)) 892 ;; Do the same (almost) with 893 ;; (regexp-opt '("/inet/tcp/lport/rhost/rport" "/inet/udp/lport/rhost/rport" 894 ;; "/inet/raw/lport/rhost/rport") 'words) 895 ;; This cannot be combined with the above pattern, because the match number 896 ;; for the (optional) closing \" would then exceed 9. 897 '("\\(\"/inet/\\(\\(raw\\|\\(tc\\|ud\\)p\\)/lport/rhost/rport\\)\\)\\>\ 898\\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)" 899 (1 font-lock-variable-name-face t) 900 (6 font-lock-variable-name-face t t)) 901 902 ;; Keywords. 903 (concat "\\<" 904 (regexp-opt 905 '("BEGIN" "END" "break" "continue" "delete" "do" "else" 906 "exit" "for" "getline" "if" "in" "next" "nextfile" 907 "return" "while") 908 t) "\\>") 909 910 ;; Builtins. 911 `(eval . (list 912 ,(concat 913 "\\<" 914 (regexp-opt 915 '("adump" "and" "asort" "atan2" "bindtextdomain" "close" 916 "compl" "cos" "dcgettext" "exp" "extension" "fflush" 917 "gensub" "gsub" "index" "int" "length" "log" "lshift" 918 "match" "mktime" "or" "print" "printf" "rand" "rshift" 919 "sin" "split" "sprintf" "sqrt" "srand" "stopme" 920 "strftime" "strtonum" "sub" "substr" "system" 921 "systime" "tolower" "toupper" "xor") t) 922 "\\>") 923 0 c-preprocessor-face-name)) 924 925 ;; gawk debugging keywords. (acm, 2002/7/21) 926 ;; (Removed, 2003/6/6. These functions are now fontified as built-ins) 927 ;; (list (concat "\\<" (regexp-opt '("adump" "stopme") t) "\\>") 928 ;; 0 'font-lock-warning-face) 929 930 ;; User defined functions with an apparent spurious space before the 931 ;; opening parenthesis. acm, 2002/5/30. 
932 `(,(concat "\\(\\w\\|_\\)" c-awk-escaped-nls* "\\s " 933 c-awk-escaped-nls*-with-space* "(") 934 (0 'font-lock-warning-face)) 935 936 ;; Space after \ in what looks like an escaped newline. 2002/5/31 937 '("\\\\\\s +$" 0 font-lock-warning-face t) 938 939 ;; Unbalanced string (") or regexp (/) delimiters. 2002/02/16. 940 '("\\s|" 0 font-lock-warning-face t nil) 941 ;; gawk 3.1 localizable strings ( _"translate me!"). 2002/5/21 942 '("\\(_\\)\\s|" 1 font-lock-warning-face) 943 '("\\(_\\)\\s\"" 1 font-lock-string-face) ; FIXME! not for XEmacs. 2002/10/6 944 )) 945 "Default expressions to highlight in AWK mode.") 946 947;; ACM 2002/9/29. Movement functions, e.g. for C-M-a and C-M-e 948 949;; The following three regexps differ from those earlier on in cc-awk.el in 950;; that they assume the syntax-table properties have been set. They are thus 951;; not useful for code which sets these properties. 952(defconst c-awk-terminated-regexp-or-string-here-re "\\=\\s\"\\S\"*\\s\"") 953;; Matches a terminated string/regexp. 954 955(defconst c-awk-unterminated-regexp-or-string-here-re "\\=\\s|\\S|*$") 956;; Matches an unterminated string/regexp, NOT including the eol at the end. 957 958(defconst c-awk-harmless-pattern-characters* 959 (concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*")) 960;; Matches any "harmless" character in a pattern or an escaped character pair. 961 962(defun c-awk-at-statement-end-p () 963 ;; Point is not inside a comment or string. Is it AT the end of a 964 ;; statement? This means immediately after the last non-ws character of the 965 ;; statement. The caller is responsible for widening the buffer, if 966 ;; appropriate. 
967 (and (not (bobp)) 968 (save-excursion 969 (backward-char) 970 (or (looking-at "[};]") 971 (and (memq (c-awk-get-NL-prop-cur-line) '(?\$ ?\\)) 972 (looking-at 973 (eval-when-compile 974 (concat "[^ \t\n\r\\]" c-awk-escaped-nls*-with-space* 975 "[#\n\r]")))))))) 976 977(defun c-awk-beginning-of-defun (&optional arg) 978 "Move backward to the beginning of an AWK \"defun\". With ARG, do it that 979many times. Negative arg -N means move forward to Nth following beginning of 980defun. Returns t unless search stops due to beginning or end of buffer. 981 982By a \"defun\" is meant either a pattern-action pair or a function. The start 983of a defun is recognized as code starting at column zero which is neither a 984closing brace nor a comment nor a continuation of the previous line. Unlike 985in some other modes, having an opening brace at column 0 is neither necessary 986nor helpful. 987 988Note that this function might do hidden buffer changes. See the 989comment at the start of cc-engine.el for more info." 990 (interactive "p") 991 (or arg (setq arg 1)) 992 (save-match-data 993 (c-save-buffer-state ; ensures the buffer is writable. 994 nil 995 (let ((found t)) ; Has the most recent regexp search found b-of-defun? 996 (if (>= arg 0) 997 ;; Go back one defun each time round the following loop. (For +ve arg) 998 (while (and found (> arg 0) (not (eq (point) (point-min)))) 999 ;; Go back one "candidate" each time round the next loop until one 1000 ;; is genuinely a beginning-of-defun. 1001 (while (and (setq found (search-backward-regexp 1002 "^[^#} \t\n\r]" (point-min) 'stop-at-limit)) 1003 (not (memq (c-awk-get-NL-prop-prev-line) '(?\$ ?\} ?\#))))) 1004 (setq arg (1- arg))) 1005 ;; The same for a -ve arg. 1006 (if (not (eq (point) (point-max))) (forward-char 1)) 1007 (while (and found (< arg 0) (not (eq (point) (point-max)))) ; The same for -ve arg. 
1008 (while (and (setq found (search-forward-regexp 1009 "^[^#} \t\n\r]" (point-max) 'stop-at-limit)) 1010 (not (memq (c-awk-get-NL-prop-prev-line) '(?\$ ?\} ?\#))))) 1011 (setq arg (1+ arg))) 1012 (if found (goto-char (match-beginning 0)))) 1013 (eq arg 0))))) 1014 1015(defun c-awk-forward-awk-pattern () 1016 ;; Point is at the start of an AWK pattern (which may be null) or function 1017 ;; declaration. Move to the pattern's end, and past any trailing space or 1018 ;; comment. Typically, we stop at the { which denotes the corresponding AWK 1019 ;; action/function body. Otherwise we stop at the EOL (or ;) marking the 1020 ;; absence of an explicit action. 1021 ;; 1022 ;; This function might do hidden buffer changes. 1023 (while 1024 (progn 1025 (search-forward-regexp c-awk-harmless-pattern-characters*) 1026 (if (looking-at "#") (end-of-line)) 1027 (cond 1028 ((eobp) nil) 1029 ((looking-at "[{;]") nil) ; We've finished! 1030 ((eolp) 1031 (if (c-awk-cur-line-incomplete-p) 1032 (forward-line) ; returns non-nil 1033 nil)) 1034 ((search-forward-regexp c-awk-terminated-regexp-or-string-here-re nil t)) 1035 ((search-forward-regexp c-awk-unterminated-regexp-or-string-here-re nil t)) 1036 ((looking-at "/") (forward-char) t))))) ; division sign. 1037 1038(defun c-awk-end-of-defun1 () 1039 ;; point is at the start of a "defun". Move to its end. Return end position. 1040 ;; 1041 ;; This function might do hidden buffer changes. 1042 (c-awk-forward-awk-pattern) 1043 (cond 1044 ((looking-at "{") (goto-char (scan-sexps (point) 1))) 1045 ((looking-at ";") (forward-char)) 1046 ((eolp)) 1047 (t (error "c-awk-end-of-defun1: Failure of c-awk-forward-awk-pattern"))) 1048 (point)) 1049 1050(defun c-awk-beginning-of-defun-p () 1051 ;; Are we already at the beginning of a defun? (i.e. at code in column 0 1052 ;; which isn't a }, and isn't a continuation line of any sort. 1053 ;; 1054 ;; This function might do hidden buffer changes. 
1055 (and (looking-at "^[^#} \t\n\r]") 1056 (not (c-awk-prev-line-incomplete-p)))) 1057 1058(defun c-awk-end-of-defun (&optional arg) 1059 "Move forward to next end of defun. With argument, do it that many times. 1060Negative argument -N means move back to Nth preceding end of defun. 1061 1062An end of a defun occurs right after the closing brace that matches the 1063opening brace at its start, or immediately after the AWK pattern when there is 1064no explicit action; see function `c-awk-beginning-of-defun'. 1065 1066Note that this function might do hidden buffer changes. See the 1067comment at the start of cc-engine.el for more info." 1068 (interactive "p") 1069 (or arg (setq arg 1)) 1070 (save-match-data 1071 (c-save-buffer-state 1072 nil 1073 (let ((start-point (point)) end-point) 1074 ;; Strategy: (For +ve ARG): If we're not already at a beginning-of-defun, 1075 ;; move backwards to one. 1076 ;; Repeat [(i) move forward to end-of-current-defun (see below); 1077 ;; (ii) If this isn't it, move forward to beginning-of-defun]. 1078 ;; We start counting ARG only when step (i) has passed the original point. 1079 (when (> arg 0) 1080 ;; Try to move back to a beginning-of-defun, if not already at one. 1081 (if (not (c-awk-beginning-of-defun-p)) 1082 (when (not (c-awk-beginning-of-defun 1)) ; No bo-defun before point. 1083 (goto-char start-point) 1084 (c-awk-beginning-of-defun -1))) ; if this fails, we're at EOB, tough! 
1085 ;; Now count forward, one defun at a time 1086 (while (and (not (eobp)) 1087 (c-awk-end-of-defun1) 1088 (if (> (point) start-point) (setq arg (1- arg)) t) 1089 (> arg 0) 1090 (c-awk-beginning-of-defun -1)))) 1091 1092 (when (< arg 0) 1093 (setq end-point start-point) 1094 (while (and (not (bobp)) 1095 (c-awk-beginning-of-defun 1) 1096 (if (< (setq end-point (if (bobp) (point) 1097 (save-excursion (c-awk-end-of-defun1)))) 1098 start-point) 1099 (setq arg (1+ arg)) t) 1100 (< arg 0))) 1101 (goto-char (min start-point end-point))))))) 1102 1103 1104(cc-provide 'cc-awk) ; Changed from 'awk-mode, ACM 2002/5/21 1105 1106;;; arch-tag: c4836289-3aa4-4a59-9934-9ccc2bacccf3 1107;;; awk-mode.el ends here 1108