1/* $NetBSD: v_word.c,v 1.2 2008/12/05 22:51:43 christos Exp $ */ 2 3/*- 4 * Copyright (c) 1992, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * Copyright (c) 1992, 1993, 1994, 1995, 1996 7 * Keith Bostic. All rights reserved. 8 * 9 * See the LICENSE file for redistribution information. 10 */ 11 12#include "config.h" 13 14#ifndef lint 15static const char sccsid[] = "Id: v_word.c,v 10.6 2001/06/25 15:19:36 skimo Exp (Berkeley) Date: 2001/06/25 15:19:36"; 16#endif /* not lint */ 17 18#include <sys/types.h> 19#include <sys/queue.h> 20#include <sys/time.h> 21 22#include <bitstring.h> 23#include <ctype.h> 24#include <limits.h> 25#include <stdio.h> 26 27#include "../common/common.h" 28#include "vi.h" 29 30/* 31 * There are two types of "words". Bigwords are easy -- groups of anything 32 * delimited by whitespace. Normal words are trickier. They are either a 33 * group of characters, numbers and underscores, or a group of anything but, 34 * delimited by whitespace. When for a word, if you're in whitespace, it's 35 * easy, just remove the whitespace and go to the beginning or end of the 36 * word. Otherwise, figure out if the next character is in a different group. 37 * If it is, go to the beginning or end of that group, otherwise, go to the 38 * beginning or end of the current group. The historic version of vi didn't 39 * get this right, so, for example, there were cases where "4e" was not the 40 * same as "eeee" -- in particular, single character words, and commands that 41 * began in whitespace were almost always handled incorrectly. To get it right 42 * you have to resolve the cursor after each search so that the look-ahead to 43 * figure out what type of "word" the cursor is in will be correct. 44 * 45 * Empty lines, and lines that consist of only white-space characters count 46 * as a single word, and the beginning and end of the file counts as an 47 * infinite number of words. 48 * 49 * Movements associated with commands are different than movement commands. 50 * For example, in "abc def", with the cursor on the 'a', "cw" is from 51 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white 52 * space is discarded from the change movement. Another example is that, 53 * in the same string, a "cw" on any white space character replaces that 54 * single character, and nothing else. Ain't nothin' in here that's easy. 55 * 56 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands 57 * would treat groups of empty lines as individual words, i.e. the command 58 * would move the cursor to each new empty line. The 'e' and 'E' commands 59 * would treat groups of empty lines as a single word, i.e. the first use 60 * would move past the group of lines. The 'b' command would just beep at 61 * you, or, if you did it from the start of the line as part of a motion 62 * command, go absolutely nuts. If the lines contained only white-space 63 * characters, the 'w' and 'W' commands would just beep at you, and the 'B', 64 * 'b', 'E' and 'e' commands would treat the group as a single word, and 65 * the 'B' and 'b' commands will treat the lines as individual words. This 66 * implementation treats all of these cases as a single white-space word. 67 */ 68 69enum which {BIGWORD, LITTLEWORD}; 70 71static int bword __P((SCR *, VICMD *, enum which)); 72static int eword __P((SCR *, VICMD *, enum which)); 73static int fword __P((SCR *, VICMD *, enum which)); 74 75/* 76 * v_wordW -- [count]W 77 * Move forward a bigword at a time. 78 * 79 * PUBLIC: int v_wordW __P((SCR *, VICMD *)); 80 */ 81int 82v_wordW(SCR *sp, VICMD *vp) 83{ 84 return (fword(sp, vp, BIGWORD)); 85} 86 87/* 88 * v_wordw -- [count]w 89 * Move forward a word at a time. 90 * 91 * PUBLIC: int v_wordw __P((SCR *, VICMD *)); 92 */ 93int 94v_wordw(SCR *sp, VICMD *vp) 95{ 96 return (fword(sp, vp, LITTLEWORD)); 97} 98 99/* 100 * fword -- 101 * Move forward by words. 102 */ 103static int 104fword(SCR *sp, VICMD *vp, enum which type) 105{ 106 enum { INWORD, NOTWORD } state; 107 VCS cs; 108 u_long cnt; 109 110 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 111 cs.cs_lno = vp->m_start.lno; 112 cs.cs_cno = vp->m_start.cno; 113 if (cs_init(sp, &cs)) 114 return (1); 115 116 /* 117 * If in white-space: 118 * If the count is 1, and it's a change command, we're done. 119 * Else, move to the first non-white-space character, which 120 * counts as a single word move. If it's a motion command, 121 * don't move off the end of the line. 122 */ 123 if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))) { 124 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { 125 if (ISCMD(vp->rkp, 'c')) 126 return (0); 127 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { 128 if (cs_fspace(sp, &cs)) 129 return (1); 130 goto ret; 131 } 132 } 133 if (cs_fblank(sp, &cs)) 134 return (1); 135 --cnt; 136 } 137 138 /* 139 * Cyclically move to the next word -- this involves skipping 140 * over word characters and then any trailing non-word characters. 141 * Note, for the 'w' command, the definition of a word keeps 142 * switching. 143 */ 144 if (type == BIGWORD) 145 while (cnt--) { 146 for (;;) { 147 if (cs_next(sp, &cs)) 148 return (1); 149 if (cs.cs_flags == CS_EOF) 150 goto ret; 151 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 152 break; 153 } 154 /* 155 * If a motion command and we're at the end of the 156 * last word, we're done. Delete and yank eat any 157 * trailing blanks, but we don't move off the end 158 * of the line regardless. 159 */ 160 if (cnt == 0 && ISMOTION(vp)) { 161 if ((ISCMD(vp->rkp, 'd') || 162 ISCMD(vp->rkp, 'y')) && 163 cs_fspace(sp, &cs)) 164 return (1); 165 break; 166 } 167 168 /* Eat whitespace characters. */ 169 if (cs_fblank(sp, &cs)) 170 return (1); 171 if (cs.cs_flags == CS_EOF) 172 goto ret; 173 } 174 else 175 while (cnt--) { 176 state = cs.cs_flags == 0 && 177 inword(cs.cs_ch) ? INWORD : NOTWORD; 178 for (;;) { 179 if (cs_next(sp, &cs)) 180 return (1); 181 if (cs.cs_flags == CS_EOF) 182 goto ret; 183 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 184 break; 185 if (state == INWORD) { 186 if (!inword(cs.cs_ch)) 187 break; 188 } else 189 if (inword(cs.cs_ch)) 190 break; 191 } 192 /* See comment above. */ 193 if (cnt == 0 && ISMOTION(vp)) { 194 if ((ISCMD(vp->rkp, 'd') || 195 ISCMD(vp->rkp, 'y')) && 196 cs_fspace(sp, &cs)) 197 return (1); 198 break; 199 } 200 201 /* Eat whitespace characters. */ 202 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 203 if (cs_fblank(sp, &cs)) 204 return (1); 205 if (cs.cs_flags == CS_EOF) 206 goto ret; 207 } 208 209 /* 210 * If we didn't move, we must be at EOF. 211 * 212 * !!! 213 * That's okay for motion commands, however. 214 */ 215ret: if (!ISMOTION(vp) && 216 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 217 v_eof(sp, &vp->m_start); 218 return (1); 219 } 220 221 /* Adjust the end of the range for motion commands. */ 222 vp->m_stop.lno = cs.cs_lno; 223 vp->m_stop.cno = cs.cs_cno; 224 if (ISMOTION(vp) && cs.cs_flags == 0) 225 --vp->m_stop.cno; 226 227 /* 228 * Non-motion commands move to the end of the range. Delete 229 * and yank stay at the start, ignore others. 230 */ 231 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 232 return (0); 233} 234 235/* 236 * v_wordE -- [count]E 237 * Move forward to the end of the bigword. 238 * 239 * PUBLIC: int v_wordE __P((SCR *, VICMD *)); 240 */ 241int 242v_wordE(SCR *sp, VICMD *vp) 243{ 244 return (eword(sp, vp, BIGWORD)); 245} 246 247/* 248 * v_worde -- [count]e 249 * Move forward to the end of the word. 250 * 251 * PUBLIC: int v_worde __P((SCR *, VICMD *)); 252 */ 253int 254v_worde(SCR *sp, VICMD *vp) 255{ 256 return (eword(sp, vp, LITTLEWORD)); 257} 258 259/* 260 * eword -- 261 * Move forward to the end of the word. 262 */ 263static int 264eword(SCR *sp, VICMD *vp, enum which type) 265{ 266 enum { INWORD, NOTWORD } state; 267 VCS cs; 268 u_long cnt; 269 270 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 271 cs.cs_lno = vp->m_start.lno; 272 cs.cs_cno = vp->m_start.cno; 273 if (cs_init(sp, &cs)) 274 return (1); 275 276 /* 277 * !!! 278 * If in whitespace, or the next character is whitespace, move past 279 * it. (This doesn't count as a word move.) Stay at the character 280 * past the current one, it sets word "state" for the 'e' command. 281 */ 282 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) { 283 if (cs_next(sp, &cs)) 284 return (1); 285 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) 286 goto start; 287 } 288 if (cs_fblank(sp, &cs)) 289 return (1); 290 291 /* 292 * Cyclically move to the next word -- this involves skipping 293 * over word characters and then any trailing non-word characters. 294 * Note, for the 'e' command, the definition of a word keeps 295 * switching. 296 */ 297start: if (type == BIGWORD) 298 while (cnt--) { 299 for (;;) { 300 if (cs_next(sp, &cs)) 301 return (1); 302 if (cs.cs_flags == CS_EOF) 303 goto ret; 304 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 305 break; 306 } 307 /* 308 * When we reach the start of the word after the last 309 * word, we're done. If we changed state, back up one 310 * to the end of the previous word. 311 */ 312 if (cnt == 0) { 313 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 314 return (1); 315 break; 316 } 317 318 /* Eat whitespace characters. */ 319 if (cs_fblank(sp, &cs)) 320 return (1); 321 if (cs.cs_flags == CS_EOF) 322 goto ret; 323 } 324 else 325 while (cnt--) { 326 state = cs.cs_flags == 0 && 327 inword(cs.cs_ch) ? INWORD : NOTWORD; 328 for (;;) { 329 if (cs_next(sp, &cs)) 330 return (1); 331 if (cs.cs_flags == CS_EOF) 332 goto ret; 333 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 334 break; 335 if (state == INWORD) { 336 if (!inword(cs.cs_ch)) 337 break; 338 } else 339 if (inword(cs.cs_ch)) 340 break; 341 } 342 /* See comment above. */ 343 if (cnt == 0) { 344 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 345 return (1); 346 break; 347 } 348 349 /* Eat whitespace characters. */ 350 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 351 if (cs_fblank(sp, &cs)) 352 return (1); 353 if (cs.cs_flags == CS_EOF) 354 goto ret; 355 } 356 357 /* 358 * If we didn't move, we must be at EOF. 359 * 360 * !!! 361 * That's okay for motion commands, however. 362 */ 363ret: if (!ISMOTION(vp) && 364 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 365 v_eof(sp, &vp->m_start); 366 return (1); 367 } 368 369 /* Set the end of the range for motion commands. */ 370 vp->m_stop.lno = cs.cs_lno; 371 vp->m_stop.cno = cs.cs_cno; 372 373 /* 374 * Non-motion commands move to the end of the range. 375 * Delete and yank stay at the start, ignore others. 376 */ 377 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 378 return (0); 379} 380 381/* 382 * v_WordB -- [count]B 383 * Move backward a bigword at a time. 384 * 385 * PUBLIC: int v_wordB __P((SCR *, VICMD *)); 386 */ 387int 388v_wordB(SCR *sp, VICMD *vp) 389{ 390 return (bword(sp, vp, BIGWORD)); 391} 392 393/* 394 * v_wordb -- [count]b 395 * Move backward a word at a time. 396 * 397 * PUBLIC: int v_wordb __P((SCR *, VICMD *)); 398 */ 399int 400v_wordb(SCR *sp, VICMD *vp) 401{ 402 return (bword(sp, vp, LITTLEWORD)); 403} 404 405/* 406 * bword -- 407 * Move backward by words. 408 */ 409static int 410bword(SCR *sp, VICMD *vp, enum which type) 411{ 412 enum { INWORD, NOTWORD } state; 413 VCS cs; 414 u_long cnt; 415 416 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 417 cs.cs_lno = vp->m_start.lno; 418 cs.cs_cno = vp->m_start.cno; 419 if (cs_init(sp, &cs)) 420 return (1); 421 422 /* 423 * !!! 424 * If in whitespace, or the previous character is whitespace, move 425 * past it. (This doesn't count as a word move.) Stay at the 426 * character before the current one, it sets word "state" for the 427 * 'b' command. 428 */ 429 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) { 430 if (cs_prev(sp, &cs)) 431 return (1); 432 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) 433 goto start; 434 } 435 if (cs_bblank(sp, &cs)) 436 return (1); 437 438 /* 439 * Cyclically move to the beginning of the previous word -- this 440 * involves skipping over word characters and then any trailing 441 * non-word characters. Note, for the 'b' command, the definition 442 * of a word keeps switching. 443 */ 444start: if (type == BIGWORD) 445 while (cnt--) { 446 for (;;) { 447 if (cs_prev(sp, &cs)) 448 return (1); 449 if (cs.cs_flags == CS_SOF) 450 goto ret; 451 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 452 break; 453 } 454 /* 455 * When we reach the end of the word before the last 456 * word, we're done. If we changed state, move forward 457 * one to the end of the next word. 458 */ 459 if (cnt == 0) { 460 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 461 return (1); 462 break; 463 } 464 465 /* Eat whitespace characters. */ 466 if (cs_bblank(sp, &cs)) 467 return (1); 468 if (cs.cs_flags == CS_SOF) 469 goto ret; 470 } 471 else 472 while (cnt--) { 473 state = cs.cs_flags == 0 && 474 inword(cs.cs_ch) ? INWORD : NOTWORD; 475 for (;;) { 476 if (cs_prev(sp, &cs)) 477 return (1); 478 if (cs.cs_flags == CS_SOF) 479 goto ret; 480 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 481 break; 482 if (state == INWORD) { 483 if (!inword(cs.cs_ch)) 484 break; 485 } else 486 if (inword(cs.cs_ch)) 487 break; 488 } 489 /* See comment above. */ 490 if (cnt == 0) { 491 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 492 return (1); 493 break; 494 } 495 496 /* Eat whitespace characters. */ 497 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 498 if (cs_bblank(sp, &cs)) 499 return (1); 500 if (cs.cs_flags == CS_SOF) 501 goto ret; 502 } 503 504 /* If we didn't move, we must be at SOF. */ 505ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 506 v_sof(sp, &vp->m_start); 507 return (1); 508 } 509 510 /* Set the end of the range for motion commands. */ 511 vp->m_stop.lno = cs.cs_lno; 512 vp->m_stop.cno = cs.cs_cno; 513 514 /* 515 * All commands move to the end of the range. Motion commands 516 * adjust the starting point to the character before the current 517 * one. 518 * 519 * !!! 520 * The historic vi didn't get this right -- the `yb' command yanked 521 * the right stuff and even updated the cursor value, but the cursor 522 * was not actually updated on the screen. 523 */ 524 vp->m_final = vp->m_stop; 525 if (ISMOTION(vp)) 526 --vp->m_start.cno; 527 return (0); 528} 529