1/* $OpenBSD: v_word.c,v 1.7 2014/11/12 04:28:41 bentley Exp $ */ 2 3/*- 4 * Copyright (c) 1992, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * Copyright (c) 1992, 1993, 1994, 1995, 1996 7 * Keith Bostic. All rights reserved. 8 * 9 * See the LICENSE file for redistribution information. 10 */ 11 12#include "config.h" 13 14#include <sys/types.h> 15#include <sys/queue.h> 16#include <sys/time.h> 17 18#include <bitstring.h> 19#include <ctype.h> 20#include <limits.h> 21#include <stdio.h> 22 23#include "../common/common.h" 24#include "vi.h" 25 26/* 27 * There are two types of "words". Bigwords are easy -- groups of anything 28 * delimited by whitespace. Normal words are trickier. They are either a 29 * group of characters, numbers and underscores, or a group of anything but, 30 * delimited by whitespace. When for a word, if you're in whitespace, it's 31 * easy, just remove the whitespace and go to the beginning or end of the 32 * word. Otherwise, figure out if the next character is in a different group. 33 * If it is, go to the beginning or end of that group, otherwise, go to the 34 * beginning or end of the current group. The historic version of vi didn't 35 * get this right, so, for example, there were cases where "4e" was not the 36 * same as "eeee" -- in particular, single character words, and commands that 37 * began in whitespace were almost always handled incorrectly. To get it right 38 * you have to resolve the cursor after each search so that the look-ahead to 39 * figure out what type of "word" the cursor is in will be correct. 40 * 41 * Empty lines, and lines that consist of only white-space characters count 42 * as a single word, and the beginning and end of the file counts as an 43 * infinite number of words. 44 * 45 * Movements associated with commands are different than movement commands. 46 * For example, in "abc def", with the cursor on the 'a', "cw" is from 47 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white 48 * space is discarded from the change movement. Another example is that, 49 * in the same string, a "cw" on any white space character replaces that 50 * single character, and nothing else. Ain't nothin' in here that's easy. 51 * 52 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands 53 * would treat groups of empty lines as individual words, i.e. the command 54 * would move the cursor to each new empty line. The 'e' and 'E' commands 55 * would treat groups of empty lines as a single word, i.e. the first use 56 * would move past the group of lines. The 'b' command would just beep at 57 * you, or, if you did it from the start of the line as part of a motion 58 * command, go absolutely nuts. If the lines contained only white-space 59 * characters, the 'w' and 'W' commands would just beep at you, and the 'B', 60 * 'b', 'E' and 'e' commands would treat the group as a single word, and 61 * the 'B' and 'b' commands will treat the lines as individual words. This 62 * implementation treats all of these cases as a single white-space word. 63 */ 64 65enum which {BIGWORD, LITTLEWORD}; 66 67static int bword(SCR *, VICMD *, enum which); 68static int eword(SCR *, VICMD *, enum which); 69static int fword(SCR *, VICMD *, enum which); 70 71/* 72 * v_wordW -- [count]W 73 * Move forward a bigword at a time. 74 * 75 * PUBLIC: int v_wordW(SCR *, VICMD *); 76 */ 77int 78v_wordW(SCR *sp, VICMD *vp) 79{ 80 return (fword(sp, vp, BIGWORD)); 81} 82 83/* 84 * v_wordw -- [count]w 85 * Move forward a word at a time. 86 * 87 * PUBLIC: int v_wordw(SCR *, VICMD *); 88 */ 89int 90v_wordw(SCR *sp, VICMD *vp) 91{ 92 return (fword(sp, vp, LITTLEWORD)); 93} 94 95/* 96 * fword -- 97 * Move forward by words. 98 */ 99static int 100fword(SCR *sp, VICMD *vp, enum which type) 101{ 102 enum { INWORD, NOTWORD } state; 103 VCS cs; 104 u_long cnt; 105 106 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 107 cs.cs_lno = vp->m_start.lno; 108 cs.cs_cno = vp->m_start.cno; 109 if (cs_init(sp, &cs)) 110 return (1); 111 112 /* 113 * If in white-space: 114 * If the count is 1, and it's a change command, we're done. 115 * Else, move to the first non-white-space character, which 116 * counts as a single word move. If it's a motion command, 117 * don't move off the end of the line. 118 */ 119 if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) { 120 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { 121 if (ISCMD(vp->rkp, 'c')) 122 return (0); 123 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { 124 if (cs_fspace(sp, &cs)) 125 return (1); 126 goto ret; 127 } 128 } 129 if (cs_fblank(sp, &cs)) 130 return (1); 131 --cnt; 132 } 133 134 /* 135 * Cyclically move to the next word -- this involves skipping 136 * over word characters and then any trailing non-word characters. 137 * Note, for the 'w' command, the definition of a word keeps 138 * switching. 139 */ 140 if (type == BIGWORD) 141 while (cnt--) { 142 for (;;) { 143 if (cs_next(sp, &cs)) 144 return (1); 145 if (cs.cs_flags == CS_EOF) 146 goto ret; 147 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 148 break; 149 } 150 /* 151 * If a motion command and we're at the end of the 152 * last word, we're done. Delete and yank eat any 153 * trailing blanks, but we don't move off the end 154 * of the line regardless. 155 */ 156 if (cnt == 0 && ISMOTION(vp)) { 157 if ((ISCMD(vp->rkp, 'd') || 158 ISCMD(vp->rkp, 'y')) && 159 cs_fspace(sp, &cs)) 160 return (1); 161 break; 162 } 163 164 /* Eat whitespace characters. */ 165 if (cs_fblank(sp, &cs)) 166 return (1); 167 if (cs.cs_flags == CS_EOF) 168 goto ret; 169 } 170 else 171 while (cnt--) { 172 state = cs.cs_flags == 0 && 173 inword(cs.cs_ch) ? INWORD : NOTWORD; 174 for (;;) { 175 if (cs_next(sp, &cs)) 176 return (1); 177 if (cs.cs_flags == CS_EOF) 178 goto ret; 179 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 180 break; 181 if (state == INWORD) { 182 if (!inword(cs.cs_ch)) 183 break; 184 } else 185 if (inword(cs.cs_ch)) 186 break; 187 } 188 /* See comment above. */ 189 if (cnt == 0 && ISMOTION(vp)) { 190 if ((ISCMD(vp->rkp, 'd') || 191 ISCMD(vp->rkp, 'y')) && 192 cs_fspace(sp, &cs)) 193 return (1); 194 break; 195 } 196 197 /* Eat whitespace characters. */ 198 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 199 if (cs_fblank(sp, &cs)) 200 return (1); 201 if (cs.cs_flags == CS_EOF) 202 goto ret; 203 } 204 205 /* 206 * If we didn't move, we must be at EOF. 207 * 208 * !!! 209 * That's okay for motion commands, however. 210 */ 211ret: if (!ISMOTION(vp) && 212 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 213 v_eof(sp, &vp->m_start); 214 return (1); 215 } 216 217 /* Adjust the end of the range for motion commands. */ 218 vp->m_stop.lno = cs.cs_lno; 219 vp->m_stop.cno = cs.cs_cno; 220 if (ISMOTION(vp) && cs.cs_flags == 0) 221 --vp->m_stop.cno; 222 223 /* 224 * Non-motion commands move to the end of the range. Delete 225 * and yank stay at the start, ignore others. 226 */ 227 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 228 return (0); 229} 230 231/* 232 * v_wordE -- [count]E 233 * Move forward to the end of the bigword. 234 * 235 * PUBLIC: int v_wordE(SCR *, VICMD *); 236 */ 237int 238v_wordE(SCR *sp, VICMD *vp) 239{ 240 return (eword(sp, vp, BIGWORD)); 241} 242 243/* 244 * v_worde -- [count]e 245 * Move forward to the end of the word. 246 * 247 * PUBLIC: int v_worde(SCR *, VICMD *); 248 */ 249int 250v_worde(SCR *sp, VICMD *vp) 251{ 252 return (eword(sp, vp, LITTLEWORD)); 253} 254 255/* 256 * eword -- 257 * Move forward to the end of the word. 258 */ 259static int 260eword(SCR *sp, VICMD *vp, enum which type) 261{ 262 enum { INWORD, NOTWORD } state; 263 VCS cs; 264 u_long cnt; 265 266 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 267 cs.cs_lno = vp->m_start.lno; 268 cs.cs_cno = vp->m_start.cno; 269 if (cs_init(sp, &cs)) 270 return (1); 271 272 /* 273 * !!! 274 * If in whitespace, or the next character is whitespace, move past 275 * it. (This doesn't count as a word move.) Stay at the character 276 * past the current one, it sets word "state" for the 'e' command. 277 */ 278 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { 279 if (cs_next(sp, &cs)) 280 return (1); 281 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) 282 goto start; 283 } 284 if (cs_fblank(sp, &cs)) 285 return (1); 286 287 /* 288 * Cyclically move to the next word -- this involves skipping 289 * over word characters and then any trailing non-word characters. 290 * Note, for the 'e' command, the definition of a word keeps 291 * switching. 292 */ 293start: if (type == BIGWORD) 294 while (cnt--) { 295 for (;;) { 296 if (cs_next(sp, &cs)) 297 return (1); 298 if (cs.cs_flags == CS_EOF) 299 goto ret; 300 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 301 break; 302 } 303 /* 304 * When we reach the start of the word after the last 305 * word, we're done. If we changed state, back up one 306 * to the end of the previous word. 307 */ 308 if (cnt == 0) { 309 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 310 return (1); 311 break; 312 } 313 314 /* Eat whitespace characters. */ 315 if (cs_fblank(sp, &cs)) 316 return (1); 317 if (cs.cs_flags == CS_EOF) 318 goto ret; 319 } 320 else 321 while (cnt--) { 322 state = cs.cs_flags == 0 && 323 inword(cs.cs_ch) ? INWORD : NOTWORD; 324 for (;;) { 325 if (cs_next(sp, &cs)) 326 return (1); 327 if (cs.cs_flags == CS_EOF) 328 goto ret; 329 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 330 break; 331 if (state == INWORD) { 332 if (!inword(cs.cs_ch)) 333 break; 334 } else 335 if (inword(cs.cs_ch)) 336 break; 337 } 338 /* See comment above. */ 339 if (cnt == 0) { 340 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 341 return (1); 342 break; 343 } 344 345 /* Eat whitespace characters. */ 346 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 347 if (cs_fblank(sp, &cs)) 348 return (1); 349 if (cs.cs_flags == CS_EOF) 350 goto ret; 351 } 352 353 /* 354 * If we didn't move, we must be at EOF. 355 * 356 * !!! 357 * That's okay for motion commands, however. 358 */ 359ret: if (!ISMOTION(vp) && 360 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 361 v_eof(sp, &vp->m_start); 362 return (1); 363 } 364 365 /* Set the end of the range for motion commands. */ 366 vp->m_stop.lno = cs.cs_lno; 367 vp->m_stop.cno = cs.cs_cno; 368 369 /* 370 * Non-motion commands move to the end of the range. 371 * Delete and yank stay at the start, ignore others. 372 */ 373 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 374 return (0); 375} 376 377/* 378 * v_WordB -- [count]B 379 * Move backward a bigword at a time. 380 * 381 * PUBLIC: int v_wordB(SCR *, VICMD *); 382 */ 383int 384v_wordB(SCR *sp, VICMD *vp) 385{ 386 return (bword(sp, vp, BIGWORD)); 387} 388 389/* 390 * v_wordb -- [count]b 391 * Move backward a word at a time. 392 * 393 * PUBLIC: int v_wordb(SCR *, VICMD *); 394 */ 395int 396v_wordb(SCR *sp, VICMD *vp) 397{ 398 return (bword(sp, vp, LITTLEWORD)); 399} 400 401/* 402 * bword -- 403 * Move backward by words. 404 */ 405static int 406bword(SCR *sp, VICMD *vp, enum which type) 407{ 408 enum { INWORD, NOTWORD } state; 409 VCS cs; 410 u_long cnt; 411 412 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 413 cs.cs_lno = vp->m_start.lno; 414 cs.cs_cno = vp->m_start.cno; 415 if (cs_init(sp, &cs)) 416 return (1); 417 418 /* 419 * !!! 420 * If in whitespace, or the previous character is whitespace, move 421 * past it. (This doesn't count as a word move.) Stay at the 422 * character before the current one, it sets word "state" for the 423 * 'b' command. 424 */ 425 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { 426 if (cs_prev(sp, &cs)) 427 return (1); 428 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) 429 goto start; 430 } 431 if (cs_bblank(sp, &cs)) 432 return (1); 433 434 /* 435 * Cyclically move to the beginning of the previous word -- this 436 * involves skipping over word characters and then any trailing 437 * non-word characters. Note, for the 'b' command, the definition 438 * of a word keeps switching. 439 */ 440start: if (type == BIGWORD) 441 while (cnt--) { 442 for (;;) { 443 if (cs_prev(sp, &cs)) 444 return (1); 445 if (cs.cs_flags == CS_SOF) 446 goto ret; 447 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 448 break; 449 } 450 /* 451 * When we reach the end of the word before the last 452 * word, we're done. If we changed state, move forward 453 * one to the end of the next word. 454 */ 455 if (cnt == 0) { 456 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 457 return (1); 458 break; 459 } 460 461 /* Eat whitespace characters. */ 462 if (cs_bblank(sp, &cs)) 463 return (1); 464 if (cs.cs_flags == CS_SOF) 465 goto ret; 466 } 467 else 468 while (cnt--) { 469 state = cs.cs_flags == 0 && 470 inword(cs.cs_ch) ? INWORD : NOTWORD; 471 for (;;) { 472 if (cs_prev(sp, &cs)) 473 return (1); 474 if (cs.cs_flags == CS_SOF) 475 goto ret; 476 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 477 break; 478 if (state == INWORD) { 479 if (!inword(cs.cs_ch)) 480 break; 481 } else 482 if (inword(cs.cs_ch)) 483 break; 484 } 485 /* See comment above. */ 486 if (cnt == 0) { 487 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 488 return (1); 489 break; 490 } 491 492 /* Eat whitespace characters. */ 493 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 494 if (cs_bblank(sp, &cs)) 495 return (1); 496 if (cs.cs_flags == CS_SOF) 497 goto ret; 498 } 499 500 /* If we didn't move, we must be at SOF. */ 501ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 502 v_sof(sp, &vp->m_start); 503 return (1); 504 } 505 506 /* Set the end of the range for motion commands. */ 507 vp->m_stop.lno = cs.cs_lno; 508 vp->m_stop.cno = cs.cs_cno; 509 510 /* 511 * All commands move to the end of the range. Motion commands 512 * adjust the starting point to the character before the current 513 * one. 514 * 515 * !!! 516 * The historic vi didn't get this right -- the `yb' command yanked 517 * the right stuff and even updated the cursor value, but the cursor 518 * was not actually updated on the screen. 519 */ 520 vp->m_final = vp->m_stop; 521 if (ISMOTION(vp)) 522 --vp->m_start.cno; 523 return (0); 524} 525