119304Speter/*- 219304Speter * Copyright (c) 1992, 1993, 1994 319304Speter * The Regents of the University of California. All rights reserved. 419304Speter * Copyright (c) 1992, 1993, 1994, 1995, 1996 519304Speter * Keith Bostic. All rights reserved. 619304Speter * 719304Speter * See the LICENSE file for redistribution information. 819304Speter */ 919304Speter 1019304Speter#include "config.h" 1119304Speter 1219304Speter#ifndef lint 13254225Speterstatic const char sccsid[] = "$Id: v_word.c,v 10.7 2011/12/27 00:49:31 zy Exp $"; 1419304Speter#endif /* not lint */ 1519304Speter 1619304Speter#include <sys/types.h> 1719304Speter#include <sys/queue.h> 1819304Speter#include <sys/time.h> 1919304Speter 2019304Speter#include <bitstring.h> 2119304Speter#include <ctype.h> 2219304Speter#include <limits.h> 2319304Speter#include <stdio.h> 2419304Speter 2519304Speter#include "../common/common.h" 2619304Speter#include "vi.h" 2719304Speter 2819304Speter/* 2919304Speter * There are two types of "words". Bigwords are easy -- groups of anything 3019304Speter * delimited by whitespace. Normal words are trickier. They are either a 3119304Speter * group of characters, numbers and underscores, or a group of anything but, 3219304Speter * delimited by whitespace. When for a word, if you're in whitespace, it's 3319304Speter * easy, just remove the whitespace and go to the beginning or end of the 3419304Speter * word. Otherwise, figure out if the next character is in a different group. 3519304Speter * If it is, go to the beginning or end of that group, otherwise, go to the 3619304Speter * beginning or end of the current group. The historic version of vi didn't 3719304Speter * get this right, so, for example, there were cases where "4e" was not the 3819304Speter * same as "eeee" -- in particular, single character words, and commands that 3919304Speter * began in whitespace were almost always handled incorrectly. To get it right 4019304Speter * you have to resolve the cursor after each search so that the look-ahead to 4119304Speter * figure out what type of "word" the cursor is in will be correct. 4219304Speter * 4319304Speter * Empty lines, and lines that consist of only white-space characters count 4419304Speter * as a single word, and the beginning and end of the file counts as an 4519304Speter * infinite number of words. 4619304Speter * 4719304Speter * Movements associated with commands are different than movement commands. 4819304Speter * For example, in "abc def", with the cursor on the 'a', "cw" is from 4919304Speter * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white 5019304Speter * space is discarded from the change movement. Another example is that, 5119304Speter * in the same string, a "cw" on any white space character replaces that 5219304Speter * single character, and nothing else. Ain't nothin' in here that's easy. 5319304Speter * 5419304Speter * One historic note -- in the original vi, the 'w', 'W' and 'B' commands 5519304Speter * would treat groups of empty lines as individual words, i.e. the command 5619304Speter * would move the cursor to each new empty line. The 'e' and 'E' commands 5719304Speter * would treat groups of empty lines as a single word, i.e. the first use 5819304Speter * would move past the group of lines. The 'b' command would just beep at 5919304Speter * you, or, if you did it from the start of the line as part of a motion 6019304Speter * command, go absolutely nuts. If the lines contained only white-space 6119304Speter * characters, the 'w' and 'W' commands would just beep at you, and the 'B', 6219304Speter * 'b', 'E' and 'e' commands would treat the group as a single word, and 6319304Speter * the 'B' and 'b' commands will treat the lines as individual words. This 6419304Speter * implementation treats all of these cases as a single white-space word. 6519304Speter */ 6619304Speter 6719304Speterenum which {BIGWORD, LITTLEWORD}; 6819304Speter 6919304Speterstatic int bword __P((SCR *, VICMD *, enum which)); 7019304Speterstatic int eword __P((SCR *, VICMD *, enum which)); 7119304Speterstatic int fword __P((SCR *, VICMD *, enum which)); 7219304Speter 7319304Speter/* 7419304Speter * v_wordW -- [count]W 7519304Speter * Move forward a bigword at a time. 7619304Speter * 7719304Speter * PUBLIC: int v_wordW __P((SCR *, VICMD *)); 7819304Speter */ 7919304Speterint 80254225Speterv_wordW(SCR *sp, VICMD *vp) 8119304Speter{ 8219304Speter return (fword(sp, vp, BIGWORD)); 8319304Speter} 8419304Speter 8519304Speter/* 8619304Speter * v_wordw -- [count]w 8719304Speter * Move forward a word at a time. 8819304Speter * 8919304Speter * PUBLIC: int v_wordw __P((SCR *, VICMD *)); 9019304Speter */ 9119304Speterint 92254225Speterv_wordw(SCR *sp, VICMD *vp) 9319304Speter{ 9419304Speter return (fword(sp, vp, LITTLEWORD)); 9519304Speter} 9619304Speter 9719304Speter/* 9819304Speter * fword -- 9919304Speter * Move forward by words. 10019304Speter */ 10119304Speterstatic int 102254225Speterfword(SCR *sp, VICMD *vp, enum which type) 10319304Speter{ 10419304Speter enum { INWORD, NOTWORD } state; 10519304Speter VCS cs; 10619304Speter u_long cnt; 10719304Speter 10819304Speter cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 10919304Speter cs.cs_lno = vp->m_start.lno; 11019304Speter cs.cs_cno = vp->m_start.cno; 11119304Speter if (cs_init(sp, &cs)) 11219304Speter return (1); 11319304Speter 11419304Speter /* 11519304Speter * If in white-space: 11619304Speter * If the count is 1, and it's a change command, we're done. 11719304Speter * Else, move to the first non-white-space character, which 11819304Speter * counts as a single word move. If it's a motion command, 11919304Speter * don't move off the end of the line. 12019304Speter */ 121254225Speter if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK(cs.cs_ch))) { 12219304Speter if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { 12319304Speter if (ISCMD(vp->rkp, 'c')) 12419304Speter return (0); 12519304Speter if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { 12619304Speter if (cs_fspace(sp, &cs)) 12719304Speter return (1); 12819304Speter goto ret; 12919304Speter } 13019304Speter } 13119304Speter if (cs_fblank(sp, &cs)) 13219304Speter return (1); 13319304Speter --cnt; 13419304Speter } 13519304Speter 13619304Speter /* 13719304Speter * Cyclically move to the next word -- this involves skipping 13819304Speter * over word characters and then any trailing non-word characters. 13919304Speter * Note, for the 'w' command, the definition of a word keeps 14019304Speter * switching. 14119304Speter */ 14219304Speter if (type == BIGWORD) 14319304Speter while (cnt--) { 14419304Speter for (;;) { 14519304Speter if (cs_next(sp, &cs)) 14619304Speter return (1); 14719304Speter if (cs.cs_flags == CS_EOF) 14819304Speter goto ret; 149254225Speter if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 15019304Speter break; 15119304Speter } 15219304Speter /* 15319304Speter * If a motion command and we're at the end of the 15419304Speter * last word, we're done. Delete and yank eat any 15519304Speter * trailing blanks, but we don't move off the end 15619304Speter * of the line regardless. 15719304Speter */ 15819304Speter if (cnt == 0 && ISMOTION(vp)) { 15919304Speter if ((ISCMD(vp->rkp, 'd') || 16019304Speter ISCMD(vp->rkp, 'y')) && 16119304Speter cs_fspace(sp, &cs)) 16219304Speter return (1); 16319304Speter break; 16419304Speter } 16519304Speter 16619304Speter /* Eat whitespace characters. */ 16719304Speter if (cs_fblank(sp, &cs)) 16819304Speter return (1); 16919304Speter if (cs.cs_flags == CS_EOF) 17019304Speter goto ret; 17119304Speter } 17219304Speter else 17319304Speter while (cnt--) { 17419304Speter state = cs.cs_flags == 0 && 17519304Speter inword(cs.cs_ch) ? INWORD : NOTWORD; 17619304Speter for (;;) { 17719304Speter if (cs_next(sp, &cs)) 17819304Speter return (1); 17919304Speter if (cs.cs_flags == CS_EOF) 18019304Speter goto ret; 181254225Speter if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 18219304Speter break; 18319304Speter if (state == INWORD) { 18419304Speter if (!inword(cs.cs_ch)) 18519304Speter break; 18619304Speter } else 18719304Speter if (inword(cs.cs_ch)) 18819304Speter break; 18919304Speter } 19019304Speter /* See comment above. */ 19119304Speter if (cnt == 0 && ISMOTION(vp)) { 19219304Speter if ((ISCMD(vp->rkp, 'd') || 19319304Speter ISCMD(vp->rkp, 'y')) && 19419304Speter cs_fspace(sp, &cs)) 19519304Speter return (1); 19619304Speter break; 19719304Speter } 19819304Speter 19919304Speter /* Eat whitespace characters. */ 200254225Speter if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 20119304Speter if (cs_fblank(sp, &cs)) 20219304Speter return (1); 20319304Speter if (cs.cs_flags == CS_EOF) 20419304Speter goto ret; 20519304Speter } 20619304Speter 20719304Speter /* 20819304Speter * If we didn't move, we must be at EOF. 20919304Speter * 21019304Speter * !!! 21119304Speter * That's okay for motion commands, however. 21219304Speter */ 21319304Speterret: if (!ISMOTION(vp) && 21419304Speter cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 21519304Speter v_eof(sp, &vp->m_start); 21619304Speter return (1); 21719304Speter } 21819304Speter 21919304Speter /* Adjust the end of the range for motion commands. */ 22019304Speter vp->m_stop.lno = cs.cs_lno; 22119304Speter vp->m_stop.cno = cs.cs_cno; 22219304Speter if (ISMOTION(vp) && cs.cs_flags == 0) 22319304Speter --vp->m_stop.cno; 22419304Speter 22519304Speter /* 22619304Speter * Non-motion commands move to the end of the range. Delete 22719304Speter * and yank stay at the start, ignore others. 22819304Speter */ 22919304Speter vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 23019304Speter return (0); 23119304Speter} 23219304Speter 23319304Speter/* 23419304Speter * v_wordE -- [count]E 23519304Speter * Move forward to the end of the bigword. 23619304Speter * 23719304Speter * PUBLIC: int v_wordE __P((SCR *, VICMD *)); 23819304Speter */ 23919304Speterint 240254225Speterv_wordE(SCR *sp, VICMD *vp) 24119304Speter{ 24219304Speter return (eword(sp, vp, BIGWORD)); 24319304Speter} 24419304Speter 24519304Speter/* 24619304Speter * v_worde -- [count]e 24719304Speter * Move forward to the end of the word. 24819304Speter * 24919304Speter * PUBLIC: int v_worde __P((SCR *, VICMD *)); 25019304Speter */ 25119304Speterint 252254225Speterv_worde(SCR *sp, VICMD *vp) 25319304Speter{ 25419304Speter return (eword(sp, vp, LITTLEWORD)); 25519304Speter} 25619304Speter 25719304Speter/* 25819304Speter * eword -- 25919304Speter * Move forward to the end of the word. 26019304Speter */ 26119304Speterstatic int 262254225Spetereword(SCR *sp, VICMD *vp, enum which type) 26319304Speter{ 26419304Speter enum { INWORD, NOTWORD } state; 26519304Speter VCS cs; 26619304Speter u_long cnt; 26719304Speter 26819304Speter cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 26919304Speter cs.cs_lno = vp->m_start.lno; 27019304Speter cs.cs_cno = vp->m_start.cno; 27119304Speter if (cs_init(sp, &cs)) 27219304Speter return (1); 27319304Speter 27419304Speter /* 27519304Speter * !!! 27619304Speter * If in whitespace, or the next character is whitespace, move past 27719304Speter * it. (This doesn't count as a word move.) Stay at the character 27819304Speter * past the current one, it sets word "state" for the 'e' command. 27919304Speter */ 280254225Speter if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) { 28119304Speter if (cs_next(sp, &cs)) 28219304Speter return (1); 283254225Speter if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) 28419304Speter goto start; 28519304Speter } 28619304Speter if (cs_fblank(sp, &cs)) 28719304Speter return (1); 28819304Speter 28919304Speter /* 29019304Speter * Cyclically move to the next word -- this involves skipping 29119304Speter * over word characters and then any trailing non-word characters. 29219304Speter * Note, for the 'e' command, the definition of a word keeps 29319304Speter * switching. 29419304Speter */ 29519304Speterstart: if (type == BIGWORD) 29619304Speter while (cnt--) { 29719304Speter for (;;) { 29819304Speter if (cs_next(sp, &cs)) 29919304Speter return (1); 30019304Speter if (cs.cs_flags == CS_EOF) 30119304Speter goto ret; 302254225Speter if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 30319304Speter break; 30419304Speter } 30519304Speter /* 30619304Speter * When we reach the start of the word after the last 30719304Speter * word, we're done. If we changed state, back up one 30819304Speter * to the end of the previous word. 30919304Speter */ 31019304Speter if (cnt == 0) { 31119304Speter if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 31219304Speter return (1); 31319304Speter break; 31419304Speter } 31519304Speter 31619304Speter /* Eat whitespace characters. */ 31719304Speter if (cs_fblank(sp, &cs)) 31819304Speter return (1); 31919304Speter if (cs.cs_flags == CS_EOF) 32019304Speter goto ret; 32119304Speter } 32219304Speter else 32319304Speter while (cnt--) { 32419304Speter state = cs.cs_flags == 0 && 32519304Speter inword(cs.cs_ch) ? INWORD : NOTWORD; 32619304Speter for (;;) { 32719304Speter if (cs_next(sp, &cs)) 32819304Speter return (1); 32919304Speter if (cs.cs_flags == CS_EOF) 33019304Speter goto ret; 331254225Speter if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 33219304Speter break; 33319304Speter if (state == INWORD) { 33419304Speter if (!inword(cs.cs_ch)) 33519304Speter break; 33619304Speter } else 33719304Speter if (inword(cs.cs_ch)) 33819304Speter break; 33919304Speter } 34019304Speter /* See comment above. */ 34119304Speter if (cnt == 0) { 34219304Speter if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 34319304Speter return (1); 34419304Speter break; 34519304Speter } 34619304Speter 34719304Speter /* Eat whitespace characters. */ 348254225Speter if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 34919304Speter if (cs_fblank(sp, &cs)) 35019304Speter return (1); 35119304Speter if (cs.cs_flags == CS_EOF) 35219304Speter goto ret; 35319304Speter } 35419304Speter 35519304Speter /* 35619304Speter * If we didn't move, we must be at EOF. 35719304Speter * 35819304Speter * !!! 35919304Speter * That's okay for motion commands, however. 36019304Speter */ 36119304Speterret: if (!ISMOTION(vp) && 36219304Speter cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 36319304Speter v_eof(sp, &vp->m_start); 36419304Speter return (1); 36519304Speter } 36619304Speter 36719304Speter /* Set the end of the range for motion commands. */ 36819304Speter vp->m_stop.lno = cs.cs_lno; 36919304Speter vp->m_stop.cno = cs.cs_cno; 37019304Speter 37119304Speter /* 37219304Speter * Non-motion commands move to the end of the range. 37319304Speter * Delete and yank stay at the start, ignore others. 37419304Speter */ 37519304Speter vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 37619304Speter return (0); 37719304Speter} 37819304Speter 37919304Speter/* 38019304Speter * v_WordB -- [count]B 38119304Speter * Move backward a bigword at a time. 38219304Speter * 38319304Speter * PUBLIC: int v_wordB __P((SCR *, VICMD *)); 38419304Speter */ 38519304Speterint 386254225Speterv_wordB(SCR *sp, VICMD *vp) 38719304Speter{ 38819304Speter return (bword(sp, vp, BIGWORD)); 38919304Speter} 39019304Speter 39119304Speter/* 39219304Speter * v_wordb -- [count]b 39319304Speter * Move backward a word at a time. 39419304Speter * 39519304Speter * PUBLIC: int v_wordb __P((SCR *, VICMD *)); 39619304Speter */ 39719304Speterint 398254225Speterv_wordb(SCR *sp, VICMD *vp) 39919304Speter{ 40019304Speter return (bword(sp, vp, LITTLEWORD)); 40119304Speter} 40219304Speter 40319304Speter/* 40419304Speter * bword -- 40519304Speter * Move backward by words. 40619304Speter */ 40719304Speterstatic int 408254225Speterbword(SCR *sp, VICMD *vp, enum which type) 40919304Speter{ 41019304Speter enum { INWORD, NOTWORD } state; 41119304Speter VCS cs; 41219304Speter u_long cnt; 41319304Speter 41419304Speter cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 41519304Speter cs.cs_lno = vp->m_start.lno; 41619304Speter cs.cs_cno = vp->m_start.cno; 41719304Speter if (cs_init(sp, &cs)) 41819304Speter return (1); 41919304Speter 42019304Speter /* 42119304Speter * !!! 42219304Speter * If in whitespace, or the previous character is whitespace, move 42319304Speter * past it. (This doesn't count as a word move.) Stay at the 42419304Speter * character before the current one, it sets word "state" for the 42519304Speter * 'b' command. 42619304Speter */ 427254225Speter if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) { 42819304Speter if (cs_prev(sp, &cs)) 42919304Speter return (1); 430254225Speter if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) 43119304Speter goto start; 43219304Speter } 43319304Speter if (cs_bblank(sp, &cs)) 43419304Speter return (1); 43519304Speter 43619304Speter /* 43719304Speter * Cyclically move to the beginning of the previous word -- this 43819304Speter * involves skipping over word characters and then any trailing 43919304Speter * non-word characters. Note, for the 'b' command, the definition 44019304Speter * of a word keeps switching. 44119304Speter */ 44219304Speterstart: if (type == BIGWORD) 44319304Speter while (cnt--) { 44419304Speter for (;;) { 44519304Speter if (cs_prev(sp, &cs)) 44619304Speter return (1); 44719304Speter if (cs.cs_flags == CS_SOF) 44819304Speter goto ret; 449254225Speter if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 45019304Speter break; 45119304Speter } 45219304Speter /* 45319304Speter * When we reach the end of the word before the last 45419304Speter * word, we're done. If we changed state, move forward 45519304Speter * one to the end of the next word. 45619304Speter */ 45719304Speter if (cnt == 0) { 45819304Speter if (cs.cs_flags == 0 && cs_next(sp, &cs)) 45919304Speter return (1); 46019304Speter break; 46119304Speter } 46219304Speter 46319304Speter /* Eat whitespace characters. */ 46419304Speter if (cs_bblank(sp, &cs)) 46519304Speter return (1); 46619304Speter if (cs.cs_flags == CS_SOF) 46719304Speter goto ret; 46819304Speter } 46919304Speter else 47019304Speter while (cnt--) { 47119304Speter state = cs.cs_flags == 0 && 47219304Speter inword(cs.cs_ch) ? INWORD : NOTWORD; 47319304Speter for (;;) { 47419304Speter if (cs_prev(sp, &cs)) 47519304Speter return (1); 47619304Speter if (cs.cs_flags == CS_SOF) 47719304Speter goto ret; 478254225Speter if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 47919304Speter break; 48019304Speter if (state == INWORD) { 48119304Speter if (!inword(cs.cs_ch)) 48219304Speter break; 48319304Speter } else 48419304Speter if (inword(cs.cs_ch)) 48519304Speter break; 48619304Speter } 48719304Speter /* See comment above. */ 48819304Speter if (cnt == 0) { 48919304Speter if (cs.cs_flags == 0 && cs_next(sp, &cs)) 49019304Speter return (1); 49119304Speter break; 49219304Speter } 49319304Speter 49419304Speter /* Eat whitespace characters. */ 495254225Speter if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 49619304Speter if (cs_bblank(sp, &cs)) 49719304Speter return (1); 49819304Speter if (cs.cs_flags == CS_SOF) 49919304Speter goto ret; 50019304Speter } 50119304Speter 50219304Speter /* If we didn't move, we must be at SOF. */ 50319304Speterret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 50419304Speter v_sof(sp, &vp->m_start); 50519304Speter return (1); 50619304Speter } 50719304Speter 50819304Speter /* Set the end of the range for motion commands. */ 50919304Speter vp->m_stop.lno = cs.cs_lno; 51019304Speter vp->m_stop.cno = cs.cs_cno; 51119304Speter 51219304Speter /* 51319304Speter * All commands move to the end of the range. Motion commands 51419304Speter * adjust the starting point to the character before the current 51519304Speter * one. 51619304Speter * 51719304Speter * !!! 51819304Speter * The historic vi didn't get this right -- the `yb' command yanked 51919304Speter * the right stuff and even updated the cursor value, but the cursor 52019304Speter * was not actually updated on the screen. 52119304Speter */ 52219304Speter vp->m_final = vp->m_stop; 52319304Speter if (ISMOTION(vp)) 52419304Speter --vp->m_start.cno; 52519304Speter return (0); 52619304Speter} 527