119304Speter/*- 219304Speter * Copyright (c) 1992, 1993, 1994 319304Speter * The Regents of the University of California. All rights reserved. 419304Speter * Copyright (c) 1992, 1993, 1994, 1995, 1996 519304Speter * Keith Bostic. All rights reserved. 619304Speter * 719304Speter * See the LICENSE file for redistribution information. 819304Speter */ 919304Speter 1019304Speter#include "config.h" 1119304Speter 1219304Speter#ifndef lint 1319304Speterstatic const char sccsid[] = "@(#)v_word.c 10.5 (Berkeley) 3/6/96"; 1419304Speter#endif /* not lint */ 1519304Speter 1619304Speter#include <sys/types.h> 1719304Speter#include <sys/queue.h> 1819304Speter#include <sys/time.h> 1919304Speter 2019304Speter#include <bitstring.h> 2119304Speter#include <ctype.h> 2219304Speter#include <limits.h> 2319304Speter#include <stdio.h> 2419304Speter 2519304Speter#include "../common/common.h" 2619304Speter#include "vi.h" 2719304Speter 2819304Speter/* 2919304Speter * There are two types of "words". Bigwords are easy -- groups of anything 3019304Speter * delimited by whitespace. Normal words are trickier. They are either a 3119304Speter * group of characters, numbers and underscores, or a group of anything but, 3219304Speter * delimited by whitespace. When for a word, if you're in whitespace, it's 3319304Speter * easy, just remove the whitespace and go to the beginning or end of the 3419304Speter * word. Otherwise, figure out if the next character is in a different group. 3519304Speter * If it is, go to the beginning or end of that group, otherwise, go to the 3619304Speter * beginning or end of the current group. The historic version of vi didn't 3719304Speter * get this right, so, for example, there were cases where "4e" was not the 3819304Speter * same as "eeee" -- in particular, single character words, and commands that 3919304Speter * began in whitespace were almost always handled incorrectly. To get it right 4019304Speter * you have to resolve the cursor after each search so that the look-ahead to 4119304Speter * figure out what type of "word" the cursor is in will be correct. 4219304Speter * 4319304Speter * Empty lines, and lines that consist of only white-space characters count 4419304Speter * as a single word, and the beginning and end of the file counts as an 4519304Speter * infinite number of words. 4619304Speter * 4719304Speter * Movements associated with commands are different than movement commands. 4819304Speter * For example, in "abc def", with the cursor on the 'a', "cw" is from 4919304Speter * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white 5019304Speter * space is discarded from the change movement. Another example is that, 5119304Speter * in the same string, a "cw" on any white space character replaces that 5219304Speter * single character, and nothing else. Ain't nothin' in here that's easy. 5319304Speter * 5419304Speter * One historic note -- in the original vi, the 'w', 'W' and 'B' commands 5519304Speter * would treat groups of empty lines as individual words, i.e. the command 5619304Speter * would move the cursor to each new empty line. The 'e' and 'E' commands 5719304Speter * would treat groups of empty lines as a single word, i.e. the first use 5819304Speter * would move past the group of lines. The 'b' command would just beep at 5919304Speter * you, or, if you did it from the start of the line as part of a motion 6019304Speter * command, go absolutely nuts. If the lines contained only white-space 6119304Speter * characters, the 'w' and 'W' commands would just beep at you, and the 'B', 6219304Speter * 'b', 'E' and 'e' commands would treat the group as a single word, and 6319304Speter * the 'B' and 'b' commands will treat the lines as individual words. This 6419304Speter * implementation treats all of these cases as a single white-space word. 6519304Speter */ 6619304Speter 6719304Speterenum which {BIGWORD, LITTLEWORD}; 6819304Speter 6919304Speterstatic int bword __P((SCR *, VICMD *, enum which)); 7019304Speterstatic int eword __P((SCR *, VICMD *, enum which)); 7119304Speterstatic int fword __P((SCR *, VICMD *, enum which)); 7219304Speter 7319304Speter/* 7419304Speter * v_wordW -- [count]W 7519304Speter * Move forward a bigword at a time. 7619304Speter * 7719304Speter * PUBLIC: int v_wordW __P((SCR *, VICMD *)); 7819304Speter */ 7919304Speterint 8019304Speterv_wordW(sp, vp) 8119304Speter SCR *sp; 8219304Speter VICMD *vp; 8319304Speter{ 8419304Speter return (fword(sp, vp, BIGWORD)); 8519304Speter} 8619304Speter 8719304Speter/* 8819304Speter * v_wordw -- [count]w 8919304Speter * Move forward a word at a time. 9019304Speter * 9119304Speter * PUBLIC: int v_wordw __P((SCR *, VICMD *)); 9219304Speter */ 9319304Speterint 9419304Speterv_wordw(sp, vp) 9519304Speter SCR *sp; 9619304Speter VICMD *vp; 9719304Speter{ 9819304Speter return (fword(sp, vp, LITTLEWORD)); 9919304Speter} 10019304Speter 10119304Speter/* 10219304Speter * fword -- 10319304Speter * Move forward by words. 10419304Speter */ 10519304Speterstatic int 10619304Speterfword(sp, vp, type) 10719304Speter SCR *sp; 10819304Speter VICMD *vp; 10919304Speter enum which type; 11019304Speter{ 11119304Speter enum { INWORD, NOTWORD } state; 11219304Speter VCS cs; 11319304Speter u_long cnt; 11419304Speter 11519304Speter cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 11619304Speter cs.cs_lno = vp->m_start.lno; 11719304Speter cs.cs_cno = vp->m_start.cno; 11819304Speter if (cs_init(sp, &cs)) 11919304Speter return (1); 12019304Speter 12119304Speter /* 12219304Speter * If in white-space: 12319304Speter * If the count is 1, and it's a change command, we're done. 12419304Speter * Else, move to the first non-white-space character, which 12519304Speter * counts as a single word move. If it's a motion command, 12619304Speter * don't move off the end of the line. 12719304Speter */ 12819304Speter if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) { 12919304Speter if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { 13019304Speter if (ISCMD(vp->rkp, 'c')) 13119304Speter return (0); 13219304Speter if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { 13319304Speter if (cs_fspace(sp, &cs)) 13419304Speter return (1); 13519304Speter goto ret; 13619304Speter } 13719304Speter } 13819304Speter if (cs_fblank(sp, &cs)) 13919304Speter return (1); 14019304Speter --cnt; 14119304Speter } 14219304Speter 14319304Speter /* 14419304Speter * Cyclically move to the next word -- this involves skipping 14519304Speter * over word characters and then any trailing non-word characters. 14619304Speter * Note, for the 'w' command, the definition of a word keeps 14719304Speter * switching. 14819304Speter */ 14919304Speter if (type == BIGWORD) 15019304Speter while (cnt--) { 15119304Speter for (;;) { 15219304Speter if (cs_next(sp, &cs)) 15319304Speter return (1); 15419304Speter if (cs.cs_flags == CS_EOF) 15519304Speter goto ret; 15619304Speter if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 15719304Speter break; 15819304Speter } 15919304Speter /* 16019304Speter * If a motion command and we're at the end of the 16119304Speter * last word, we're done. Delete and yank eat any 16219304Speter * trailing blanks, but we don't move off the end 16319304Speter * of the line regardless. 16419304Speter */ 16519304Speter if (cnt == 0 && ISMOTION(vp)) { 16619304Speter if ((ISCMD(vp->rkp, 'd') || 16719304Speter ISCMD(vp->rkp, 'y')) && 16819304Speter cs_fspace(sp, &cs)) 16919304Speter return (1); 17019304Speter break; 17119304Speter } 17219304Speter 17319304Speter /* Eat whitespace characters. */ 17419304Speter if (cs_fblank(sp, &cs)) 17519304Speter return (1); 17619304Speter if (cs.cs_flags == CS_EOF) 17719304Speter goto ret; 17819304Speter } 17919304Speter else 18019304Speter while (cnt--) { 18119304Speter state = cs.cs_flags == 0 && 18219304Speter inword(cs.cs_ch) ? INWORD : NOTWORD; 18319304Speter for (;;) { 18419304Speter if (cs_next(sp, &cs)) 18519304Speter return (1); 18619304Speter if (cs.cs_flags == CS_EOF) 18719304Speter goto ret; 18819304Speter if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 18919304Speter break; 19019304Speter if (state == INWORD) { 19119304Speter if (!inword(cs.cs_ch)) 19219304Speter break; 19319304Speter } else 19419304Speter if (inword(cs.cs_ch)) 19519304Speter break; 19619304Speter } 19719304Speter /* See comment above. */ 19819304Speter if (cnt == 0 && ISMOTION(vp)) { 19919304Speter if ((ISCMD(vp->rkp, 'd') || 20019304Speter ISCMD(vp->rkp, 'y')) && 20119304Speter cs_fspace(sp, &cs)) 20219304Speter return (1); 20319304Speter break; 20419304Speter } 20519304Speter 20619304Speter /* Eat whitespace characters. */ 20719304Speter if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 20819304Speter if (cs_fblank(sp, &cs)) 20919304Speter return (1); 21019304Speter if (cs.cs_flags == CS_EOF) 21119304Speter goto ret; 21219304Speter } 21319304Speter 21419304Speter /* 21519304Speter * If we didn't move, we must be at EOF. 21619304Speter * 21719304Speter * !!! 21819304Speter * That's okay for motion commands, however. 21919304Speter */ 22019304Speterret: if (!ISMOTION(vp) && 22119304Speter cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 22219304Speter v_eof(sp, &vp->m_start); 22319304Speter return (1); 22419304Speter } 22519304Speter 22619304Speter /* Adjust the end of the range for motion commands. */ 22719304Speter vp->m_stop.lno = cs.cs_lno; 22819304Speter vp->m_stop.cno = cs.cs_cno; 22919304Speter if (ISMOTION(vp) && cs.cs_flags == 0) 23019304Speter --vp->m_stop.cno; 23119304Speter 23219304Speter /* 23319304Speter * Non-motion commands move to the end of the range. Delete 23419304Speter * and yank stay at the start, ignore others. 23519304Speter */ 23619304Speter vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 23719304Speter return (0); 23819304Speter} 23919304Speter 24019304Speter/* 24119304Speter * v_wordE -- [count]E 24219304Speter * Move forward to the end of the bigword. 24319304Speter * 24419304Speter * PUBLIC: int v_wordE __P((SCR *, VICMD *)); 24519304Speter */ 24619304Speterint 24719304Speterv_wordE(sp, vp) 24819304Speter SCR *sp; 24919304Speter VICMD *vp; 25019304Speter{ 25119304Speter return (eword(sp, vp, BIGWORD)); 25219304Speter} 25319304Speter 25419304Speter/* 25519304Speter * v_worde -- [count]e 25619304Speter * Move forward to the end of the word. 25719304Speter * 25819304Speter * PUBLIC: int v_worde __P((SCR *, VICMD *)); 25919304Speter */ 26019304Speterint 26119304Speterv_worde(sp, vp) 26219304Speter SCR *sp; 26319304Speter VICMD *vp; 26419304Speter{ 26519304Speter return (eword(sp, vp, LITTLEWORD)); 26619304Speter} 26719304Speter 26819304Speter/* 26919304Speter * eword -- 27019304Speter * Move forward to the end of the word. 27119304Speter */ 27219304Speterstatic int 27319304Spetereword(sp, vp, type) 27419304Speter SCR *sp; 27519304Speter VICMD *vp; 27619304Speter enum which type; 27719304Speter{ 27819304Speter enum { INWORD, NOTWORD } state; 27919304Speter VCS cs; 28019304Speter u_long cnt; 28119304Speter 28219304Speter cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 28319304Speter cs.cs_lno = vp->m_start.lno; 28419304Speter cs.cs_cno = vp->m_start.cno; 28519304Speter if (cs_init(sp, &cs)) 28619304Speter return (1); 28719304Speter 28819304Speter /* 28919304Speter * !!! 29019304Speter * If in whitespace, or the next character is whitespace, move past 29119304Speter * it. (This doesn't count as a word move.) Stay at the character 29219304Speter * past the current one, it sets word "state" for the 'e' command. 29319304Speter */ 29419304Speter if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { 29519304Speter if (cs_next(sp, &cs)) 29619304Speter return (1); 29719304Speter if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) 29819304Speter goto start; 29919304Speter } 30019304Speter if (cs_fblank(sp, &cs)) 30119304Speter return (1); 30219304Speter 30319304Speter /* 30419304Speter * Cyclically move to the next word -- this involves skipping 30519304Speter * over word characters and then any trailing non-word characters. 30619304Speter * Note, for the 'e' command, the definition of a word keeps 30719304Speter * switching. 30819304Speter */ 30919304Speterstart: if (type == BIGWORD) 31019304Speter while (cnt--) { 31119304Speter for (;;) { 31219304Speter if (cs_next(sp, &cs)) 31319304Speter return (1); 31419304Speter if (cs.cs_flags == CS_EOF) 31519304Speter goto ret; 31619304Speter if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 31719304Speter break; 31819304Speter } 31919304Speter /* 32019304Speter * When we reach the start of the word after the last 32119304Speter * word, we're done. If we changed state, back up one 32219304Speter * to the end of the previous word. 32319304Speter */ 32419304Speter if (cnt == 0) { 32519304Speter if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 32619304Speter return (1); 32719304Speter break; 32819304Speter } 32919304Speter 33019304Speter /* Eat whitespace characters. */ 33119304Speter if (cs_fblank(sp, &cs)) 33219304Speter return (1); 33319304Speter if (cs.cs_flags == CS_EOF) 33419304Speter goto ret; 33519304Speter } 33619304Speter else 33719304Speter while (cnt--) { 33819304Speter state = cs.cs_flags == 0 && 33919304Speter inword(cs.cs_ch) ? INWORD : NOTWORD; 34019304Speter for (;;) { 34119304Speter if (cs_next(sp, &cs)) 34219304Speter return (1); 34319304Speter if (cs.cs_flags == CS_EOF) 34419304Speter goto ret; 34519304Speter if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 34619304Speter break; 34719304Speter if (state == INWORD) { 34819304Speter if (!inword(cs.cs_ch)) 34919304Speter break; 35019304Speter } else 35119304Speter if (inword(cs.cs_ch)) 35219304Speter break; 35319304Speter } 35419304Speter /* See comment above. */ 35519304Speter if (cnt == 0) { 35619304Speter if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 35719304Speter return (1); 35819304Speter break; 35919304Speter } 36019304Speter 36119304Speter /* Eat whitespace characters. */ 36219304Speter if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 36319304Speter if (cs_fblank(sp, &cs)) 36419304Speter return (1); 36519304Speter if (cs.cs_flags == CS_EOF) 36619304Speter goto ret; 36719304Speter } 36819304Speter 36919304Speter /* 37019304Speter * If we didn't move, we must be at EOF. 37119304Speter * 37219304Speter * !!! 37319304Speter * That's okay for motion commands, however. 37419304Speter */ 37519304Speterret: if (!ISMOTION(vp) && 37619304Speter cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 37719304Speter v_eof(sp, &vp->m_start); 37819304Speter return (1); 37919304Speter } 38019304Speter 38119304Speter /* Set the end of the range for motion commands. */ 38219304Speter vp->m_stop.lno = cs.cs_lno; 38319304Speter vp->m_stop.cno = cs.cs_cno; 38419304Speter 38519304Speter /* 38619304Speter * Non-motion commands move to the end of the range. 38719304Speter * Delete and yank stay at the start, ignore others. 38819304Speter */ 38919304Speter vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 39019304Speter return (0); 39119304Speter} 39219304Speter 39319304Speter/* 39419304Speter * v_WordB -- [count]B 39519304Speter * Move backward a bigword at a time. 39619304Speter * 39719304Speter * PUBLIC: int v_wordB __P((SCR *, VICMD *)); 39819304Speter */ 39919304Speterint 40019304Speterv_wordB(sp, vp) 40119304Speter SCR *sp; 40219304Speter VICMD *vp; 40319304Speter{ 40419304Speter return (bword(sp, vp, BIGWORD)); 40519304Speter} 40619304Speter 40719304Speter/* 40819304Speter * v_wordb -- [count]b 40919304Speter * Move backward a word at a time. 41019304Speter * 41119304Speter * PUBLIC: int v_wordb __P((SCR *, VICMD *)); 41219304Speter */ 41319304Speterint 41419304Speterv_wordb(sp, vp) 41519304Speter SCR *sp; 41619304Speter VICMD *vp; 41719304Speter{ 41819304Speter return (bword(sp, vp, LITTLEWORD)); 41919304Speter} 42019304Speter 42119304Speter/* 42219304Speter * bword -- 42319304Speter * Move backward by words. 42419304Speter */ 42519304Speterstatic int 42619304Speterbword(sp, vp, type) 42719304Speter SCR *sp; 42819304Speter VICMD *vp; 42919304Speter enum which type; 43019304Speter{ 43119304Speter enum { INWORD, NOTWORD } state; 43219304Speter VCS cs; 43319304Speter u_long cnt; 43419304Speter 43519304Speter cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 43619304Speter cs.cs_lno = vp->m_start.lno; 43719304Speter cs.cs_cno = vp->m_start.cno; 43819304Speter if (cs_init(sp, &cs)) 43919304Speter return (1); 44019304Speter 44119304Speter /* 44219304Speter * !!! 44319304Speter * If in whitespace, or the previous character is whitespace, move 44419304Speter * past it. (This doesn't count as a word move.) Stay at the 44519304Speter * character before the current one, it sets word "state" for the 44619304Speter * 'b' command. 44719304Speter */ 44819304Speter if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { 44919304Speter if (cs_prev(sp, &cs)) 45019304Speter return (1); 45119304Speter if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) 45219304Speter goto start; 45319304Speter } 45419304Speter if (cs_bblank(sp, &cs)) 45519304Speter return (1); 45619304Speter 45719304Speter /* 45819304Speter * Cyclically move to the beginning of the previous word -- this 45919304Speter * involves skipping over word characters and then any trailing 46019304Speter * non-word characters. Note, for the 'b' command, the definition 46119304Speter * of a word keeps switching. 46219304Speter */ 46319304Speterstart: if (type == BIGWORD) 46419304Speter while (cnt--) { 46519304Speter for (;;) { 46619304Speter if (cs_prev(sp, &cs)) 46719304Speter return (1); 46819304Speter if (cs.cs_flags == CS_SOF) 46919304Speter goto ret; 47019304Speter if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 47119304Speter break; 47219304Speter } 47319304Speter /* 47419304Speter * When we reach the end of the word before the last 47519304Speter * word, we're done. If we changed state, move forward 47619304Speter * one to the end of the next word. 47719304Speter */ 47819304Speter if (cnt == 0) { 47919304Speter if (cs.cs_flags == 0 && cs_next(sp, &cs)) 48019304Speter return (1); 48119304Speter break; 48219304Speter } 48319304Speter 48419304Speter /* Eat whitespace characters. */ 48519304Speter if (cs_bblank(sp, &cs)) 48619304Speter return (1); 48719304Speter if (cs.cs_flags == CS_SOF) 48819304Speter goto ret; 48919304Speter } 49019304Speter else 49119304Speter while (cnt--) { 49219304Speter state = cs.cs_flags == 0 && 49319304Speter inword(cs.cs_ch) ? INWORD : NOTWORD; 49419304Speter for (;;) { 49519304Speter if (cs_prev(sp, &cs)) 49619304Speter return (1); 49719304Speter if (cs.cs_flags == CS_SOF) 49819304Speter goto ret; 49919304Speter if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 50019304Speter break; 50119304Speter if (state == INWORD) { 50219304Speter if (!inword(cs.cs_ch)) 50319304Speter break; 50419304Speter } else 50519304Speter if (inword(cs.cs_ch)) 50619304Speter break; 50719304Speter } 50819304Speter /* See comment above. */ 50919304Speter if (cnt == 0) { 51019304Speter if (cs.cs_flags == 0 && cs_next(sp, &cs)) 51119304Speter return (1); 51219304Speter break; 51319304Speter } 51419304Speter 51519304Speter /* Eat whitespace characters. */ 51619304Speter if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 51719304Speter if (cs_bblank(sp, &cs)) 51819304Speter return (1); 51919304Speter if (cs.cs_flags == CS_SOF) 52019304Speter goto ret; 52119304Speter } 52219304Speter 52319304Speter /* If we didn't move, we must be at SOF. */ 52419304Speterret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 52519304Speter v_sof(sp, &vp->m_start); 52619304Speter return (1); 52719304Speter } 52819304Speter 52919304Speter /* Set the end of the range for motion commands. */ 53019304Speter vp->m_stop.lno = cs.cs_lno; 53119304Speter vp->m_stop.cno = cs.cs_cno; 53219304Speter 53319304Speter /* 53419304Speter * All commands move to the end of the range. Motion commands 53519304Speter * adjust the starting point to the character before the current 53619304Speter * one. 53719304Speter * 53819304Speter * !!! 53919304Speter * The historic vi didn't get this right -- the `yb' command yanked 54019304Speter * the right stuff and even updated the cursor value, but the cursor 54119304Speter * was not actually updated on the screen. 54219304Speter */ 54319304Speter vp->m_final = vp->m_stop; 54419304Speter if (ISMOTION(vp)) 54519304Speter --vp->m_start.cno; 54619304Speter return (0); 54719304Speter} 548