1// Written in the D programming language. 2 3/** 4String handling functions. 5 6$(SCRIPT inhibitQuickIndex = 1;) 7 8$(DIVC quickindex, 9$(BOOKTABLE , 10$(TR $(TH Category) $(TH Functions) ) 11$(TR $(TDNW Searching) 12 $(TD 13 $(MYREF column) 14 $(MYREF indexOf) 15 $(MYREF indexOfAny) 16 $(MYREF indexOfNeither) 17 $(MYREF lastIndexOf) 18 $(MYREF lastIndexOfAny) 19 $(MYREF lastIndexOfNeither) 20 ) 21) 22$(TR $(TDNW Comparison) 23 $(TD 24 $(MYREF isNumeric) 25 ) 26) 27$(TR $(TDNW Mutation) 28 $(TD 29 $(MYREF capitalize) 30 ) 31) 32$(TR $(TDNW Pruning and Filling) 33 $(TD 34 $(MYREF center) 35 $(MYREF chomp) 36 $(MYREF chompPrefix) 37 $(MYREF chop) 38 $(MYREF detabber) 39 $(MYREF detab) 40 $(MYREF entab) 41 $(MYREF entabber) 42 $(MYREF leftJustify) 43 $(MYREF outdent) 44 $(MYREF rightJustify) 45 $(MYREF strip) 46 $(MYREF stripLeft) 47 $(MYREF stripRight) 48 $(MYREF wrap) 49 ) 50) 51$(TR $(TDNW Substitution) 52 $(TD 53 $(MYREF abbrev) 54 $(MYREF soundex) 55 $(MYREF soundexer) 56 $(MYREF succ) 57 $(MYREF tr) 58 $(MYREF translate) 59 ) 60) 61$(TR $(TDNW Miscellaneous) 62 $(TD 63 $(MYREF assumeUTF) 64 $(MYREF fromStringz) 65 $(MYREF lineSplitter) 66 $(MYREF representation) 67 $(MYREF splitLines) 68 $(MYREF toStringz) 69 ) 70))) 71 72Objects of types $(D _string), $(D wstring), and $(D dstring) are value types 73and cannot be mutated element-by-element. For using mutation during building 74strings, use $(D char[]), $(D wchar[]), or $(D dchar[]). The $(D xxxstring) 75types are preferable because they don't exhibit undesired aliasing, thus 76making code more robust. 77 78The following functions are publicly imported: 79 80$(BOOKTABLE , 81$(TR $(TH Module) $(TH Functions) ) 82$(LEADINGROW Publicly imported functions) 83 $(TR $(TD std.algorithm) 84 $(TD 85 $(REF_SHORT cmp, std,algorithm,comparison) 86 $(REF_SHORT count, std,algorithm,searching) 87 $(REF_SHORT endsWith, std,algorithm,searching) 88 $(REF_SHORT startsWith, std,algorithm,searching) 89 )) 90 $(TR $(TD std.array) 91 $(TD 92 $(REF_SHORT join, std,array) 93 $(REF_SHORT replace, std,array) 94 $(REF_SHORT replaceInPlace, std,array) 95 $(REF_SHORT split, std,array) 96 $(REF_SHORT empty, std,array) 97 )) 98 $(TR $(TD std.format) 99 $(TD 100 $(REF_SHORT format, std,format) 101 $(REF_SHORT sformat, std,format) 102 )) 103 $(TR $(TD std.uni) 104 $(TD 105 $(REF_SHORT icmp, std,uni) 106 $(REF_SHORT toLower, std,uni) 107 $(REF_SHORT toLowerInPlace, std,uni) 108 $(REF_SHORT toUpper, std,uni) 109 $(REF_SHORT toUpperInPlace, std,uni) 110 )) 111) 112 113There is a rich set of functions for _string handling defined in other modules. 114Functions related to Unicode and ASCII are found in $(MREF std, uni) 115and $(MREF std, ascii), respectively. Other functions that have a 116wider generality than just strings can be found in $(MREF std, algorithm) 117and $(MREF std, range). 118 119See_Also: 120 $(LIST 121 $(MREF std, algorithm) and 122 $(MREF std, range) 123 for generic range algorithms 124 , 125 $(MREF std, ascii) 126 for functions that work with ASCII strings 127 , 128 $(MREF std, uni) 129 for functions that work with unicode strings 130 ) 131 132Copyright: Copyright Digital Mars 2007-. 133 134License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0). 135 136Authors: $(HTTP digitalmars.com, Walter Bright), 137 $(HTTP erdani.org, Andrei Alexandrescu), 138 Jonathan M Davis, 139 and David L. 'SpottedTiger' Davis 140 141Source: $(PHOBOSSRC std/_string.d) 142 143*/ 144module std.string; 145 146version (unittest) 147{ 148private: 149 struct TestAliasedString 150 { 151 string get() @safe @nogc pure nothrow { return _s; } 152 alias get this; 153 @disable this(this); 154 string _s; 155 } 156 157 bool testAliasedString(alias func, Args...)(string s, Args args) 158 { 159 import std.algorithm.comparison : equal; 160 auto a = func(TestAliasedString(s), args); 161 auto b = func(s, args); 162 static if (is(typeof(equal(a, b)))) 163 { 164 // For ranges, compare contents instead of object identity. 165 return equal(a, b); 166 } 167 else 168 { 169 return a == b; 170 } 171 } 172} 173 174public import std.format : format, sformat; 175import std.typecons : Flag, Yes, No; 176public import std.uni : icmp, toLower, toLowerInPlace, toUpper, toUpperInPlace; 177 178import std.meta; // AliasSeq, staticIndexOf 179import std.range.primitives; // back, ElementEncodingType, ElementType, front, 180 // hasLength, hasSlicing, isBidirectionalRange, isForwardRange, isInfinite, 181 // isInputRange, isOutputRange, isRandomAccessRange, popBack, popFront, put, 182 // save; 183import std.traits; // isConvertibleToString, isNarrowString, isSomeChar, 184 // isSomeString, StringTypeOf, Unqual 185 186//public imports for backward compatibility 187public import std.algorithm.comparison : cmp; 188public import std.algorithm.searching : startsWith, endsWith, count; 189public import std.array : join, replace, replaceInPlace, split, empty; 190 191/* ************* Exceptions *************** */ 192 193/++ 194 Exception thrown on errors in std.string functions. 195 +/ 196class StringException : Exception 197{ 198 import std.exception : basicExceptionCtors; 199 200 /// 201 mixin basicExceptionCtors; 202} 203 204 205/++ 206 Params: 207 cString = A null-terminated c-style string. 208 209 Returns: A D-style array of $(D char) referencing the same string. The 210 returned array will retain the same type qualifiers as the input. 211 212 $(RED Important Note:) The returned array is a slice of the original buffer. 213 The original data is not changed and not copied. 214+/ 215 216inout(char)[] fromStringz(inout(char)* cString) @nogc @system pure nothrow { 217 import core.stdc.string : strlen; 218 return cString ? cString[0 .. strlen(cString)] : null; 219} 220 221/// 222@system pure unittest 223{ 224 assert(fromStringz(null) == null); 225 assert(fromStringz("foo") == "foo"); 226} 227 228/++ 229 Params: 230 s = A D-style string. 231 232 Returns: A C-style null-terminated string equivalent to $(D s). $(D s) 233 must not contain embedded $(D '\0')'s as any C function will treat the 234 first $(D '\0') that it sees as the end of the string. If $(D s.empty) is 235 $(D true), then a string containing only $(D '\0') is returned. 236 237 $(RED Important Note:) When passing a $(D char*) to a C function, and the C 238 function keeps it around for any reason, make sure that you keep a 239 reference to it in your D code. Otherwise, it may become invalid during a 240 garbage collection cycle and cause a nasty bug when the C code tries to use 241 it. 242 +/ 243immutable(char)* toStringz(const(char)[] s) @trusted pure nothrow 244out (result) 245{ 246 import core.stdc.string : strlen, memcmp; 247 if (result) 248 { 249 auto slen = s.length; 250 while (slen > 0 && s[slen-1] == 0) --slen; 251 assert(strlen(result) == slen); 252 assert(result[0 .. slen] == s[0 .. slen]); 253 } 254} 255body 256{ 257 import std.exception : assumeUnique; 258 /+ Unfortunately, this isn't reliable. 259 We could make this work if string literals are put 260 in read-only memory and we test if s[] is pointing into 261 that. 262 263 /* Peek past end of s[], if it's 0, no conversion necessary. 264 * Note that the compiler will put a 0 past the end of static 265 * strings, and the storage allocator will put a 0 past the end 266 * of newly allocated char[]'s. 267 */ 268 char* p = &s[0] + s.length; 269 if (*p == 0) 270 return s; 271 +/ 272 273 // Need to make a copy 274 auto copy = new char[s.length + 1]; 275 copy[0 .. s.length] = s[]; 276 copy[s.length] = 0; 277 278 return &assumeUnique(copy)[0]; 279} 280 281/++ Ditto +/ 282immutable(char)* toStringz(in string s) @trusted pure nothrow 283{ 284 if (s.empty) return "".ptr; 285 /* Peek past end of s[], if it's 0, no conversion necessary. 286 * Note that the compiler will put a 0 past the end of static 287 * strings, and the storage allocator will put a 0 past the end 288 * of newly allocated char[]'s. 289 */ 290 immutable p = s.ptr + s.length; 291 // Is p dereferenceable? A simple test: if the p points to an 292 // address multiple of 4, then conservatively assume the pointer 293 // might be pointing to a new block of memory, which might be 294 // unreadable. Otherwise, it's definitely pointing to valid 295 // memory. 296 if ((cast(size_t) p & 3) && *p == 0) 297 return &s[0]; 298 return toStringz(cast(const char[]) s); 299} 300 301/// 302pure nothrow @system unittest 303{ 304 import core.stdc.string : strlen; 305 import std.conv : to; 306 307 auto p = toStringz("foo"); 308 assert(strlen(p) == 3); 309 const(char)[] foo = "abbzxyzzy"; 310 p = toStringz(foo[3 .. 5]); 311 assert(strlen(p) == 2); 312 313 string test = ""; 314 p = toStringz(test); 315 assert(*p == 0); 316 317 test = "\0"; 318 p = toStringz(test); 319 assert(*p == 0); 320 321 test = "foo\0"; 322 p = toStringz(test); 323 assert(p[0] == 'f' && p[1] == 'o' && p[2] == 'o' && p[3] == 0); 324 325 const string test2 = ""; 326 p = toStringz(test2); 327 assert(*p == 0); 328} 329 330 331/** 332 Flag indicating whether a search is case-sensitive. 333*/ 334alias CaseSensitive = Flag!"caseSensitive"; 335 336/++ 337 Searches for character in range. 338 339 Params: 340 s = string or InputRange of characters to search in correct UTF format 341 c = character to search for 342 startIdx = starting index to a well-formed code point 343 cs = $(D Yes.caseSensitive) or $(D No.caseSensitive) 344 345 Returns: 346 the index of the first occurrence of $(D c) in $(D s) with 347 respect to the start index $(D startIdx). If $(D c) 348 is not found, then $(D -1) is returned. 349 If $(D c) is found the value of the returned index is at least 350 $(D startIdx). 351 If the parameters are not valid UTF, the result will still 352 be in the range [-1 .. s.length], but will not be reliable otherwise. 353 354 Throws: 355 If the sequence starting at $(D startIdx) does not represent a well 356 formed codepoint, then a $(REF UTFException, std,utf) may be thrown. 357 358 See_Also: $(REF countUntil, std,algorithm,searching) 359 +/ 360ptrdiff_t indexOf(Range)(Range s, in dchar c, 361 in CaseSensitive cs = Yes.caseSensitive) 362if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) && 363 !isConvertibleToString!Range) 364{ 365 static import std.ascii; 366 static import std.uni; 367 import std.utf : byDchar, byCodeUnit, UTFException, codeLength; 368 alias Char = Unqual!(ElementEncodingType!Range); 369 370 if (cs == Yes.caseSensitive) 371 { 372 static if (Char.sizeof == 1 && isSomeString!Range) 373 { 374 if (std.ascii.isASCII(c) && !__ctfe) 375 { // Plain old ASCII 376 static ptrdiff_t trustedmemchr(Range s, char c) @trusted 377 { 378 import core.stdc.string : memchr; 379 const p = cast(const(Char)*)memchr(s.ptr, c, s.length); 380 return p ? p - s.ptr : -1; 381 } 382 383 return trustedmemchr(s, cast(char) c); 384 } 385 } 386 387 static if (Char.sizeof == 1) 388 { 389 if (c <= 0x7F) 390 { 391 ptrdiff_t i; 392 foreach (const c2; s) 393 { 394 if (c == c2) 395 return i; 396 ++i; 397 } 398 } 399 else 400 { 401 ptrdiff_t i; 402 foreach (const c2; s.byDchar()) 403 { 404 if (c == c2) 405 return i; 406 i += codeLength!Char(c2); 407 } 408 } 409 } 410 else static if (Char.sizeof == 2) 411 { 412 if (c <= 0xFFFF) 413 { 414 ptrdiff_t i; 415 foreach (const c2; s) 416 { 417 if (c == c2) 418 return i; 419 ++i; 420 } 421 } 422 else if (c <= 0x10FFFF) 423 { 424 // Encode UTF-16 surrogate pair 425 const wchar c1 = cast(wchar)((((c - 0x10000) >> 10) & 0x3FF) + 0xD800); 426 const wchar c2 = cast(wchar)(((c - 0x10000) & 0x3FF) + 0xDC00); 427 ptrdiff_t i; 428 for (auto r = s.byCodeUnit(); !r.empty; r.popFront()) 429 { 430 if (c1 == r.front) 431 { 432 r.popFront(); 433 if (r.empty) // invalid UTF - missing second of pair 434 break; 435 if (c2 == r.front) 436 return i; 437 ++i; 438 } 439 ++i; 440 } 441 } 442 } 443 else static if (Char.sizeof == 4) 444 { 445 ptrdiff_t i; 446 foreach (const c2; s) 447 { 448 if (c == c2) 449 return i; 450 ++i; 451 } 452 } 453 else 454 static assert(0); 455 return -1; 456 } 457 else 458 { 459 if (std.ascii.isASCII(c)) 460 { // Plain old ASCII 461 immutable c1 = cast(char) std.ascii.toLower(c); 462 463 ptrdiff_t i; 464 foreach (const c2; s.byCodeUnit()) 465 { 466 if (c1 == std.ascii.toLower(c2)) 467 return i; 468 ++i; 469 } 470 } 471 else 472 { // c is a universal character 473 immutable c1 = std.uni.toLower(c); 474 475 ptrdiff_t i; 476 foreach (const c2; s.byDchar()) 477 { 478 if (c1 == std.uni.toLower(c2)) 479 return i; 480 i += codeLength!Char(c2); 481 } 482 } 483 } 484 return -1; 485} 486 487/// Ditto 488ptrdiff_t indexOf(Range)(Range s, in dchar c, in size_t startIdx, 489 in CaseSensitive cs = Yes.caseSensitive) 490if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) && 491 !isConvertibleToString!Range) 492{ 493 static if (isSomeString!(typeof(s)) || 494 (hasSlicing!(typeof(s)) && hasLength!(typeof(s)))) 495 { 496 if (startIdx < s.length) 497 { 498 ptrdiff_t foundIdx = indexOf(s[startIdx .. $], c, cs); 499 if (foundIdx != -1) 500 { 501 return foundIdx + cast(ptrdiff_t) startIdx; 502 } 503 } 504 } 505 else 506 { 507 foreach (i; 0 .. startIdx) 508 { 509 if (s.empty) 510 return -1; 511 s.popFront(); 512 } 513 ptrdiff_t foundIdx = indexOf(s, c, cs); 514 if (foundIdx != -1) 515 { 516 return foundIdx + cast(ptrdiff_t) startIdx; 517 } 518 } 519 return -1; 520} 521 522/// 523@safe pure unittest 524{ 525 import std.typecons : No; 526 527 string s = "Hello World"; 528 assert(indexOf(s, 'W') == 6); 529 assert(indexOf(s, 'Z') == -1); 530 assert(indexOf(s, 'w', No.caseSensitive) == 6); 531} 532 533/// 534@safe pure unittest 535{ 536 import std.typecons : No; 537 538 string s = "Hello World"; 539 assert(indexOf(s, 'W', 4) == 6); 540 assert(indexOf(s, 'Z', 100) == -1); 541 assert(indexOf(s, 'w', 3, No.caseSensitive) == 6); 542} 543 544ptrdiff_t indexOf(Range)(auto ref Range s, in dchar c, 545 in CaseSensitive cs = Yes.caseSensitive) 546if (isConvertibleToString!Range) 547{ 548 return indexOf!(StringTypeOf!Range)(s, c, cs); 549} 550 551ptrdiff_t indexOf(Range)(auto ref Range s, in dchar c, in size_t startIdx, 552 in CaseSensitive cs = Yes.caseSensitive) 553if (isConvertibleToString!Range) 554{ 555 return indexOf!(StringTypeOf!Range)(s, c, startIdx, cs); 556} 557 558@safe pure unittest 559{ 560 assert(testAliasedString!indexOf("std/string.d", '/')); 561} 562 563@safe pure unittest 564{ 565 import std.conv : to; 566 import std.exception : assertCTFEable; 567 import std.traits : EnumMembers; 568 import std.utf : byChar, byWchar, byDchar; 569 570 assertCTFEable!( 571 { 572 foreach (S; AliasSeq!(string, wstring, dstring)) 573 { 574 assert(indexOf(cast(S) null, cast(dchar)'a') == -1); 575 assert(indexOf(to!S("def"), cast(dchar)'a') == -1); 576 assert(indexOf(to!S("abba"), cast(dchar)'a') == 0); 577 assert(indexOf(to!S("def"), cast(dchar)'f') == 2); 578 579 assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1); 580 assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1); 581 assert(indexOf(to!S("Abba"), cast(dchar)'a', No.caseSensitive) == 0); 582 assert(indexOf(to!S("def"), cast(dchar)'F', No.caseSensitive) == 2); 583 assert(indexOf(to!S("��def"), '��', No.caseSensitive) == 0); 584 585 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 586 assert(indexOf("def", cast(char)'f', No.caseSensitive) == 2); 587 assert(indexOf(sPlts, cast(char)'P', No.caseSensitive) == 23); 588 assert(indexOf(sPlts, cast(char)'R', No.caseSensitive) == 2); 589 } 590 591 foreach (cs; EnumMembers!CaseSensitive) 592 { 593 assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', cs) == 9); 594 assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', cs) == 7); 595 assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', cs) == 6); 596 597 assert(indexOf("hello\U00010143\u0100\U00010143".byChar, '\u0100', cs) == 9); 598 assert(indexOf("hello\U00010143\u0100\U00010143".byWchar, '\u0100', cs) == 7); 599 assert(indexOf("hello\U00010143\u0100\U00010143".byDchar, '\u0100', cs) == 6); 600 601 assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, 'l', cs) == 2); 602 assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, '\u0100', cs) == 7); 603 assert(indexOf("hello\U0000EFFF\u0100\U00010143".byChar, '\u0100', cs) == 8); 604 605 assert(indexOf("hello\U00010100".byWchar, '\U00010100', cs) == 5); 606 assert(indexOf("hello\U00010100".byWchar, '\U00010101', cs) == -1); 607 } 608 609 char[10] fixedSizeArray = "0123456789"; 610 assert(indexOf(fixedSizeArray, '2') == 2); 611 }); 612} 613 614@safe pure unittest 615{ 616 assert(testAliasedString!indexOf("std/string.d", '/', 3)); 617} 618 619@safe pure unittest 620{ 621 import std.conv : to; 622 import std.traits : EnumMembers; 623 import std.utf : byCodeUnit, byChar, byWchar; 624 625 assert("hello".byCodeUnit.indexOf(cast(dchar)'l', 1) == 2); 626 assert("hello".byWchar.indexOf(cast(dchar)'l', 1) == 2); 627 assert("hello".byWchar.indexOf(cast(dchar)'l', 6) == -1); 628 629 foreach (S; AliasSeq!(string, wstring, dstring)) 630 { 631 assert(indexOf(cast(S) null, cast(dchar)'a', 1) == -1); 632 assert(indexOf(to!S("def"), cast(dchar)'a', 1) == -1); 633 assert(indexOf(to!S("abba"), cast(dchar)'a', 1) == 3); 634 assert(indexOf(to!S("def"), cast(dchar)'f', 1) == 2); 635 636 assert((to!S("def")).indexOf(cast(dchar)'a', 1, 637 No.caseSensitive) == -1); 638 assert(indexOf(to!S("def"), cast(dchar)'a', 1, 639 No.caseSensitive) == -1); 640 assert(indexOf(to!S("def"), cast(dchar)'a', 12, 641 No.caseSensitive) == -1); 642 assert(indexOf(to!S("AbbA"), cast(dchar)'a', 2, 643 No.caseSensitive) == 3); 644 assert(indexOf(to!S("def"), cast(dchar)'F', 2, No.caseSensitive) == 2); 645 646 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 647 assert(indexOf("def", cast(char)'f', cast(uint) 2, 648 No.caseSensitive) == 2); 649 assert(indexOf(sPlts, cast(char)'P', 12, No.caseSensitive) == 23); 650 assert(indexOf(sPlts, cast(char)'R', cast(ulong) 1, 651 No.caseSensitive) == 2); 652 } 653 654 foreach (cs; EnumMembers!CaseSensitive) 655 { 656 assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', 2, cs) 657 == 9); 658 assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', 3, cs) 659 == 7); 660 assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', 6, cs) 661 == 6); 662 } 663} 664 665/++ 666 Searches for substring in $(D s). 667 668 Params: 669 s = string or ForwardRange of characters to search in correct UTF format 670 sub = substring to search for 671 startIdx = the index into s to start searching from 672 cs = $(D Yes.caseSensitive) or $(D No.caseSensitive) 673 674 Returns: 675 the index of the first occurrence of $(D sub) in $(D s) with 676 respect to the start index $(D startIdx). If $(D sub) is not found, 677 then $(D -1) is returned. 678 If the arguments are not valid UTF, the result will still 679 be in the range [-1 .. s.length], but will not be reliable otherwise. 680 If $(D sub) is found the value of the returned index is at least 681 $(D startIdx). 682 683 Throws: 684 If the sequence starting at $(D startIdx) does not represent a well 685 formed codepoint, then a $(REF UTFException, std,utf) may be thrown. 686 687 Bugs: 688 Does not work with case insensitive strings where the mapping of 689 tolower and toupper is not 1:1. 690 +/ 691ptrdiff_t indexOf(Range, Char)(Range s, const(Char)[] sub, 692 in CaseSensitive cs = Yes.caseSensitive) 693if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 694 isSomeChar!Char) 695{ 696 alias Char1 = Unqual!(ElementEncodingType!Range); 697 698 static if (isSomeString!Range) 699 { 700 import std.algorithm.searching : find; 701 702 const(Char1)[] balance; 703 if (cs == Yes.caseSensitive) 704 { 705 balance = find(s, sub); 706 } 707 else 708 { 709 balance = find! 710 ((a, b) => toLower(a) == toLower(b)) 711 (s, sub); 712 } 713 return () @trusted { return balance.empty ? -1 : balance.ptr - s.ptr; } (); 714 } 715 else 716 { 717 if (s.empty) 718 return -1; 719 if (sub.empty) 720 return 0; // degenerate case 721 722 import std.utf : byDchar, codeLength; 723 auto subr = sub.byDchar; // decode sub[] by dchar's 724 dchar sub0 = subr.front; // cache first character of sub[] 725 subr.popFront(); 726 727 // Special case for single character search 728 if (subr.empty) 729 return indexOf(s, sub0, cs); 730 731 if (cs == No.caseSensitive) 732 sub0 = toLower(sub0); 733 734 /* Classic double nested loop search algorithm 735 */ 736 ptrdiff_t index = 0; // count code unit index into s 737 for (auto sbydchar = s.byDchar(); !sbydchar.empty; sbydchar.popFront()) 738 { 739 dchar c2 = sbydchar.front; 740 if (cs == No.caseSensitive) 741 c2 = toLower(c2); 742 if (c2 == sub0) 743 { 744 auto s2 = sbydchar.save; // why s must be a forward range 745 foreach (c; subr.save) 746 { 747 s2.popFront(); 748 if (s2.empty) 749 return -1; 750 if (cs == Yes.caseSensitive ? c != s2.front 751 : toLower(c) != toLower(s2.front) 752 ) 753 goto Lnext; 754 } 755 return index; 756 } 757 Lnext: 758 index += codeLength!Char1(c2); 759 } 760 return -1; 761 } 762} 763 764/// Ditto 765ptrdiff_t indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub, 766 in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) 767@safe 768if (isSomeChar!Char1 && isSomeChar!Char2) 769{ 770 if (startIdx < s.length) 771 { 772 ptrdiff_t foundIdx = indexOf(s[startIdx .. $], sub, cs); 773 if (foundIdx != -1) 774 { 775 return foundIdx + cast(ptrdiff_t) startIdx; 776 } 777 } 778 return -1; 779} 780 781/// 782@safe pure unittest 783{ 784 import std.typecons : No; 785 786 string s = "Hello World"; 787 assert(indexOf(s, "Wo", 4) == 6); 788 assert(indexOf(s, "Zo", 100) == -1); 789 assert(indexOf(s, "wo", 3, No.caseSensitive) == 6); 790} 791 792/// 793@safe pure unittest 794{ 795 import std.typecons : No; 796 797 string s = "Hello World"; 798 assert(indexOf(s, "Wo") == 6); 799 assert(indexOf(s, "Zo") == -1); 800 assert(indexOf(s, "wO", No.caseSensitive) == 6); 801} 802 803ptrdiff_t indexOf(Range, Char)(auto ref Range s, const(Char)[] sub, 804 in CaseSensitive cs = Yes.caseSensitive) 805if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 806 isSomeChar!Char) && 807 is(StringTypeOf!Range)) 808{ 809 return indexOf!(StringTypeOf!Range)(s, sub, cs); 810} 811 812@safe pure unittest 813{ 814 assert(testAliasedString!indexOf("std/string.d", "string")); 815} 816 817@safe pure unittest 818{ 819 import std.conv : to; 820 import std.exception : assertCTFEable; 821 import std.traits : EnumMembers; 822 823 assertCTFEable!( 824 { 825 foreach (S; AliasSeq!(string, wstring, dstring)) 826 { 827 foreach (T; AliasSeq!(string, wstring, dstring)) 828 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 829 assert(indexOf(cast(S) null, to!T("a")) == -1); 830 assert(indexOf(to!S("def"), to!T("a")) == -1); 831 assert(indexOf(to!S("abba"), to!T("a")) == 0); 832 assert(indexOf(to!S("def"), to!T("f")) == 2); 833 assert(indexOf(to!S("dfefffg"), to!T("fff")) == 3); 834 assert(indexOf(to!S("dfeffgfff"), to!T("fff")) == 6); 835 836 assert(indexOf(to!S("dfeffgfff"), to!T("a"), No.caseSensitive) == -1); 837 assert(indexOf(to!S("def"), to!T("a"), No.caseSensitive) == -1); 838 assert(indexOf(to!S("abba"), to!T("a"), No.caseSensitive) == 0); 839 assert(indexOf(to!S("def"), to!T("f"), No.caseSensitive) == 2); 840 assert(indexOf(to!S("dfefffg"), to!T("fff"), No.caseSensitive) == 3); 841 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), No.caseSensitive) == 6); 842 843 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 844 S sMars = "Who\'s \'My Favorite Maritian?\'"; 845 846 assert(indexOf(sMars, to!T("MY fAVe"), No.caseSensitive) == -1); 847 assert(indexOf(sMars, to!T("mY fAVOriTe"), No.caseSensitive) == 7); 848 assert(indexOf(sPlts, to!T("mArS:"), No.caseSensitive) == 0); 849 assert(indexOf(sPlts, to!T("rOcK"), No.caseSensitive) == 17); 850 assert(indexOf(sPlts, to!T("Un."), No.caseSensitive) == 41); 851 assert(indexOf(sPlts, to!T(sPlts), No.caseSensitive) == 0); 852 853 assert(indexOf("\u0100", to!T("\u0100"), No.caseSensitive) == 0); 854 855 // Thanks to Carlos Santander B. and zwang 856 assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y", 857 to!T("page-break-before"), No.caseSensitive) == -1); 858 }(); 859 860 foreach (cs; EnumMembers!CaseSensitive) 861 { 862 assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"), cs) == 9); 863 assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"), cs) == 7); 864 assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"), cs) == 6); 865 } 866 } 867 }); 868} 869 870@safe pure @nogc nothrow 871unittest 872{ 873 import std.traits : EnumMembers; 874 import std.utf : byWchar; 875 876 foreach (cs; EnumMembers!CaseSensitive) 877 { 878 assert(indexOf("".byWchar, "", cs) == -1); 879 assert(indexOf("hello".byWchar, "", cs) == 0); 880 assert(indexOf("hello".byWchar, "l", cs) == 2); 881 assert(indexOf("heLLo".byWchar, "LL", cs) == 2); 882 assert(indexOf("hello".byWchar, "lox", cs) == -1); 883 assert(indexOf("hello".byWchar, "betty", cs) == -1); 884 assert(indexOf("hello\U00010143\u0100*\U00010143".byWchar, "\u0100*", cs) == 7); 885 } 886} 887 888@safe pure unittest 889{ 890 import std.conv : to; 891 import std.traits : EnumMembers; 892 893 foreach (S; AliasSeq!(string, wstring, dstring)) 894 { 895 foreach (T; AliasSeq!(string, wstring, dstring)) 896 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 897 assert(indexOf(cast(S) null, to!T("a"), 1337) == -1); 898 assert(indexOf(to!S("def"), to!T("a"), 0) == -1); 899 assert(indexOf(to!S("abba"), to!T("a"), 2) == 3); 900 assert(indexOf(to!S("def"), to!T("f"), 1) == 2); 901 assert(indexOf(to!S("dfefffg"), to!T("fff"), 1) == 3); 902 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 5) == 6); 903 904 assert(indexOf(to!S("dfeffgfff"), to!T("a"), 1, No.caseSensitive) == -1); 905 assert(indexOf(to!S("def"), to!T("a"), 2, No.caseSensitive) == -1); 906 assert(indexOf(to!S("abba"), to!T("a"), 3, No.caseSensitive) == 3); 907 assert(indexOf(to!S("def"), to!T("f"), 1, No.caseSensitive) == 2); 908 assert(indexOf(to!S("dfefffg"), to!T("fff"), 2, No.caseSensitive) == 3); 909 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 4, No.caseSensitive) == 6); 910 assert(indexOf(to!S("dfeffgfff����"), to!T("����"), 9, No.caseSensitive) == 9, 911 to!string(indexOf(to!S("dfeffgfff����"), to!T("����"), 9, No.caseSensitive)) 912 ~ " " ~ S.stringof ~ " " ~ T.stringof); 913 914 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 915 S sMars = "Who\'s \'My Favorite Maritian?\'"; 916 917 assert(indexOf(sMars, to!T("MY fAVe"), 10, 918 No.caseSensitive) == -1); 919 assert(indexOf(sMars, to!T("mY fAVOriTe"), 4, No.caseSensitive) == 7); 920 assert(indexOf(sPlts, to!T("mArS:"), 0, No.caseSensitive) == 0); 921 assert(indexOf(sPlts, to!T("rOcK"), 12, No.caseSensitive) == 17); 922 assert(indexOf(sPlts, to!T("Un."), 32, No.caseSensitive) == 41); 923 assert(indexOf(sPlts, to!T(sPlts), 0, No.caseSensitive) == 0); 924 925 assert(indexOf("\u0100", to!T("\u0100"), 0, No.caseSensitive) == 0); 926 927 // Thanks to Carlos Santander B. and zwang 928 assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y", 929 to!T("page-break-before"), 10, No.caseSensitive) == -1); 930 931 // In order for indexOf with and without index to be consistent 932 assert(indexOf(to!S(""), to!T("")) == indexOf(to!S(""), to!T(""), 0)); 933 }(); 934 935 foreach (cs; EnumMembers!CaseSensitive) 936 { 937 assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"), 938 3, cs) == 9); 939 assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"), 940 3, cs) == 7); 941 assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"), 942 3, cs) == 6); 943 } 944 } 945} 946 947/++ 948 Params: 949 s = string to search 950 c = character to search for 951 startIdx = the index into s to start searching from 952 cs = $(D Yes.caseSensitive) or $(D No.caseSensitive) 953 954 Returns: 955 The index of the last occurrence of $(D c) in $(D s). If $(D c) is not 956 found, then $(D -1) is returned. The $(D startIdx) slices $(D s) in 957 the following way $(D s[0 .. startIdx]). $(D startIdx) represents a 958 codeunit index in $(D s). 959 960 Throws: 961 If the sequence ending at $(D startIdx) does not represent a well 962 formed codepoint, then a $(REF UTFException, std,utf) may be thrown. 963 964 $(D cs) indicates whether the comparisons are case sensitive. 965 +/ 966ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c, 967 in CaseSensitive cs = Yes.caseSensitive) @safe pure 968if (isSomeChar!Char) 969{ 970 static import std.ascii, std.uni; 971 import std.utf : canSearchInCodeUnits; 972 if (cs == Yes.caseSensitive) 973 { 974 if (canSearchInCodeUnits!Char(c)) 975 { 976 foreach_reverse (i, it; s) 977 { 978 if (it == c) 979 { 980 return i; 981 } 982 } 983 } 984 else 985 { 986 foreach_reverse (i, dchar it; s) 987 { 988 if (it == c) 989 { 990 return i; 991 } 992 } 993 } 994 } 995 else 996 { 997 if (std.ascii.isASCII(c)) 998 { 999 immutable c1 = std.ascii.toLower(c); 1000 1001 foreach_reverse (i, it; s) 1002 { 1003 immutable c2 = std.ascii.toLower(it); 1004 if (c1 == c2) 1005 { 1006 return i; 1007 } 1008 } 1009 } 1010 else 1011 { 1012 immutable c1 = std.uni.toLower(c); 1013 1014 foreach_reverse (i, dchar it; s) 1015 { 1016 immutable c2 = std.uni.toLower(it); 1017 if (c1 == c2) 1018 { 1019 return i; 1020 } 1021 } 1022 } 1023 } 1024 1025 return -1; 1026} 1027 1028/// Ditto 1029ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c, in size_t startIdx, 1030 in CaseSensitive cs = Yes.caseSensitive) @safe pure 1031if (isSomeChar!Char) 1032{ 1033 if (startIdx <= s.length) 1034 { 1035 return lastIndexOf(s[0u .. startIdx], c, cs); 1036 } 1037 1038 return -1; 1039} 1040 1041/// 1042@safe pure unittest 1043{ 1044 import std.typecons : No; 1045 1046 string s = "Hello World"; 1047 assert(lastIndexOf(s, 'l') == 9); 1048 assert(lastIndexOf(s, 'Z') == -1); 1049 assert(lastIndexOf(s, 'L', No.caseSensitive) == 9); 1050} 1051 1052/// 1053@safe pure unittest 1054{ 1055 import std.typecons : No; 1056 1057 string s = "Hello World"; 1058 assert(lastIndexOf(s, 'l', 4) == 3); 1059 assert(lastIndexOf(s, 'Z', 1337) == -1); 1060 assert(lastIndexOf(s, 'L', 7, No.caseSensitive) == 3); 1061} 1062 1063@safe pure unittest 1064{ 1065 import std.conv : to; 1066 import std.exception : assertCTFEable; 1067 import std.traits : EnumMembers; 1068 1069 assertCTFEable!( 1070 { 1071 foreach (S; AliasSeq!(string, wstring, dstring)) 1072 { 1073 assert(lastIndexOf(cast(S) null, 'a') == -1); 1074 assert(lastIndexOf(to!S("def"), 'a') == -1); 1075 assert(lastIndexOf(to!S("abba"), 'a') == 3); 1076 assert(lastIndexOf(to!S("def"), 'f') == 2); 1077 assert(lastIndexOf(to!S("��def"), '��') == 0); 1078 1079 assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1); 1080 assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1); 1081 assert(lastIndexOf(to!S("AbbA"), 'a', No.caseSensitive) == 3); 1082 assert(lastIndexOf(to!S("def"), 'F', No.caseSensitive) == 2); 1083 assert(lastIndexOf(to!S("��def"), '��', No.caseSensitive) == 0); 1084 assert(lastIndexOf(to!S("i\u0100def"), to!dchar("\u0100"), 1085 No.caseSensitive) == 1); 1086 1087 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 1088 1089 assert(lastIndexOf(to!S("def"), 'f', No.caseSensitive) == 2); 1090 assert(lastIndexOf(sPlts, 'M', No.caseSensitive) == 34); 1091 assert(lastIndexOf(sPlts, 'S', No.caseSensitive) == 40); 1092 } 1093 1094 foreach (cs; EnumMembers!CaseSensitive) 1095 { 1096 assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4); 1097 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2); 1098 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1); 1099 } 1100 }); 1101} 1102 1103@safe pure unittest 1104{ 1105 import std.conv : to; 1106 import std.traits : EnumMembers; 1107 1108 foreach (S; AliasSeq!(string, wstring, dstring)) 1109 { 1110 assert(lastIndexOf(cast(S) null, 'a') == -1); 1111 assert(lastIndexOf(to!S("def"), 'a') == -1); 1112 assert(lastIndexOf(to!S("abba"), 'a', 3) == 0); 1113 assert(lastIndexOf(to!S("deff"), 'f', 3) == 2); 1114 1115 assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1); 1116 assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1); 1117 assert(lastIndexOf(to!S("AbbAa"), 'a', to!ushort(4), No.caseSensitive) == 3, 1118 to!string(lastIndexOf(to!S("AbbAa"), 'a', 4, No.caseSensitive))); 1119 assert(lastIndexOf(to!S("def"), 'F', 3, No.caseSensitive) == 2); 1120 1121 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 1122 1123 assert(lastIndexOf(to!S("def"), 'f', 4, No.caseSensitive) == -1); 1124 assert(lastIndexOf(sPlts, 'M', sPlts.length -2, No.caseSensitive) == 34); 1125 assert(lastIndexOf(sPlts, 'S', sPlts.length -2, No.caseSensitive) == 40); 1126 } 1127 1128 foreach (cs; EnumMembers!CaseSensitive) 1129 { 1130 assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4); 1131 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2); 1132 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1); 1133 } 1134} 1135 1136/++ 1137 Params: 1138 s = string to search 1139 sub = substring to search for 1140 startIdx = the index into s to start searching from 1141 cs = $(D Yes.caseSensitive) or $(D No.caseSensitive) 1142 1143 Returns: 1144 the index of the last occurrence of $(D sub) in $(D s). If $(D sub) is 1145 not found, then $(D -1) is returned. The $(D startIdx) slices $(D s) 1146 in the following way $(D s[0 .. startIdx]). $(D startIdx) represents a 1147 codeunit index in $(D s). 1148 1149 Throws: 1150 If the sequence ending at $(D startIdx) does not represent a well 1151 formed codepoint, then a $(REF UTFException, std,utf) may be thrown. 1152 1153 $(D cs) indicates whether the comparisons are case sensitive. 1154 +/ 1155ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub, 1156 in CaseSensitive cs = Yes.caseSensitive) @safe pure 1157if (isSomeChar!Char1 && isSomeChar!Char2) 1158{ 1159 import std.algorithm.searching : endsWith; 1160 import std.conv : to; 1161 import std.range.primitives : walkLength; 1162 static import std.uni; 1163 import std.utf : strideBack; 1164 if (sub.empty) 1165 return -1; 1166 1167 if (walkLength(sub) == 1) 1168 return lastIndexOf(s, sub.front, cs); 1169 1170 if (cs == Yes.caseSensitive) 1171 { 1172 static if (is(Unqual!Char1 == Unqual!Char2)) 1173 { 1174 import core.stdc.string : memcmp; 1175 1176 immutable c = sub[0]; 1177 1178 for (ptrdiff_t i = s.length - sub.length; i >= 0; --i) 1179 { 1180 if (s[i] == c) 1181 { 1182 if (__ctfe) 1183 { 1184 foreach (j; 1 .. sub.length) 1185 { 1186 if (s[i + j] != sub[j]) 1187 continue; 1188 } 1189 return i; 1190 } 1191 else 1192 { 1193 auto trustedMemcmp(in void* s1, in void* s2, size_t n) @trusted 1194 { 1195 return memcmp(s1, s2, n); 1196 } 1197 if (trustedMemcmp(&s[i + 1], &sub[1], 1198 (sub.length - 1) * Char1.sizeof) == 0) 1199 return i; 1200 } 1201 } 1202 } 1203 } 1204 else 1205 { 1206 for (size_t i = s.length; !s.empty;) 1207 { 1208 if (s.endsWith(sub)) 1209 return cast(ptrdiff_t) i - to!(const(Char1)[])(sub).length; 1210 1211 i -= strideBack(s, i); 1212 s = s[0 .. i]; 1213 } 1214 } 1215 } 1216 else 1217 { 1218 for (size_t i = s.length; !s.empty;) 1219 { 1220 if (endsWith!((a, b) => std.uni.toLower(a) == std.uni.toLower(b)) 1221 (s, sub)) 1222 { 1223 return cast(ptrdiff_t) i - to!(const(Char1)[])(sub).length; 1224 } 1225 1226 i -= strideBack(s, i); 1227 s = s[0 .. i]; 1228 } 1229 } 1230 1231 return -1; 1232} 1233 1234/// Ditto 1235ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub, 1236 in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure 1237if (isSomeChar!Char1 && isSomeChar!Char2) 1238{ 1239 if (startIdx <= s.length) 1240 { 1241 return lastIndexOf(s[0u .. startIdx], sub, cs); 1242 } 1243 1244 return -1; 1245} 1246 1247/// 1248@safe pure unittest 1249{ 1250 import std.typecons : No; 1251 1252 string s = "Hello World"; 1253 assert(lastIndexOf(s, "ll") == 2); 1254 assert(lastIndexOf(s, "Zo") == -1); 1255 assert(lastIndexOf(s, "lL", No.caseSensitive) == 2); 1256} 1257 1258/// 1259@safe pure unittest 1260{ 1261 import std.typecons : No; 1262 1263 string s = "Hello World"; 1264 assert(lastIndexOf(s, "ll", 4) == 2); 1265 assert(lastIndexOf(s, "Zo", 128) == -1); 1266 assert(lastIndexOf(s, "lL", 3, No.caseSensitive) == -1); 1267} 1268 1269@safe pure unittest 1270{ 1271 import std.conv : to; 1272 1273 foreach (S; AliasSeq!(string, wstring, dstring)) 1274 { 1275 auto r = to!S("").lastIndexOf("hello"); 1276 assert(r == -1, to!string(r)); 1277 1278 r = to!S("hello").lastIndexOf(""); 1279 assert(r == -1, to!string(r)); 1280 1281 r = to!S("").lastIndexOf(""); 1282 assert(r == -1, to!string(r)); 1283 } 1284} 1285 1286@safe pure unittest 1287{ 1288 import std.conv : to; 1289 import std.exception : assertCTFEable; 1290 import std.traits : EnumMembers; 1291 1292 assertCTFEable!( 1293 { 1294 foreach (S; AliasSeq!(string, wstring, dstring)) 1295 { 1296 foreach (T; AliasSeq!(string, wstring, dstring)) 1297 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 1298 enum typeStr = S.stringof ~ " " ~ T.stringof; 1299 1300 assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr); 1301 assert(lastIndexOf(to!S("abcdefcdef"), to!T("c")) == 6, typeStr); 1302 assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd")) == 6, typeStr); 1303 assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef")) == 8, typeStr); 1304 assert(lastIndexOf(to!S("abcdefCdef"), to!T("c")) == 2, typeStr); 1305 assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd")) == 2, typeStr); 1306 assert(lastIndexOf(to!S("abcdefcdef"), to!T("x")) == -1, typeStr); 1307 assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy")) == -1, typeStr); 1308 assert(lastIndexOf(to!S("abcdefcdef"), to!T("")) == -1, typeStr); 1309 assert(lastIndexOf(to!S("��abcdefcdef"), to!T("��")) == 0, typeStr); 1310 1311 assert(lastIndexOf(cast(S) null, to!T("a"), No.caseSensitive) == -1, typeStr); 1312 assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), No.caseSensitive) == 6, typeStr); 1313 assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), No.caseSensitive) == 6, typeStr); 1314 assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"), No.caseSensitive) == -1, typeStr); 1315 assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy"), No.caseSensitive) == -1, typeStr); 1316 assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), No.caseSensitive) == -1, typeStr); 1317 assert(lastIndexOf(to!S("��abcdefcdef"), to!T("��"), No.caseSensitive) == 0, typeStr); 1318 1319 assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), No.caseSensitive) == 6, typeStr); 1320 assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), No.caseSensitive) == 6, typeStr); 1321 assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), No.caseSensitive) == 7, typeStr); 1322 1323 assert(lastIndexOf(to!S("��dfeffgfff"), to!T("��"), Yes.caseSensitive) == 0); 1324 1325 S sPlts = "Mars: the fourth Rock (Planet) from the Sun."; 1326 S sMars = "Who\'s \'My Favorite Maritian?\'"; 1327 1328 assert(lastIndexOf(sMars, to!T("RiTE maR"), No.caseSensitive) == 14, typeStr); 1329 assert(lastIndexOf(sPlts, to!T("FOuRTh"), No.caseSensitive) == 10, typeStr); 1330 assert(lastIndexOf(sMars, to!T("whO\'s \'MY"), No.caseSensitive) == 0, typeStr); 1331 assert(lastIndexOf(sMars, to!T(sMars), No.caseSensitive) == 0, typeStr); 1332 }(); 1333 1334 foreach (cs; EnumMembers!CaseSensitive) 1335 { 1336 enum csString = to!string(cs); 1337 1338 assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), cs) == 4, csString); 1339 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), cs) == 2, csString); 1340 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), cs) == 1, csString); 1341 } 1342 } 1343 }); 1344} 1345 1346@safe pure unittest // issue13529 1347{ 1348 import std.conv : to; 1349 foreach (S; AliasSeq!(string, wstring, dstring)) 1350 { 1351 foreach (T; AliasSeq!(string, wstring, dstring)) 1352 { 1353 enum typeStr = S.stringof ~ " " ~ T.stringof; 1354 auto idx = lastIndexOf(to!T("H��ll�� W��rld�� ��"),to!S("�� ��")); 1355 assert(idx != -1, to!string(idx) ~ " " ~ typeStr); 1356 1357 idx = lastIndexOf(to!T("H��ll�� W��rld�� ��"),to!S("�� ��d")); 1358 assert(idx == -1, to!string(idx) ~ " " ~ typeStr); 1359 } 1360 } 1361} 1362 1363@safe pure unittest 1364{ 1365 import std.conv : to; 1366 import std.traits : EnumMembers; 1367 1368 foreach (S; AliasSeq!(string, wstring, dstring)) 1369 { 1370 foreach (T; AliasSeq!(string, wstring, dstring)) 1371 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 1372 enum typeStr = S.stringof ~ " " ~ T.stringof; 1373 1374 assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr); 1375 assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 5) == 2, typeStr); 1376 assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 3) == -1, typeStr); 1377 assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6) == 4, typeStr ~ 1378 format(" %u", lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6))); 1379 assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5) == 2, typeStr); 1380 assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd"), 3) == -1, typeStr); 1381 assert(lastIndexOf(to!S("abcdefcdefx"), to!T("x"), 1) == -1, typeStr); 1382 assert(lastIndexOf(to!S("abcdefcdefxy"), to!T("xy"), 6) == -1, typeStr); 1383 assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 8) == -1, typeStr); 1384 assert(lastIndexOf(to!S("��af��"), to!T("��"), 3) == 0, typeStr ~ 1385 to!string(lastIndexOf(to!S("��af��"), to!T("��"), 3))); //BUG 10472 1386 1387 assert(lastIndexOf(cast(S) null, to!T("a"), 1, No.caseSensitive) == -1, typeStr); 1388 assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5, No.caseSensitive) == 2, typeStr); 1389 assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, No.caseSensitive) == 2, typeStr ~ 1390 " " ~ to!string(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 3, No.caseSensitive))); 1391 assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"),3 , No.caseSensitive) == -1, typeStr); 1392 assert(lastIndexOf(to!S("abcdefcdefXY"), to!T("xy"), 4, No.caseSensitive) == -1, typeStr); 1393 assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 7, No.caseSensitive) == -1, typeStr); 1394 1395 assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 4, No.caseSensitive) == 2, typeStr); 1396 assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 4, No.caseSensitive) == 2, typeStr); 1397 assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), 6, No.caseSensitive) == 3, typeStr); 1398 assert(lastIndexOf(to!S(""), to!T(""), 0) == lastIndexOf(to!S(""), to!T("")), typeStr); 1399 }(); 1400 1401 foreach (cs; EnumMembers!CaseSensitive) 1402 { 1403 enum csString = to!string(cs); 1404 1405 assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), 6, cs) == 4, csString); 1406 assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), 6, cs) == 2, csString); 1407 assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), 3, cs) == 1, csString); 1408 } 1409 } 1410} 1411 1412private ptrdiff_t indexOfAnyNeitherImpl(bool forward, bool any, Char, Char2)( 1413 const(Char)[] haystack, const(Char2)[] needles, 1414 in CaseSensitive cs = Yes.caseSensitive) @safe pure 1415if (isSomeChar!Char && isSomeChar!Char2) 1416{ 1417 import std.algorithm.searching : canFind, findAmong; 1418 if (cs == Yes.caseSensitive) 1419 { 1420 static if (forward) 1421 { 1422 static if (any) 1423 { 1424 size_t n = haystack.findAmong(needles).length; 1425 return n ? haystack.length - n : -1; 1426 } 1427 else 1428 { 1429 foreach (idx, dchar hay; haystack) 1430 { 1431 if (!canFind(needles, hay)) 1432 { 1433 return idx; 1434 } 1435 } 1436 } 1437 } 1438 else 1439 { 1440 static if (any) 1441 { 1442 import std.range : retro; 1443 import std.utf : strideBack; 1444 size_t n = haystack.retro.findAmong(needles).source.length; 1445 if (n) 1446 { 1447 return n - haystack.strideBack(n); 1448 } 1449 } 1450 else 1451 { 1452 foreach_reverse (idx, dchar hay; haystack) 1453 { 1454 if (!canFind(needles, hay)) 1455 { 1456 return idx; 1457 } 1458 } 1459 } 1460 } 1461 } 1462 else 1463 { 1464 import std.range.primitives : walkLength; 1465 if (needles.length <= 16 && needles.walkLength(17)) 1466 { 1467 size_t si = 0; 1468 dchar[16] scratch = void; 1469 foreach ( dchar c; needles) 1470 { 1471 scratch[si++] = toLower(c); 1472 } 1473 1474 static if (forward) 1475 { 1476 foreach (i, dchar c; haystack) 1477 { 1478 if (canFind(scratch[0 .. si], toLower(c)) == any) 1479 { 1480 return i; 1481 } 1482 } 1483 } 1484 else 1485 { 1486 foreach_reverse (i, dchar c; haystack) 1487 { 1488 if (canFind(scratch[0 .. si], toLower(c)) == any) 1489 { 1490 return i; 1491 } 1492 } 1493 } 1494 } 1495 else 1496 { 1497 static bool f(dchar a, dchar b) 1498 { 1499 return toLower(a) == b; 1500 } 1501 1502 static if (forward) 1503 { 1504 foreach (i, dchar c; haystack) 1505 { 1506 if (canFind!f(needles, toLower(c)) == any) 1507 { 1508 return i; 1509 } 1510 } 1511 } 1512 else 1513 { 1514 foreach_reverse (i, dchar c; haystack) 1515 { 1516 if (canFind!f(needles, toLower(c)) == any) 1517 { 1518 return i; 1519 } 1520 } 1521 } 1522 } 1523 } 1524 1525 return -1; 1526} 1527 1528/** 1529 Returns the index of the first occurrence of any of the elements in $(D 1530 needles) in $(D haystack). If no element of $(D needles) is found, 1531 then $(D -1) is returned. The $(D startIdx) slices $(D haystack) in the 1532 following way $(D haystack[startIdx .. $]). $(D startIdx) represents a 1533 codeunit index in $(D haystack). If the sequence ending at $(D startIdx) 1534 does not represent a well formed codepoint, then a $(REF UTFException, std,utf) 1535 may be thrown. 1536 1537 Params: 1538 haystack = String to search for needles in. 1539 needles = Strings to search for in haystack. 1540 startIdx = slices haystack like this $(D haystack[startIdx .. $]). If 1541 the startIdx is greater equal the length of haystack the functions 1542 returns $(D -1). 1543 cs = Indicates whether the comparisons are case sensitive. 1544*/ 1545ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles, 1546 in CaseSensitive cs = Yes.caseSensitive) @safe pure 1547if (isSomeChar!Char && isSomeChar!Char2) 1548{ 1549 return indexOfAnyNeitherImpl!(true, true)(haystack, needles, cs); 1550} 1551 1552/// Ditto 1553ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles, 1554 in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure 1555if (isSomeChar!Char && isSomeChar!Char2) 1556{ 1557 if (startIdx < haystack.length) 1558 { 1559 ptrdiff_t foundIdx = indexOfAny(haystack[startIdx .. $], needles, cs); 1560 if (foundIdx != -1) 1561 { 1562 return foundIdx + cast(ptrdiff_t) startIdx; 1563 } 1564 } 1565 1566 return -1; 1567} 1568 1569/// 1570@safe pure unittest 1571{ 1572 import std.conv : to; 1573 1574 ptrdiff_t i = "helloWorld".indexOfAny("Wr"); 1575 assert(i == 5); 1576 i = "����llo world".indexOfAny("lo "); 1577 assert(i == 4, to!string(i)); 1578} 1579 1580/// 1581@safe pure unittest 1582{ 1583 import std.conv : to; 1584 1585 ptrdiff_t i = "helloWorld".indexOfAny("Wr", 4); 1586 assert(i == 5); 1587 1588 i = "Foo ����llo world".indexOfAny("lh", 3); 1589 assert(i == 8, to!string(i)); 1590} 1591 1592@safe pure unittest 1593{ 1594 import std.conv : to; 1595 1596 foreach (S; AliasSeq!(string, wstring, dstring)) 1597 { 1598 auto r = to!S("").indexOfAny("hello"); 1599 assert(r == -1, to!string(r)); 1600 1601 r = to!S("hello").indexOfAny(""); 1602 assert(r == -1, to!string(r)); 1603 1604 r = to!S("").indexOfAny(""); 1605 assert(r == -1, to!string(r)); 1606 } 1607} 1608 1609@safe pure unittest 1610{ 1611 import std.conv : to; 1612 import std.exception : assertCTFEable; 1613 1614 assertCTFEable!( 1615 { 1616 foreach (S; AliasSeq!(string, wstring, dstring)) 1617 { 1618 foreach (T; AliasSeq!(string, wstring, dstring)) 1619 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 1620 assert(indexOfAny(cast(S) null, to!T("a")) == -1); 1621 assert(indexOfAny(to!S("def"), to!T("rsa")) == -1); 1622 assert(indexOfAny(to!S("abba"), to!T("a")) == 0); 1623 assert(indexOfAny(to!S("def"), to!T("f")) == 2); 1624 assert(indexOfAny(to!S("dfefffg"), to!T("fgh")) == 1); 1625 assert(indexOfAny(to!S("dfeffgfff"), to!T("feg")) == 1); 1626 1627 assert(indexOfAny(to!S("zfeffgfff"), to!T("ACDC"), 1628 No.caseSensitive) == -1); 1629 assert(indexOfAny(to!S("def"), to!T("MI6"), 1630 No.caseSensitive) == -1); 1631 assert(indexOfAny(to!S("abba"), to!T("DEA"), 1632 No.caseSensitive) == 0); 1633 assert(indexOfAny(to!S("def"), to!T("FBI"), No.caseSensitive) == 2); 1634 assert(indexOfAny(to!S("dfefffg"), to!T("NSA"), No.caseSensitive) 1635 == -1); 1636 assert(indexOfAny(to!S("dfeffgfff"), to!T("BND"), 1637 No.caseSensitive) == 0); 1638 assert(indexOfAny(to!S("dfeffgfff"), to!T("BNDabCHIJKQEP����SYX��??��"), 1639 No.caseSensitive) == 0); 1640 1641 assert(indexOfAny("\u0100", to!T("\u0100"), No.caseSensitive) == 0); 1642 }(); 1643 } 1644 } 1645 ); 1646} 1647 1648@safe pure unittest 1649{ 1650 import std.conv : to; 1651 import std.traits : EnumMembers; 1652 1653 foreach (S; AliasSeq!(string, wstring, dstring)) 1654 { 1655 foreach (T; AliasSeq!(string, wstring, dstring)) 1656 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 1657 assert(indexOfAny(cast(S) null, to!T("a"), 1337) == -1); 1658 assert(indexOfAny(to!S("def"), to!T("AaF"), 0) == -1); 1659 assert(indexOfAny(to!S("abba"), to!T("NSa"), 2) == 3); 1660 assert(indexOfAny(to!S("def"), to!T("fbi"), 1) == 2); 1661 assert(indexOfAny(to!S("dfefffg"), to!T("foo"), 2) == 3); 1662 assert(indexOfAny(to!S("dfeffgfff"), to!T("fsb"), 5) == 6); 1663 1664 assert(indexOfAny(to!S("dfeffgfff"), to!T("NDS"), 1, 1665 No.caseSensitive) == -1); 1666 assert(indexOfAny(to!S("def"), to!T("DRS"), 2, 1667 No.caseSensitive) == -1); 1668 assert(indexOfAny(to!S("abba"), to!T("SI"), 3, 1669 No.caseSensitive) == -1); 1670 assert(indexOfAny(to!S("deO"), to!T("ASIO"), 1, 1671 No.caseSensitive) == 2); 1672 assert(indexOfAny(to!S("dfefffg"), to!T("fbh"), 2, 1673 No.caseSensitive) == 3); 1674 assert(indexOfAny(to!S("dfeffgfff"), to!T("fEe"), 4, 1675 No.caseSensitive) == 4); 1676 assert(indexOfAny(to!S("dfeffgfff����"), to!T("f����"), 9, 1677 No.caseSensitive) == 9); 1678 1679 assert(indexOfAny("\u0100", to!T("\u0100"), 0, 1680 No.caseSensitive) == 0); 1681 }(); 1682 1683 foreach (cs; EnumMembers!CaseSensitive) 1684 { 1685 assert(indexOfAny("hello\U00010143\u0100\U00010143", 1686 to!S("e\u0100"), 3, cs) == 9); 1687 assert(indexOfAny("hello\U00010143\u0100\U00010143"w, 1688 to!S("h\u0100"), 3, cs) == 7); 1689 assert(indexOfAny("hello\U00010143\u0100\U00010143"d, 1690 to!S("l\u0100"), 5, cs) == 6); 1691 } 1692 } 1693} 1694 1695/** 1696 Returns the index of the last occurrence of any of the elements in $(D 1697 needles) in $(D haystack). If no element of $(D needles) is found, 1698 then $(D -1) is returned. The $(D stopIdx) slices $(D haystack) in the 1699 following way $(D s[0 .. stopIdx]). $(D stopIdx) represents a codeunit 1700 index in $(D haystack). If the sequence ending at $(D startIdx) does not 1701 represent a well formed codepoint, then a $(REF UTFException, std,utf) may be 1702 thrown. 1703 1704 Params: 1705 haystack = String to search for needles in. 1706 needles = Strings to search for in haystack. 1707 stopIdx = slices haystack like this $(D haystack[0 .. stopIdx]). If 1708 the stopIdx is greater equal the length of haystack the functions 1709 returns $(D -1). 1710 cs = Indicates whether the comparisons are case sensitive. 1711*/ 1712ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack, 1713 const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive) 1714 @safe pure 1715if (isSomeChar!Char && isSomeChar!Char2) 1716{ 1717 return indexOfAnyNeitherImpl!(false, true)(haystack, needles, cs); 1718} 1719 1720/// Ditto 1721ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack, 1722 const(Char2)[] needles, in size_t stopIdx, 1723 in CaseSensitive cs = Yes.caseSensitive) @safe pure 1724if (isSomeChar!Char && isSomeChar!Char2) 1725{ 1726 if (stopIdx <= haystack.length) 1727 { 1728 return lastIndexOfAny(haystack[0u .. stopIdx], needles, cs); 1729 } 1730 1731 return -1; 1732} 1733 1734/// 1735@safe pure unittest 1736{ 1737 ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo"); 1738 assert(i == 8); 1739 1740 i = "Foo ������llo world".lastIndexOfAny("��F"); 1741 assert(i == 8); 1742} 1743 1744/// 1745@safe pure unittest 1746{ 1747 import std.conv : to; 1748 1749 ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo", 4); 1750 assert(i == 3); 1751 1752 i = "Foo ������llo world".lastIndexOfAny("��F", 3); 1753 assert(i == 0); 1754} 1755 1756@safe pure unittest 1757{ 1758 import std.conv : to; 1759 1760 foreach (S; AliasSeq!(string, wstring, dstring)) 1761 { 1762 auto r = to!S("").lastIndexOfAny("hello"); 1763 assert(r == -1, to!string(r)); 1764 1765 r = to!S("hello").lastIndexOfAny(""); 1766 assert(r == -1, to!string(r)); 1767 1768 r = to!S("").lastIndexOfAny(""); 1769 assert(r == -1, to!string(r)); 1770 } 1771} 1772 1773@safe pure unittest 1774{ 1775 import std.conv : to; 1776 import std.exception : assertCTFEable; 1777 1778 assertCTFEable!( 1779 { 1780 foreach (S; AliasSeq!(string, wstring, dstring)) 1781 { 1782 foreach (T; AliasSeq!(string, wstring, dstring)) 1783 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 1784 assert(lastIndexOfAny(cast(S) null, to!T("a")) == -1); 1785 assert(lastIndexOfAny(to!S("def"), to!T("rsa")) == -1); 1786 assert(lastIndexOfAny(to!S("abba"), to!T("a")) == 3); 1787 assert(lastIndexOfAny(to!S("def"), to!T("f")) == 2); 1788 assert(lastIndexOfAny(to!S("dfefffg"), to!T("fgh")) == 6); 1789 1790 ptrdiff_t oeIdx = 9; 1791 if (is(S == wstring) || is(S == dstring)) 1792 { 1793 oeIdx = 8; 1794 } 1795 1796 auto foundOeIdx = lastIndexOfAny(to!S("dfeffgf��f"), to!T("feg")); 1797 assert(foundOeIdx == oeIdx, to!string(foundOeIdx)); 1798 1799 assert(lastIndexOfAny(to!S("zfeffgfff"), to!T("ACDC"), 1800 No.caseSensitive) == -1); 1801 assert(lastIndexOfAny(to!S("def"), to!T("MI6"), 1802 No.caseSensitive) == -1); 1803 assert(lastIndexOfAny(to!S("abba"), to!T("DEA"), 1804 No.caseSensitive) == 3); 1805 assert(lastIndexOfAny(to!S("def"), to!T("FBI"), 1806 No.caseSensitive) == 2); 1807 assert(lastIndexOfAny(to!S("dfefffg"), to!T("NSA"), 1808 No.caseSensitive) == -1); 1809 1810 oeIdx = 2; 1811 if (is(S == wstring) || is(S == dstring)) 1812 { 1813 oeIdx = 1; 1814 } 1815 assert(lastIndexOfAny(to!S("��dfeffgfff"), to!T("BND"), 1816 No.caseSensitive) == oeIdx); 1817 1818 assert(lastIndexOfAny("\u0100", to!T("\u0100"), 1819 No.caseSensitive) == 0); 1820 }(); 1821 } 1822 } 1823 ); 1824} 1825 1826@safe pure unittest 1827{ 1828 import std.conv : to; 1829 import std.exception : assertCTFEable; 1830 1831 assertCTFEable!( 1832 { 1833 foreach (S; AliasSeq!(string, wstring, dstring)) 1834 { 1835 foreach (T; AliasSeq!(string, wstring, dstring)) 1836 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 1837 enum typeStr = S.stringof ~ " " ~ T.stringof; 1838 1839 assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337) == -1, 1840 typeStr); 1841 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("c"), 7) == 6, 1842 typeStr); 1843 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("cd"), 5) == 3, 1844 typeStr); 1845 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("ef"), 6) == 5, 1846 typeStr); 1847 assert(lastIndexOfAny(to!S("abcdefCdef"), to!T("c"), 8) == 2, 1848 typeStr); 1849 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("x"), 7) == -1, 1850 typeStr); 1851 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("xy"), 4) == -1, 1852 typeStr); 1853 assert(lastIndexOfAny(to!S("��abcdefcdef"), to!T("��"), 2) == 0, 1854 typeStr); 1855 1856 assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337, 1857 No.caseSensitive) == -1, typeStr); 1858 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("C"), 7, 1859 No.caseSensitive) == 6, typeStr); 1860 assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("cd"), 5, 1861 No.caseSensitive) == 3, typeStr); 1862 assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("EF"), 6, 1863 No.caseSensitive) == 5, typeStr); 1864 assert(lastIndexOfAny(to!S("ABCDEFcDEF"), to!T("C"), 8, 1865 No.caseSensitive) == 6, typeStr); 1866 assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("x"), 7, 1867 No.caseSensitive) == -1, typeStr); 1868 assert(lastIndexOfAny(to!S("abCdefcdef"), to!T("XY"), 4, 1869 No.caseSensitive) == -1, typeStr); 1870 assert(lastIndexOfAny(to!S("��ABCDEFCDEF"), to!T("��"), 2, 1871 No.caseSensitive) == 0, typeStr); 1872 }(); 1873 } 1874 } 1875 ); 1876} 1877 1878/** 1879 Returns the index of the first occurrence of any character not an elements 1880 in $(D needles) in $(D haystack). If all element of $(D haystack) are 1881 element of $(D needles) $(D -1) is returned. 1882 1883 Params: 1884 haystack = String to search for needles in. 1885 needles = Strings to search for in haystack. 1886 startIdx = slices haystack like this $(D haystack[startIdx .. $]). If 1887 the startIdx is greater equal the length of haystack the functions 1888 returns $(D -1). 1889 cs = Indicates whether the comparisons are case sensitive. 1890*/ 1891ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack, 1892 const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive) 1893 @safe pure 1894if (isSomeChar!Char && isSomeChar!Char2) 1895{ 1896 return indexOfAnyNeitherImpl!(true, false)(haystack, needles, cs); 1897} 1898 1899/// Ditto 1900ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack, 1901 const(Char2)[] needles, in size_t startIdx, 1902 in CaseSensitive cs = Yes.caseSensitive) 1903 @safe pure 1904if (isSomeChar!Char && isSomeChar!Char2) 1905{ 1906 if (startIdx < haystack.length) 1907 { 1908 ptrdiff_t foundIdx = indexOfAnyNeitherImpl!(true, false)( 1909 haystack[startIdx .. $], needles, cs); 1910 if (foundIdx != -1) 1911 { 1912 return foundIdx + cast(ptrdiff_t) startIdx; 1913 } 1914 } 1915 return -1; 1916} 1917 1918/// 1919@safe pure unittest 1920{ 1921 assert(indexOfNeither("abba", "a", 2) == 2); 1922 assert(indexOfNeither("def", "de", 1) == 2); 1923 assert(indexOfNeither("dfefffg", "dfe", 4) == 6); 1924} 1925 1926/// 1927@safe pure unittest 1928{ 1929 assert(indexOfNeither("def", "a") == 0); 1930 assert(indexOfNeither("def", "de") == 2); 1931 assert(indexOfNeither("dfefffg", "dfe") == 6); 1932} 1933 1934@safe pure unittest 1935{ 1936 import std.conv : to; 1937 1938 foreach (S; AliasSeq!(string, wstring, dstring)) 1939 { 1940 auto r = to!S("").indexOfNeither("hello"); 1941 assert(r == -1, to!string(r)); 1942 1943 r = to!S("hello").indexOfNeither(""); 1944 assert(r == 0, to!string(r)); 1945 1946 r = to!S("").indexOfNeither(""); 1947 assert(r == -1, to!string(r)); 1948 } 1949} 1950 1951@safe pure unittest 1952{ 1953 import std.conv : to; 1954 import std.exception : assertCTFEable; 1955 1956 assertCTFEable!( 1957 { 1958 foreach (S; AliasSeq!(string, wstring, dstring)) 1959 { 1960 foreach (T; AliasSeq!(string, wstring, dstring)) 1961 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 1962 assert(indexOfNeither(cast(S) null, to!T("a")) == -1); 1963 assert(indexOfNeither("abba", "a") == 1); 1964 1965 assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"), 1966 No.caseSensitive) == 0); 1967 assert(indexOfNeither(to!S("def"), to!T("D"), 1968 No.caseSensitive) == 1); 1969 assert(indexOfNeither(to!S("ABca"), to!T("a"), 1970 No.caseSensitive) == 1); 1971 assert(indexOfNeither(to!S("def"), to!T("f"), 1972 No.caseSensitive) == 0); 1973 assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"), 1974 No.caseSensitive) == 6); 1975 if (is(S == string)) 1976 { 1977 assert(indexOfNeither(to!S("��DfEfffg"), to!T("��dFe"), 1978 No.caseSensitive) == 8, 1979 to!string(indexOfNeither(to!S("��DfEfffg"), to!T("��dFe"), 1980 No.caseSensitive))); 1981 } 1982 else 1983 { 1984 assert(indexOfNeither(to!S("��DfEfffg"), to!T("��dFe"), 1985 No.caseSensitive) == 7, 1986 to!string(indexOfNeither(to!S("��DfEfffg"), to!T("��dFe"), 1987 No.caseSensitive))); 1988 } 1989 }(); 1990 } 1991 } 1992 ); 1993} 1994 1995@safe pure unittest 1996{ 1997 import std.conv : to; 1998 import std.exception : assertCTFEable; 1999 2000 assertCTFEable!( 2001 { 2002 foreach (S; AliasSeq!(string, wstring, dstring)) 2003 { 2004 foreach (T; AliasSeq!(string, wstring, dstring)) 2005 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 2006 assert(indexOfNeither(cast(S) null, to!T("a"), 1) == -1); 2007 assert(indexOfNeither(to!S("def"), to!T("a"), 1) == 1, 2008 to!string(indexOfNeither(to!S("def"), to!T("a"), 1))); 2009 2010 assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"), 4, 2011 No.caseSensitive) == 4); 2012 assert(indexOfNeither(to!S("def"), to!T("D"), 2, 2013 No.caseSensitive) == 2); 2014 assert(indexOfNeither(to!S("ABca"), to!T("a"), 3, 2015 No.caseSensitive) == -1); 2016 assert(indexOfNeither(to!S("def"), to!T("tzf"), 2, 2017 No.caseSensitive) == -1); 2018 assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"), 5, 2019 No.caseSensitive) == 6); 2020 if (is(S == string)) 2021 { 2022 assert(indexOfNeither(to!S("��DfEfffg"), to!T("��Di"), 2, 2023 No.caseSensitive) == 3, to!string(indexOfNeither( 2024 to!S("��DfEfffg"), to!T("��Di"), 2, No.caseSensitive))); 2025 } 2026 else 2027 { 2028 assert(indexOfNeither(to!S("��DfEfffg"), to!T("��Di"), 2, 2029 No.caseSensitive) == 2, to!string(indexOfNeither( 2030 to!S("��DfEfffg"), to!T("��Di"), 2, No.caseSensitive))); 2031 } 2032 }(); 2033 } 2034 } 2035 ); 2036} 2037 2038/** 2039 Returns the last index of the first occurence of any character that is not 2040 an elements in $(D needles) in $(D haystack). If all element of 2041 $(D haystack) are element of $(D needles) $(D -1) is returned. 2042 2043 Params: 2044 haystack = String to search for needles in. 2045 needles = Strings to search for in haystack. 2046 stopIdx = slices haystack like this $(D haystack[0 .. stopIdx]) If 2047 the stopIdx is greater equal the length of haystack the functions 2048 returns $(D -1). 2049 cs = Indicates whether the comparisons are case sensitive. 2050*/ 2051ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack, 2052 const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive) 2053 @safe pure 2054if (isSomeChar!Char && isSomeChar!Char2) 2055{ 2056 return indexOfAnyNeitherImpl!(false, false)(haystack, needles, cs); 2057} 2058 2059/// Ditto 2060ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack, 2061 const(Char2)[] needles, in size_t stopIdx, 2062 in CaseSensitive cs = Yes.caseSensitive) 2063 @safe pure 2064if (isSomeChar!Char && isSomeChar!Char2) 2065{ 2066 if (stopIdx < haystack.length) 2067 { 2068 return indexOfAnyNeitherImpl!(false, false)(haystack[0 .. stopIdx], 2069 needles, cs); 2070 } 2071 return -1; 2072} 2073 2074/// 2075@safe pure unittest 2076{ 2077 assert(lastIndexOfNeither("abba", "a") == 2); 2078 assert(lastIndexOfNeither("def", "f") == 1); 2079} 2080 2081/// 2082@safe pure unittest 2083{ 2084 assert(lastIndexOfNeither("def", "rsa", 3) == -1); 2085 assert(lastIndexOfNeither("abba", "a", 2) == 1); 2086} 2087 2088@safe pure unittest 2089{ 2090 import std.conv : to; 2091 2092 foreach (S; AliasSeq!(string, wstring, dstring)) 2093 { 2094 auto r = to!S("").lastIndexOfNeither("hello"); 2095 assert(r == -1, to!string(r)); 2096 2097 r = to!S("hello").lastIndexOfNeither(""); 2098 assert(r == 4, to!string(r)); 2099 2100 r = to!S("").lastIndexOfNeither(""); 2101 assert(r == -1, to!string(r)); 2102 } 2103} 2104 2105@safe pure unittest 2106{ 2107 import std.conv : to; 2108 import std.exception : assertCTFEable; 2109 2110 assertCTFEable!( 2111 { 2112 foreach (S; AliasSeq!(string, wstring, dstring)) 2113 { 2114 foreach (T; AliasSeq!(string, wstring, dstring)) 2115 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 2116 assert(lastIndexOfNeither(cast(S) null, to!T("a")) == -1); 2117 assert(lastIndexOfNeither(to!S("def"), to!T("rsa")) == 2); 2118 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2); 2119 2120 ptrdiff_t oeIdx = 8; 2121 if (is(S == string)) 2122 { 2123 oeIdx = 9; 2124 } 2125 2126 auto foundOeIdx = lastIndexOfNeither(to!S("��dfefegff"), to!T("zeg")); 2127 assert(foundOeIdx == oeIdx, to!string(foundOeIdx)); 2128 2129 assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"), 2130 No.caseSensitive) == 5); 2131 assert(lastIndexOfNeither(to!S("def"), to!T("MI6"), 2132 No.caseSensitive) == 2, to!string(lastIndexOfNeither(to!S("def"), 2133 to!T("MI6"), No.caseSensitive))); 2134 assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"), 2135 No.caseSensitive) == 6, to!string(lastIndexOfNeither( 2136 to!S("abbadeafsb"), to!T("fSb"), No.caseSensitive))); 2137 assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"), 2138 No.caseSensitive) == 1); 2139 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"), 2140 No.caseSensitive) == 6); 2141 assert(lastIndexOfNeither(to!S("dfeffgfff��"), to!T("BNDabCHIJKQEP����SYX��??��"), 2142 No.caseSensitive) == 8, to!string(lastIndexOfNeither(to!S("dfeffgfff��"), 2143 to!T("BNDabCHIJKQEP����SYX��??��"), No.caseSensitive))); 2144 }(); 2145 } 2146 } 2147 ); 2148} 2149 2150@safe pure unittest 2151{ 2152 import std.conv : to; 2153 import std.exception : assertCTFEable; 2154 2155 assertCTFEable!( 2156 { 2157 foreach (S; AliasSeq!(string, wstring, dstring)) 2158 { 2159 foreach (T; AliasSeq!(string, wstring, dstring)) 2160 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 2161 assert(lastIndexOfNeither(cast(S) null, to!T("a"), 1337) == -1); 2162 assert(lastIndexOfNeither(to!S("def"), to!T("f")) == 1); 2163 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2); 2164 2165 ptrdiff_t oeIdx = 4; 2166 if (is(S == string)) 2167 { 2168 oeIdx = 5; 2169 } 2170 2171 auto foundOeIdx = lastIndexOfNeither(to!S("��dfefegff"), to!T("zeg"), 2172 7); 2173 assert(foundOeIdx == oeIdx, to!string(foundOeIdx)); 2174 2175 assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"), 6, 2176 No.caseSensitive) == 5); 2177 assert(lastIndexOfNeither(to!S("def"), to!T("MI6"), 2, 2178 No.caseSensitive) == 1, to!string(lastIndexOfNeither(to!S("def"), 2179 to!T("MI6"), 2, No.caseSensitive))); 2180 assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"), 6, 2181 No.caseSensitive) == 5, to!string(lastIndexOfNeither( 2182 to!S("abbadeafsb"), to!T("fSb"), 6, No.caseSensitive))); 2183 assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"), 3, 2184 No.caseSensitive) == 1); 2185 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"), 2, 2186 No.caseSensitive) == 1, to!string(lastIndexOfNeither( 2187 to!S("dfefffg"), to!T("NSA"), 2, No.caseSensitive))); 2188 }(); 2189 } 2190 } 2191 ); 2192} 2193 2194/** 2195 * Returns the _representation of a string, which has the same type 2196 * as the string except the character type is replaced by $(D ubyte), 2197 * $(D ushort), or $(D uint) depending on the character width. 2198 * 2199 * Params: 2200 * s = The string to return the _representation of. 2201 * 2202 * Returns: 2203 * The _representation of the passed string. 2204 */ 2205auto representation(Char)(Char[] s) @safe pure nothrow @nogc 2206if (isSomeChar!Char) 2207{ 2208 import std.traits : ModifyTypePreservingTQ; 2209 alias ToRepType(T) = AliasSeq!(ubyte, ushort, uint)[T.sizeof / 2]; 2210 return cast(ModifyTypePreservingTQ!(ToRepType, Char)[])s; 2211} 2212 2213/// 2214@safe pure unittest 2215{ 2216 string s = "hello"; 2217 static assert(is(typeof(representation(s)) == immutable(ubyte)[])); 2218 assert(representation(s) is cast(immutable(ubyte)[]) s); 2219 assert(representation(s) == [0x68, 0x65, 0x6c, 0x6c, 0x6f]); 2220} 2221 2222@system pure unittest 2223{ 2224 import std.exception : assertCTFEable; 2225 import std.traits : Fields; 2226 import std.typecons : Tuple; 2227 2228 assertCTFEable!( 2229 { 2230 void test(Char, T)(Char[] str) 2231 { 2232 static assert(is(typeof(representation(str)) == T[])); 2233 assert(representation(str) is cast(T[]) str); 2234 } 2235 2236 foreach (Type; AliasSeq!(Tuple!(char , ubyte ), 2237 Tuple!(wchar, ushort), 2238 Tuple!(dchar, uint ))) 2239 { 2240 alias Char = Fields!Type[0]; 2241 alias Int = Fields!Type[1]; 2242 enum immutable(Char)[] hello = "hello"; 2243 2244 test!( immutable Char, immutable Int)(hello); 2245 test!( const Char, const Int)(hello); 2246 test!( Char, Int)(hello.dup); 2247 test!( shared Char, shared Int)(cast(shared) hello.dup); 2248 test!(const shared Char, const shared Int)(hello); 2249 } 2250 }); 2251} 2252 2253 2254/** 2255 * Capitalize the first character of $(D s) and convert the rest of $(D s) to 2256 * lowercase. 2257 * 2258 * Params: 2259 * input = The string to _capitalize. 2260 * 2261 * Returns: 2262 * The capitalized string. 2263 * 2264 * See_Also: 2265 * $(REF asCapitalized, std,uni) for a lazy range version that doesn't allocate memory 2266 */ 2267S capitalize(S)(S input) @trusted pure 2268if (isSomeString!S) 2269{ 2270 import std.array : array; 2271 import std.uni : asCapitalized; 2272 import std.utf : byUTF; 2273 2274 return input.asCapitalized.byUTF!(ElementEncodingType!(S)).array; 2275} 2276 2277/// 2278pure @safe unittest 2279{ 2280 assert(capitalize("hello") == "Hello"); 2281 assert(capitalize("World") == "World"); 2282} 2283 2284auto capitalize(S)(auto ref S s) 2285if (!isSomeString!S && is(StringTypeOf!S)) 2286{ 2287 return capitalize!(StringTypeOf!S)(s); 2288} 2289 2290@safe pure unittest 2291{ 2292 assert(testAliasedString!capitalize("hello")); 2293} 2294 2295@safe pure unittest 2296{ 2297 import std.algorithm.comparison : cmp; 2298 import std.conv : to; 2299 import std.exception : assertCTFEable; 2300 2301 assertCTFEable!( 2302 { 2303 foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[])) 2304 { 2305 S s1 = to!S("FoL"); 2306 S s2; 2307 2308 s2 = capitalize(s1); 2309 assert(cmp(s2, "Fol") == 0); 2310 assert(s2 !is s1); 2311 2312 s2 = capitalize(s1[0 .. 2]); 2313 assert(cmp(s2, "Fo") == 0); 2314 2315 s1 = to!S("fOl"); 2316 s2 = capitalize(s1); 2317 assert(cmp(s2, "Fol") == 0); 2318 assert(s2 !is s1); 2319 s1 = to!S("\u0131 \u0130"); 2320 s2 = capitalize(s1); 2321 assert(cmp(s2, "\u0049 i\u0307") == 0); 2322 assert(s2 !is s1); 2323 2324 s1 = to!S("\u017F \u0049"); 2325 s2 = capitalize(s1); 2326 assert(cmp(s2, "\u0053 \u0069") == 0); 2327 assert(s2 !is s1); 2328 } 2329 }); 2330} 2331 2332/++ 2333 Split $(D s) into an array of lines according to the unicode standard using 2334 $(D '\r'), $(D '\n'), $(D "\r\n"), $(REF lineSep, std,uni), 2335 $(REF paraSep, std,uni), $(D U+0085) (NEL), $(D '\v') and $(D '\f') 2336 as delimiters. If $(D keepTerm) is set to $(D KeepTerminator.yes), then the 2337 delimiter is included in the strings returned. 2338 2339 Does not throw on invalid UTF; such is simply passed unchanged 2340 to the output. 2341 2342 Allocates memory; use $(LREF lineSplitter) for an alternative that 2343 does not. 2344 2345 Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0). 2346 2347 Params: 2348 s = a string of $(D chars), $(D wchars), or $(D dchars), or any custom 2349 type that casts to a $(D string) type 2350 keepTerm = whether delimiter is included or not in the results 2351 Returns: 2352 array of strings, each element is a line that is a slice of $(D s) 2353 See_Also: 2354 $(LREF lineSplitter) 2355 $(REF splitter, std,algorithm) 2356 $(REF splitter, std,regex) 2357 +/ 2358alias KeepTerminator = Flag!"keepTerminator"; 2359 2360/// ditto 2361S[] splitLines(S)(S s, in KeepTerminator keepTerm = No.keepTerminator) @safe pure 2362if (isSomeString!S) 2363{ 2364 import std.array : appender; 2365 import std.uni : lineSep, paraSep; 2366 2367 size_t iStart = 0; 2368 auto retval = appender!(S[])(); 2369 2370 for (size_t i; i < s.length; ++i) 2371 { 2372 switch (s[i]) 2373 { 2374 case '\v', '\f', '\n': 2375 retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator)]); 2376 iStart = i + 1; 2377 break; 2378 2379 case '\r': 2380 if (i + 1 < s.length && s[i + 1] == '\n') 2381 { 2382 retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator) * 2]); 2383 iStart = i + 2; 2384 ++i; 2385 } 2386 else 2387 { 2388 goto case '\n'; 2389 } 2390 break; 2391 2392 static if (s[i].sizeof == 1) 2393 { 2394 /* Manually decode: 2395 * lineSep is E2 80 A8 2396 * paraSep is E2 80 A9 2397 */ 2398 case 0xE2: 2399 if (i + 2 < s.length && 2400 s[i + 1] == 0x80 && 2401 (s[i + 2] == 0xA8 || s[i + 2] == 0xA9) 2402 ) 2403 { 2404 retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator) * 3]); 2405 iStart = i + 3; 2406 i += 2; 2407 } 2408 else 2409 goto default; 2410 break; 2411 /* Manually decode: 2412 * NEL is C2 85 2413 */ 2414 case 0xC2: 2415 if (i + 1 < s.length && s[i + 1] == 0x85) 2416 { 2417 retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator) * 2]); 2418 iStart = i + 2; 2419 i += 1; 2420 } 2421 else 2422 goto default; 2423 break; 2424 } 2425 else 2426 { 2427 case lineSep: 2428 case paraSep: 2429 case '\u0085': 2430 goto case '\n'; 2431 } 2432 2433 default: 2434 break; 2435 } 2436 } 2437 2438 if (iStart != s.length) 2439 retval.put(s[iStart .. $]); 2440 2441 return retval.data; 2442} 2443 2444/// 2445@safe pure nothrow unittest 2446{ 2447 string s = "Hello\nmy\rname\nis"; 2448 assert(splitLines(s) == ["Hello", "my", "name", "is"]); 2449} 2450 2451@safe pure nothrow unittest 2452{ 2453 string s = "a\xC2\x86b"; 2454 assert(splitLines(s) == [s]); 2455} 2456 2457auto splitLines(S)(auto ref S s, in KeepTerminator keepTerm = No.keepTerminator) 2458if (!isSomeString!S && is(StringTypeOf!S)) 2459{ 2460 return splitLines!(StringTypeOf!S)(s, keepTerm); 2461} 2462 2463@safe pure nothrow unittest 2464{ 2465 assert(testAliasedString!splitLines("hello\nworld")); 2466} 2467 2468@safe pure unittest 2469{ 2470 import std.conv : to; 2471 import std.exception : assertCTFEable; 2472 2473 assertCTFEable!( 2474 { 2475 foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 2476 { 2477 auto s = to!S( 2478 "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\n" ~ 2479 "mon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085" 2480 ); 2481 auto lines = splitLines(s); 2482 assert(lines.length == 14); 2483 assert(lines[0] == ""); 2484 assert(lines[1] == "peter"); 2485 assert(lines[2] == ""); 2486 assert(lines[3] == "paul"); 2487 assert(lines[4] == "jerry"); 2488 assert(lines[5] == "ice"); 2489 assert(lines[6] == "cream"); 2490 assert(lines[7] == ""); 2491 assert(lines[8] == "sunday"); 2492 assert(lines[9] == "mon\u2030day"); 2493 assert(lines[10] == "schadenfreude"); 2494 assert(lines[11] == "kindergarten"); 2495 assert(lines[12] == ""); 2496 assert(lines[13] == "cookies"); 2497 2498 2499 ubyte[] u = ['a', 0xFF, 0x12, 'b']; // invalid UTF 2500 auto ulines = splitLines(cast(char[]) u); 2501 assert(cast(ubyte[])(ulines[0]) == u); 2502 2503 lines = splitLines(s, Yes.keepTerminator); 2504 assert(lines.length == 14); 2505 assert(lines[0] == "\r"); 2506 assert(lines[1] == "peter\n"); 2507 assert(lines[2] == "\r"); 2508 assert(lines[3] == "paul\r\n"); 2509 assert(lines[4] == "jerry\u2028"); 2510 assert(lines[5] == "ice\u2029"); 2511 assert(lines[6] == "cream\n"); 2512 assert(lines[7] == "\n"); 2513 assert(lines[8] == "sunday\n"); 2514 assert(lines[9] == "mon\u2030day\n"); 2515 assert(lines[10] == "schadenfreude\v"); 2516 assert(lines[11] == "kindergarten\f"); 2517 assert(lines[12] == "\v"); 2518 assert(lines[13] == "cookies\u0085"); 2519 2520 s.popBack(); // Lop-off trailing \n 2521 lines = splitLines(s); 2522 assert(lines.length == 14); 2523 assert(lines[9] == "mon\u2030day"); 2524 2525 lines = splitLines(s, Yes.keepTerminator); 2526 assert(lines.length == 14); 2527 assert(lines[13] == "cookies"); 2528 } 2529 }); 2530} 2531 2532private struct LineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range) 2533{ 2534 import std.conv : unsigned; 2535 import std.uni : lineSep, paraSep; 2536private: 2537 Range _input; 2538 2539 alias IndexType = typeof(unsigned(_input.length)); 2540 enum IndexType _unComputed = IndexType.max; 2541 IndexType iStart = _unComputed; 2542 IndexType iEnd = 0; 2543 IndexType iNext = 0; 2544 2545public: 2546 this(Range input) 2547 { 2548 _input = input; 2549 } 2550 2551 static if (isInfinite!Range) 2552 { 2553 enum bool empty = false; 2554 } 2555 else 2556 { 2557 @property bool empty() 2558 { 2559 return iStart == _unComputed && iNext == _input.length; 2560 } 2561 } 2562 2563 @property typeof(_input) front() 2564 { 2565 if (iStart == _unComputed) 2566 { 2567 iStart = iNext; 2568 Loop: 2569 for (IndexType i = iNext; ; ++i) 2570 { 2571 if (i == _input.length) 2572 { 2573 iEnd = i; 2574 iNext = i; 2575 break Loop; 2576 } 2577 switch (_input[i]) 2578 { 2579 case '\v', '\f', '\n': 2580 iEnd = i + (keepTerm == Yes.keepTerminator); 2581 iNext = i + 1; 2582 break Loop; 2583 2584 case '\r': 2585 if (i + 1 < _input.length && _input[i + 1] == '\n') 2586 { 2587 iEnd = i + (keepTerm == Yes.keepTerminator) * 2; 2588 iNext = i + 2; 2589 break Loop; 2590 } 2591 else 2592 { 2593 goto case '\n'; 2594 } 2595 2596 static if (_input[i].sizeof == 1) 2597 { 2598 /* Manually decode: 2599 * lineSep is E2 80 A8 2600 * paraSep is E2 80 A9 2601 */ 2602 case 0xE2: 2603 if (i + 2 < _input.length && 2604 _input[i + 1] == 0x80 && 2605 (_input[i + 2] == 0xA8 || _input[i + 2] == 0xA9) 2606 ) 2607 { 2608 iEnd = i + (keepTerm == Yes.keepTerminator) * 3; 2609 iNext = i + 3; 2610 break Loop; 2611 } 2612 else 2613 goto default; 2614 /* Manually decode: 2615 * NEL is C2 85 2616 */ 2617 case 0xC2: 2618 if (i + 1 < _input.length && _input[i + 1] == 0x85) 2619 { 2620 iEnd = i + (keepTerm == Yes.keepTerminator) * 2; 2621 iNext = i + 2; 2622 break Loop; 2623 } 2624 else 2625 goto default; 2626 } 2627 else 2628 { 2629 case '\u0085': 2630 case lineSep: 2631 case paraSep: 2632 goto case '\n'; 2633 } 2634 2635 default: 2636 break; 2637 } 2638 } 2639 } 2640 return _input[iStart .. iEnd]; 2641 } 2642 2643 void popFront() 2644 { 2645 if (iStart == _unComputed) 2646 { 2647 assert(!empty); 2648 front; 2649 } 2650 iStart = _unComputed; 2651 } 2652 2653 static if (isForwardRange!Range) 2654 { 2655 @property typeof(this) save() 2656 { 2657 auto ret = this; 2658 ret._input = _input.save; 2659 return ret; 2660 } 2661 } 2662} 2663 2664/*********************************** 2665 * Split an array or slicable range of characters into a range of lines 2666 using $(D '\r'), $(D '\n'), $(D '\v'), $(D '\f'), $(D "\r\n"), 2667 $(REF lineSep, std,uni), $(REF paraSep, std,uni) and $(D '\u0085') (NEL) 2668 as delimiters. If $(D keepTerm) is set to $(D Yes.keepTerminator), then the 2669 delimiter is included in the slices returned. 2670 2671 Does not throw on invalid UTF; such is simply passed unchanged 2672 to the output. 2673 2674 Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0). 2675 2676 Does not allocate memory. 2677 2678 Params: 2679 r = array of $(D chars), $(D wchars), or $(D dchars) or a slicable range 2680 keepTerm = whether delimiter is included or not in the results 2681 Returns: 2682 range of slices of the input range $(D r) 2683 2684 See_Also: 2685 $(LREF splitLines) 2686 $(REF splitter, std,algorithm) 2687 $(REF splitter, std,regex) 2688 */ 2689auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)(Range r) 2690if ((hasSlicing!Range && hasLength!Range && isSomeChar!(ElementType!Range) || 2691 isSomeString!Range) && 2692 !isConvertibleToString!Range) 2693{ 2694 return LineSplitter!(keepTerm, Range)(r); 2695} 2696 2697/// 2698@safe pure unittest 2699{ 2700 import std.array : array; 2701 2702 string s = "Hello\nmy\rname\nis"; 2703 2704 /* notice the call to 'array' to turn the lazy range created by 2705 lineSplitter comparable to the string[] created by splitLines. 2706 */ 2707 assert(lineSplitter(s).array == splitLines(s)); 2708} 2709 2710auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)(auto ref Range r) 2711if (isConvertibleToString!Range) 2712{ 2713 return LineSplitter!(keepTerm, StringTypeOf!Range)(r); 2714} 2715 2716@safe pure unittest 2717{ 2718 import std.array : array; 2719 import std.conv : to; 2720 import std.exception : assertCTFEable; 2721 2722 assertCTFEable!( 2723 { 2724 foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 2725 { 2726 auto s = to!S( 2727 "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\n" ~ 2728 "sunday\nmon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085" 2729 ); 2730 2731 auto lines = lineSplitter(s).array; 2732 assert(lines.length == 14); 2733 assert(lines[0] == ""); 2734 assert(lines[1] == "peter"); 2735 assert(lines[2] == ""); 2736 assert(lines[3] == "paul"); 2737 assert(lines[4] == "jerry"); 2738 assert(lines[5] == "ice"); 2739 assert(lines[6] == "cream"); 2740 assert(lines[7] == ""); 2741 assert(lines[8] == "sunday"); 2742 assert(lines[9] == "mon\u2030day"); 2743 assert(lines[10] == "schadenfreude"); 2744 assert(lines[11] == "kindergarten"); 2745 assert(lines[12] == ""); 2746 assert(lines[13] == "cookies"); 2747 2748 2749 ubyte[] u = ['a', 0xFF, 0x12, 'b']; // invalid UTF 2750 auto ulines = lineSplitter(cast(char[]) u).array; 2751 assert(cast(ubyte[])(ulines[0]) == u); 2752 2753 lines = lineSplitter!(Yes.keepTerminator)(s).array; 2754 assert(lines.length == 14); 2755 assert(lines[0] == "\r"); 2756 assert(lines[1] == "peter\n"); 2757 assert(lines[2] == "\r"); 2758 assert(lines[3] == "paul\r\n"); 2759 assert(lines[4] == "jerry\u2028"); 2760 assert(lines[5] == "ice\u2029"); 2761 assert(lines[6] == "cream\n"); 2762 assert(lines[7] == "\n"); 2763 assert(lines[8] == "sunday\n"); 2764 assert(lines[9] == "mon\u2030day\n"); 2765 assert(lines[10] == "schadenfreude\v"); 2766 assert(lines[11] == "kindergarten\f"); 2767 assert(lines[12] == "\v"); 2768 assert(lines[13] == "cookies\u0085"); 2769 2770 s.popBack(); // Lop-off trailing \n 2771 lines = lineSplitter(s).array; 2772 assert(lines.length == 14); 2773 assert(lines[9] == "mon\u2030day"); 2774 2775 lines = lineSplitter!(Yes.keepTerminator)(s).array; 2776 assert(lines.length == 14); 2777 assert(lines[13] == "cookies"); 2778 } 2779 }); 2780} 2781 2782/// 2783@nogc @safe pure unittest 2784{ 2785 auto s = "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\nmon\u2030day\n"; 2786 auto lines = s.lineSplitter(); 2787 static immutable witness = ["", "peter", "", "paul", "jerry", "ice", "cream", "", "sunday", "mon\u2030day"]; 2788 uint i; 2789 foreach (line; lines) 2790 { 2791 assert(line == witness[i++]); 2792 } 2793 assert(i == witness.length); 2794} 2795 2796@nogc @safe pure unittest 2797{ 2798 import std.algorithm.comparison : equal; 2799 auto s = "std/string.d"; 2800 auto as = TestAliasedString(s); 2801 assert(equal(s.lineSplitter(), as.lineSplitter())); 2802} 2803 2804@safe pure unittest 2805{ 2806 auto s = "line1\nline2"; 2807 auto spl0 = s.lineSplitter!(Yes.keepTerminator); 2808 auto spl1 = spl0.save; 2809 spl0.popFront; 2810 assert(spl1.front ~ spl0.front == s); 2811 string r = "a\xC2\x86b"; 2812 assert(r.lineSplitter.front == r); 2813} 2814 2815/++ 2816 Strips leading whitespace (as defined by $(REF isWhite, std,uni)). 2817 2818 Params: 2819 input = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 2820 of characters 2821 2822 Returns: $(D input) stripped of leading whitespace. 2823 2824 Postconditions: $(D input) and the returned value 2825 will share the same tail (see $(REF sameTail, std,array)). 2826 2827 See_Also: 2828 Generic stripping on ranges: $(REF _stripLeft, std, algorithm, mutation) 2829 +/ 2830auto stripLeft(Range)(Range input) 2831if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 2832 !isInfinite!Range && !isConvertibleToString!Range) 2833{ 2834 static import std.ascii; 2835 static import std.uni; 2836 import std.utf : decodeFront; 2837 2838 while (!input.empty) 2839 { 2840 auto c = input.front; 2841 if (std.ascii.isASCII(c)) 2842 { 2843 if (!std.ascii.isWhite(c)) 2844 break; 2845 input.popFront(); 2846 } 2847 else 2848 { 2849 auto save = input.save; 2850 auto dc = decodeFront(input); 2851 if (!std.uni.isWhite(dc)) 2852 return save; 2853 } 2854 } 2855 return input; 2856} 2857 2858/// 2859@safe pure unittest 2860{ 2861 import std.uni : lineSep, paraSep; 2862 assert(stripLeft(" hello world ") == 2863 "hello world "); 2864 assert(stripLeft("\n\t\v\rhello world\n\t\v\r") == 2865 "hello world\n\t\v\r"); 2866 assert(stripLeft("hello world") == 2867 "hello world"); 2868 assert(stripLeft([lineSep] ~ "hello world" ~ lineSep) == 2869 "hello world" ~ [lineSep]); 2870 assert(stripLeft([paraSep] ~ "hello world" ~ paraSep) == 2871 "hello world" ~ [paraSep]); 2872 2873 import std.array : array; 2874 import std.utf : byChar; 2875 assert(stripLeft(" hello world "w.byChar).array == 2876 "hello world "); 2877} 2878 2879auto stripLeft(Range)(auto ref Range str) 2880if (isConvertibleToString!Range) 2881{ 2882 return stripLeft!(StringTypeOf!Range)(str); 2883} 2884 2885@safe pure unittest 2886{ 2887 assert(testAliasedString!stripLeft(" hello")); 2888} 2889 2890/++ 2891 Strips trailing whitespace (as defined by $(REF isWhite, std,uni)). 2892 2893 Params: 2894 str = string or random access range of characters 2895 2896 Returns: 2897 slice of $(D str) stripped of trailing whitespace. 2898 2899 See_Also: 2900 Generic stripping on ranges: $(REF _stripRight, std, algorithm, mutation) 2901 +/ 2902auto stripRight(Range)(Range str) 2903if (isSomeString!Range || 2904 isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range && 2905 !isConvertibleToString!Range && 2906 isSomeChar!(ElementEncodingType!Range)) 2907{ 2908 import std.uni : isWhite; 2909 alias C = Unqual!(ElementEncodingType!(typeof(str))); 2910 2911 static if (isSomeString!(typeof(str))) 2912 { 2913 import std.utf : codeLength; 2914 2915 foreach_reverse (i, dchar c; str) 2916 { 2917 if (!isWhite(c)) 2918 return str[0 .. i + codeLength!C(c)]; 2919 } 2920 2921 return str[0 .. 0]; 2922 } 2923 else 2924 { 2925 size_t i = str.length; 2926 while (i--) 2927 { 2928 static if (C.sizeof == 4) 2929 { 2930 if (isWhite(str[i])) 2931 continue; 2932 break; 2933 } 2934 else static if (C.sizeof == 2) 2935 { 2936 auto c2 = str[i]; 2937 if (c2 < 0xD800 || c2 >= 0xE000) 2938 { 2939 if (isWhite(c2)) 2940 continue; 2941 } 2942 else if (c2 >= 0xDC00) 2943 { 2944 if (i) 2945 { 2946 immutable c1 = str[i - 1]; 2947 if (c1 >= 0xD800 && c1 < 0xDC00) 2948 { 2949 immutable dchar c = ((c1 - 0xD7C0) << 10) + (c2 - 0xDC00); 2950 if (isWhite(c)) 2951 { 2952 --i; 2953 continue; 2954 } 2955 } 2956 } 2957 } 2958 break; 2959 } 2960 else static if (C.sizeof == 1) 2961 { 2962 import std.utf : byDchar; 2963 2964 char cx = str[i]; 2965 if (cx <= 0x7F) 2966 { 2967 if (isWhite(cx)) 2968 continue; 2969 break; 2970 } 2971 else 2972 { 2973 size_t stride = 0; 2974 2975 while (1) 2976 { 2977 ++stride; 2978 if (!i || (cx & 0xC0) == 0xC0 || stride == 4) 2979 break; 2980 cx = str[i - 1]; 2981 if (!(cx & 0x80)) 2982 break; 2983 --i; 2984 } 2985 2986 if (!str[i .. i + stride].byDchar.front.isWhite) 2987 return str[0 .. i + stride]; 2988 } 2989 } 2990 else 2991 static assert(0); 2992 } 2993 2994 return str[0 .. i + 1]; 2995 } 2996} 2997 2998/// 2999@safe pure 3000unittest 3001{ 3002 import std.uni : lineSep, paraSep; 3003 assert(stripRight(" hello world ") == 3004 " hello world"); 3005 assert(stripRight("\n\t\v\rhello world\n\t\v\r") == 3006 "\n\t\v\rhello world"); 3007 assert(stripRight("hello world") == 3008 "hello world"); 3009 assert(stripRight([lineSep] ~ "hello world" ~ lineSep) == 3010 [lineSep] ~ "hello world"); 3011 assert(stripRight([paraSep] ~ "hello world" ~ paraSep) == 3012 [paraSep] ~ "hello world"); 3013} 3014 3015auto stripRight(Range)(auto ref Range str) 3016if (isConvertibleToString!Range) 3017{ 3018 return stripRight!(StringTypeOf!Range)(str); 3019} 3020 3021@safe pure unittest 3022{ 3023 assert(testAliasedString!stripRight("hello ")); 3024} 3025 3026@safe pure unittest 3027{ 3028 import std.array : array; 3029 import std.uni : lineSep, paraSep; 3030 import std.utf : byChar, byDchar, byUTF, byWchar, invalidUTFstrings; 3031 assert(stripRight(" hello world ".byChar).array == " hello world"); 3032 assert(stripRight("\n\t\v\rhello world\n\t\v\r"w.byWchar).array == "\n\t\v\rhello world"w); 3033 assert(stripRight("hello world"d.byDchar).array == "hello world"d); 3034 assert(stripRight("\u2028hello world\u2020\u2028".byChar).array == "\u2028hello world\u2020"); 3035 assert(stripRight("hello world\U00010001"w.byWchar).array == "hello world\U00010001"w); 3036 3037 foreach (C; AliasSeq!(char, wchar, dchar)) 3038 { 3039 foreach (s; invalidUTFstrings!C()) 3040 { 3041 cast(void) stripRight(s.byUTF!C).array; 3042 } 3043 } 3044 3045 cast(void) stripRight("a\x80".byUTF!char).array; 3046 wstring ws = ['a', cast(wchar) 0xDC00]; 3047 cast(void) stripRight(ws.byUTF!wchar).array; 3048} 3049 3050 3051/++ 3052 Strips both leading and trailing whitespace (as defined by 3053 $(REF isWhite, std,uni)). 3054 3055 Params: 3056 str = string or random access range of characters 3057 3058 Returns: 3059 slice of $(D str) stripped of leading and trailing whitespace. 3060 3061 See_Also: 3062 Generic stripping on ranges: $(REF _strip, std, algorithm, mutation) 3063 +/ 3064auto strip(Range)(Range str) 3065if (isSomeString!Range || 3066 isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range && 3067 !isConvertibleToString!Range && 3068 isSomeChar!(ElementEncodingType!Range)) 3069{ 3070 return stripRight(stripLeft(str)); 3071} 3072 3073/// 3074@safe pure unittest 3075{ 3076 import std.uni : lineSep, paraSep; 3077 assert(strip(" hello world ") == 3078 "hello world"); 3079 assert(strip("\n\t\v\rhello world\n\t\v\r") == 3080 "hello world"); 3081 assert(strip("hello world") == 3082 "hello world"); 3083 assert(strip([lineSep] ~ "hello world" ~ [lineSep]) == 3084 "hello world"); 3085 assert(strip([paraSep] ~ "hello world" ~ [paraSep]) == 3086 "hello world"); 3087} 3088 3089auto strip(Range)(auto ref Range str) 3090if (isConvertibleToString!Range) 3091{ 3092 return strip!(StringTypeOf!Range)(str); 3093} 3094 3095@safe pure unittest 3096{ 3097 assert(testAliasedString!strip(" hello world ")); 3098} 3099 3100@safe pure unittest 3101{ 3102 import std.algorithm.comparison : equal; 3103 import std.conv : to; 3104 import std.exception : assertCTFEable; 3105 3106 assertCTFEable!( 3107 { 3108 foreach (S; AliasSeq!( char[], const char[], string, 3109 wchar[], const wchar[], wstring, 3110 dchar[], const dchar[], dstring)) 3111 { 3112 assert(equal(stripLeft(to!S(" foo\t ")), "foo\t ")); 3113 assert(equal(stripLeft(to!S("\u2008 foo\t \u2007")), "foo\t \u2007")); 3114 assert(equal(stripLeft(to!S("\u0085 �� \u0085 \u00BB \r")), "�� \u0085 \u00BB \r")); 3115 assert(equal(stripLeft(to!S("1")), "1")); 3116 assert(equal(stripLeft(to!S("\U0010FFFE")), "\U0010FFFE")); 3117 assert(equal(stripLeft(to!S("")), "")); 3118 3119 assert(equal(stripRight(to!S(" foo\t ")), " foo")); 3120 assert(equal(stripRight(to!S("\u2008 foo\t \u2007")), "\u2008 foo")); 3121 assert(equal(stripRight(to!S("\u0085 �� \u0085 \u00BB \r")), "\u0085 �� \u0085 \u00BB")); 3122 assert(equal(stripRight(to!S("1")), "1")); 3123 assert(equal(stripRight(to!S("\U0010FFFE")), "\U0010FFFE")); 3124 assert(equal(stripRight(to!S("")), "")); 3125 3126 assert(equal(strip(to!S(" foo\t ")), "foo")); 3127 assert(equal(strip(to!S("\u2008 foo\t \u2007")), "foo")); 3128 assert(equal(strip(to!S("\u0085 �� \u0085 \u00BB \r")), "�� \u0085 \u00BB")); 3129 assert(equal(strip(to!S("\U0010FFFE")), "\U0010FFFE")); 3130 assert(equal(strip(to!S("")), "")); 3131 } 3132 }); 3133} 3134 3135@safe pure unittest 3136{ 3137 import std.array : sameHead, sameTail; 3138 import std.exception : assertCTFEable; 3139 assertCTFEable!( 3140 { 3141 wstring s = " "; 3142 assert(s.sameTail(s.stripLeft())); 3143 assert(s.sameHead(s.stripRight())); 3144 }); 3145} 3146 3147 3148/++ 3149 If $(D str) ends with $(D delimiter), then $(D str) is returned without 3150 $(D delimiter) on its end. If it $(D str) does $(I not) end with 3151 $(D delimiter), then it is returned unchanged. 3152 3153 If no $(D delimiter) is given, then one trailing $(D '\r'), $(D '\n'), 3154 $(D "\r\n"), $(D '\f'), $(D '\v'), $(REF lineSep, std,uni), $(REF paraSep, std,uni), or $(REF nelSep, std,uni) 3155 is removed from the end of $(D str). If $(D str) does not end with any of those characters, 3156 then it is returned unchanged. 3157 3158 Params: 3159 str = string or indexable range of characters 3160 delimiter = string of characters to be sliced off end of str[] 3161 3162 Returns: 3163 slice of str 3164 +/ 3165Range chomp(Range)(Range str) 3166if ((isRandomAccessRange!Range && isSomeChar!(ElementEncodingType!Range) || 3167 isNarrowString!Range) && 3168 !isConvertibleToString!Range) 3169{ 3170 import std.uni : lineSep, paraSep, nelSep; 3171 if (str.empty) 3172 return str; 3173 3174 alias C = ElementEncodingType!Range; 3175 3176 switch (str[$ - 1]) 3177 { 3178 case '\n': 3179 { 3180 if (str.length > 1 && str[$ - 2] == '\r') 3181 return str[0 .. $ - 2]; 3182 goto case; 3183 } 3184 case '\r', '\v', '\f': 3185 return str[0 .. $ - 1]; 3186 3187 // Pop off the last character if lineSep, paraSep, or nelSep 3188 static if (is(C : const char)) 3189 { 3190 /* Manually decode: 3191 * lineSep is E2 80 A8 3192 * paraSep is E2 80 A9 3193 */ 3194 case 0xA8: // Last byte of lineSep 3195 case 0xA9: // Last byte of paraSep 3196 if (str.length > 2 && str[$ - 2] == 0x80 && str[$ - 3] == 0xE2) 3197 return str [0 .. $ - 3]; 3198 goto default; 3199 3200 /* Manually decode: 3201 * NEL is C2 85 3202 */ 3203 case 0x85: 3204 if (str.length > 1 && str[$ - 2] == 0xC2) 3205 return str [0 .. $ - 2]; 3206 goto default; 3207 } 3208 else 3209 { 3210 case lineSep: 3211 case paraSep: 3212 case nelSep: 3213 return str[0 .. $ - 1]; 3214 } 3215 default: 3216 return str; 3217 } 3218} 3219 3220/// Ditto 3221Range chomp(Range, C2)(Range str, const(C2)[] delimiter) 3222if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) || 3223 isNarrowString!Range) && 3224 !isConvertibleToString!Range && 3225 isSomeChar!C2) 3226{ 3227 if (delimiter.empty) 3228 return chomp(str); 3229 3230 alias C1 = ElementEncodingType!Range; 3231 3232 static if (is(Unqual!C1 == Unqual!C2) && (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4))) 3233 { 3234 import std.algorithm.searching : endsWith; 3235 if (str.endsWith(delimiter)) 3236 return str[0 .. $ - delimiter.length]; 3237 return str; 3238 } 3239 else 3240 { 3241 auto orig = str.save; 3242 3243 static if (isSomeString!Range) 3244 alias C = dchar; // because strings auto-decode 3245 else 3246 alias C = C1; // and ranges do not 3247 3248 foreach_reverse (C c; delimiter) 3249 { 3250 if (str.empty || str.back != c) 3251 return orig; 3252 3253 str.popBack(); 3254 } 3255 3256 return str; 3257 } 3258} 3259 3260/// 3261@safe pure 3262unittest 3263{ 3264 import std.uni : lineSep, paraSep, nelSep; 3265 import std.utf : decode; 3266 assert(chomp(" hello world \n\r") == " hello world \n"); 3267 assert(chomp(" hello world \r\n") == " hello world "); 3268 assert(chomp(" hello world \f") == " hello world "); 3269 assert(chomp(" hello world \v") == " hello world "); 3270 assert(chomp(" hello world \n\n") == " hello world \n"); 3271 assert(chomp(" hello world \n\n ") == " hello world \n\n "); 3272 assert(chomp(" hello world \n\n" ~ [lineSep]) == " hello world \n\n"); 3273 assert(chomp(" hello world \n\n" ~ [paraSep]) == " hello world \n\n"); 3274 assert(chomp(" hello world \n\n" ~ [ nelSep]) == " hello world \n\n"); 3275 assert(chomp(" hello world") == " hello world"); 3276 assert(chomp("") == ""); 3277 3278 assert(chomp(" hello world", "orld") == " hello w"); 3279 assert(chomp(" hello world", " he") == " hello world"); 3280 assert(chomp("", "hello") == ""); 3281 3282 // Don't decode pointlessly 3283 assert(chomp("hello\xFE", "\r") == "hello\xFE"); 3284} 3285 3286StringTypeOf!Range chomp(Range)(auto ref Range str) 3287if (isConvertibleToString!Range) 3288{ 3289 return chomp!(StringTypeOf!Range)(str); 3290} 3291 3292StringTypeOf!Range chomp(Range, C2)(auto ref Range str, const(C2)[] delimiter) 3293if (isConvertibleToString!Range) 3294{ 3295 return chomp!(StringTypeOf!Range, C2)(str, delimiter); 3296} 3297 3298@safe pure unittest 3299{ 3300 assert(testAliasedString!chomp(" hello world \n\r")); 3301 assert(testAliasedString!chomp(" hello world", "orld")); 3302} 3303 3304@safe pure unittest 3305{ 3306 import std.conv : to; 3307 import std.exception : assertCTFEable; 3308 3309 string s; 3310 3311 assertCTFEable!( 3312 { 3313 foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3314 { 3315 // @@@ BUG IN COMPILER, MUST INSERT CAST 3316 assert(chomp(cast(S) null) is null); 3317 assert(chomp(to!S("hello")) == "hello"); 3318 assert(chomp(to!S("hello\n")) == "hello"); 3319 assert(chomp(to!S("hello\r")) == "hello"); 3320 assert(chomp(to!S("hello\r\n")) == "hello"); 3321 assert(chomp(to!S("hello\n\r")) == "hello\n"); 3322 assert(chomp(to!S("hello\n\n")) == "hello\n"); 3323 assert(chomp(to!S("hello\r\r")) == "hello\r"); 3324 assert(chomp(to!S("hello\nxxx\n")) == "hello\nxxx"); 3325 assert(chomp(to!S("hello\u2028")) == "hello"); 3326 assert(chomp(to!S("hello\u2029")) == "hello"); 3327 assert(chomp(to!S("hello\u0085")) == "hello"); 3328 assert(chomp(to!S("hello\u2028\u2028")) == "hello\u2028"); 3329 assert(chomp(to!S("hello\u2029\u2029")) == "hello\u2029"); 3330 assert(chomp(to!S("hello\u2029\u2129")) == "hello\u2029\u2129"); 3331 assert(chomp(to!S("hello\u2029\u0185")) == "hello\u2029\u0185"); 3332 3333 foreach (T; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3334 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 3335 // @@@ BUG IN COMPILER, MUST INSERT CAST 3336 assert(chomp(cast(S) null, cast(T) null) is null); 3337 assert(chomp(to!S("hello\n"), cast(T) null) == "hello"); 3338 assert(chomp(to!S("hello"), to!T("o")) == "hell"); 3339 assert(chomp(to!S("hello"), to!T("p")) == "hello"); 3340 // @@@ BUG IN COMPILER, MUST INSERT CAST 3341 assert(chomp(to!S("hello"), cast(T) null) == "hello"); 3342 assert(chomp(to!S("hello"), to!T("llo")) == "he"); 3343 assert(chomp(to!S("\uFF28ello"), to!T("llo")) == "\uFF28e"); 3344 assert(chomp(to!S("\uFF28el\uFF4co"), to!T("l\uFF4co")) == "\uFF28e"); 3345 }(); 3346 } 3347 }); 3348 3349 // Ranges 3350 import std.array : array; 3351 import std.utf : byChar, byWchar, byDchar; 3352 assert(chomp("hello world\r\n" .byChar ).array == "hello world"); 3353 assert(chomp("hello world\r\n"w.byWchar).array == "hello world"w); 3354 assert(chomp("hello world\r\n"d.byDchar).array == "hello world"d); 3355 3356 assert(chomp("hello world"d.byDchar, "ld").array == "hello wor"d); 3357 3358 assert(chomp("hello\u2020" .byChar , "\u2020").array == "hello"); 3359 assert(chomp("hello\u2020"d.byDchar, "\u2020"d).array == "hello"d); 3360} 3361 3362 3363/++ 3364 If $(D str) starts with $(D delimiter), then the part of $(D str) following 3365 $(D delimiter) is returned. If $(D str) does $(I not) start with 3366 3367 $(D delimiter), then it is returned unchanged. 3368 3369 Params: 3370 str = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 3371 of characters 3372 delimiter = string of characters to be sliced off front of str[] 3373 3374 Returns: 3375 slice of str 3376 +/ 3377Range chompPrefix(Range, C2)(Range str, const(C2)[] delimiter) 3378if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) || 3379 isNarrowString!Range) && 3380 !isConvertibleToString!Range && 3381 isSomeChar!C2) 3382{ 3383 alias C1 = ElementEncodingType!Range; 3384 3385 static if (is(Unqual!C1 == Unqual!C2) && (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4))) 3386 { 3387 import std.algorithm.searching : startsWith; 3388 if (str.startsWith(delimiter)) 3389 return str[delimiter.length .. $]; 3390 return str; 3391 } 3392 else 3393 { 3394 auto orig = str.save; 3395 3396 static if (isSomeString!Range) 3397 alias C = dchar; // because strings auto-decode 3398 else 3399 alias C = C1; // and ranges do not 3400 3401 foreach (C c; delimiter) 3402 { 3403 if (str.empty || str.front != c) 3404 return orig; 3405 3406 str.popFront(); 3407 } 3408 3409 return str; 3410 } 3411} 3412 3413/// 3414@safe pure unittest 3415{ 3416 assert(chompPrefix("hello world", "he") == "llo world"); 3417 assert(chompPrefix("hello world", "hello w") == "orld"); 3418 assert(chompPrefix("hello world", " world") == "hello world"); 3419 assert(chompPrefix("", "hello") == ""); 3420} 3421 3422StringTypeOf!Range chompPrefix(Range, C2)(auto ref Range str, const(C2)[] delimiter) 3423if (isConvertibleToString!Range) 3424{ 3425 return chompPrefix!(StringTypeOf!Range, C2)(str, delimiter); 3426} 3427 3428@safe pure 3429unittest 3430{ 3431 import std.algorithm.comparison : equal; 3432 import std.conv : to; 3433 import std.exception : assertCTFEable; 3434 assertCTFEable!( 3435 { 3436 foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3437 { 3438 foreach (T; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3439 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 3440 assert(equal(chompPrefix(to!S("abcdefgh"), to!T("abcde")), "fgh")); 3441 assert(equal(chompPrefix(to!S("abcde"), to!T("abcdefgh")), "abcde")); 3442 assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el\uFF4co")), "")); 3443 assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el")), "\uFF4co")); 3444 assert(equal(chompPrefix(to!S("\uFF28el"), to!T("\uFF28el\uFF4co")), "\uFF28el")); 3445 }(); 3446 } 3447 }); 3448 3449 // Ranges 3450 import std.array : array; 3451 import std.utf : byChar, byWchar, byDchar; 3452 assert(chompPrefix("hello world" .byChar , "hello"d).array == " world"); 3453 assert(chompPrefix("hello world"w.byWchar, "hello" ).array == " world"w); 3454 assert(chompPrefix("hello world"d.byDchar, "hello"w).array == " world"d); 3455 assert(chompPrefix("hello world"c.byDchar, "hello"w).array == " world"d); 3456 3457 assert(chompPrefix("hello world"d.byDchar, "lx").array == "hello world"d); 3458 assert(chompPrefix("hello world"d.byDchar, "hello world xx").array == "hello world"d); 3459 3460 assert(chompPrefix("\u2020world" .byChar , "\u2020").array == "world"); 3461 assert(chompPrefix("\u2020world"d.byDchar, "\u2020"d).array == "world"d); 3462} 3463 3464@safe pure unittest 3465{ 3466 assert(testAliasedString!chompPrefix("hello world", "hello")); 3467} 3468 3469/++ 3470 Returns $(D str) without its last character, if there is one. If $(D str) 3471 ends with $(D "\r\n"), then both are removed. If $(D str) is empty, then 3472 then it is returned unchanged. 3473 3474 Params: 3475 str = string (must be valid UTF) 3476 Returns: 3477 slice of str 3478 +/ 3479 3480Range chop(Range)(Range str) 3481if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) || 3482 isNarrowString!Range) && 3483 !isConvertibleToString!Range) 3484{ 3485 if (str.empty) 3486 return str; 3487 3488 static if (isSomeString!Range) 3489 { 3490 if (str.length >= 2 && str[$ - 1] == '\n' && str[$ - 2] == '\r') 3491 return str[0 .. $ - 2]; 3492 str.popBack(); 3493 return str; 3494 } 3495 else 3496 { 3497 alias C = Unqual!(ElementEncodingType!Range); 3498 C c = str.back; 3499 str.popBack(); 3500 if (c == '\n') 3501 { 3502 if (!str.empty && str.back == '\r') 3503 str.popBack(); 3504 return str; 3505 } 3506 // Pop back a dchar, not just a code unit 3507 static if (C.sizeof == 1) 3508 { 3509 int cnt = 1; 3510 while ((c & 0xC0) == 0x80) 3511 { 3512 if (str.empty) 3513 break; 3514 c = str.back; 3515 str.popBack(); 3516 if (++cnt > 4) 3517 break; 3518 } 3519 } 3520 else static if (C.sizeof == 2) 3521 { 3522 if (c >= 0xD800 && c <= 0xDBFF) 3523 { 3524 if (!str.empty) 3525 str.popBack(); 3526 } 3527 } 3528 else static if (C.sizeof == 4) 3529 { 3530 } 3531 else 3532 static assert(0); 3533 return str; 3534 } 3535} 3536 3537/// 3538@safe pure unittest 3539{ 3540 assert(chop("hello world") == "hello worl"); 3541 assert(chop("hello world\n") == "hello world"); 3542 assert(chop("hello world\r") == "hello world"); 3543 assert(chop("hello world\n\r") == "hello world\n"); 3544 assert(chop("hello world\r\n") == "hello world"); 3545 assert(chop("Walter Bright") == "Walter Brigh"); 3546 assert(chop("") == ""); 3547} 3548 3549StringTypeOf!Range chop(Range)(auto ref Range str) 3550if (isConvertibleToString!Range) 3551{ 3552 return chop!(StringTypeOf!Range)(str); 3553} 3554 3555@safe pure unittest 3556{ 3557 assert(testAliasedString!chop("hello world")); 3558} 3559 3560@safe pure unittest 3561{ 3562 import std.array : array; 3563 import std.utf : byChar, byWchar, byDchar, byCodeUnit, invalidUTFstrings; 3564 3565 assert(chop("hello world".byChar).array == "hello worl"); 3566 assert(chop("hello world\n"w.byWchar).array == "hello world"w); 3567 assert(chop("hello world\r"d.byDchar).array == "hello world"d); 3568 assert(chop("hello world\n\r".byChar).array == "hello world\n"); 3569 assert(chop("hello world\r\n"w.byWchar).array == "hello world"w); 3570 assert(chop("Walter Bright"d.byDchar).array == "Walter Brigh"d); 3571 assert(chop("".byChar).array == ""); 3572 3573 assert(chop(`������������������������` .byCodeUnit).array == "���������������������"); 3574 assert(chop(`������������������������`w.byCodeUnit).array == "���������������������"w); 3575 assert(chop(`������������������������`d.byCodeUnit).array == "���������������������"d); 3576 3577 auto ca = invalidUTFstrings!char(); 3578 foreach (s; ca) 3579 { 3580 foreach (c; chop(s.byCodeUnit)) 3581 { 3582 } 3583 } 3584 3585 auto wa = invalidUTFstrings!wchar(); 3586 foreach (s; wa) 3587 { 3588 foreach (c; chop(s.byCodeUnit)) 3589 { 3590 } 3591 } 3592} 3593 3594@safe pure unittest 3595{ 3596 import std.algorithm.comparison : equal; 3597 import std.conv : to; 3598 import std.exception : assertCTFEable; 3599 3600 assertCTFEable!( 3601 { 3602 foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3603 { 3604 assert(chop(cast(S) null) is null); 3605 assert(equal(chop(to!S("hello")), "hell")); 3606 assert(equal(chop(to!S("hello\r\n")), "hello")); 3607 assert(equal(chop(to!S("hello\n\r")), "hello\n")); 3608 assert(equal(chop(to!S("Verit��")), "Verit")); 3609 assert(equal(chop(to!S(`������������������`)), "���������������")); 3610 assert(equal(chop(to!S(`������������������������`)), "���������������������")); 3611 } 3612 }); 3613} 3614 3615 3616/++ 3617 Left justify $(D s) in a field $(D width) characters wide. $(D fillChar) 3618 is the character that will be used to fill up the space in the field that 3619 $(D s) doesn't fill. 3620 3621 Params: 3622 s = string 3623 width = minimum field width 3624 fillChar = used to pad end up to $(D width) characters 3625 3626 Returns: 3627 GC allocated string 3628 3629 See_Also: 3630 $(LREF leftJustifier), which does not allocate 3631 +/ 3632S leftJustify(S)(S s, size_t width, dchar fillChar = ' ') 3633if (isSomeString!S) 3634{ 3635 import std.array : array; 3636 return leftJustifier(s, width, fillChar).array; 3637} 3638 3639/// 3640@safe pure unittest 3641{ 3642 assert(leftJustify("hello", 7, 'X') == "helloXX"); 3643 assert(leftJustify("hello", 2, 'X') == "hello"); 3644 assert(leftJustify("hello", 9, 'X') == "helloXXXX"); 3645} 3646 3647/++ 3648 Left justify $(D s) in a field $(D width) characters wide. $(D fillChar) 3649 is the character that will be used to fill up the space in the field that 3650 $(D s) doesn't fill. 3651 3652 Params: 3653 r = string or range of characters 3654 width = minimum field width 3655 fillChar = used to pad end up to $(D width) characters 3656 3657 Returns: 3658 a lazy range of the left justified result 3659 3660 See_Also: 3661 $(LREF rightJustifier) 3662 +/ 3663 3664auto leftJustifier(Range)(Range r, size_t width, dchar fillChar = ' ') 3665if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) && 3666 !isConvertibleToString!Range) 3667{ 3668 alias C = Unqual!(ElementEncodingType!Range); 3669 3670 static if (C.sizeof == 1) 3671 { 3672 import std.utf : byDchar, byChar; 3673 return leftJustifier(r.byDchar, width, fillChar).byChar; 3674 } 3675 else static if (C.sizeof == 2) 3676 { 3677 import std.utf : byDchar, byWchar; 3678 return leftJustifier(r.byDchar, width, fillChar).byWchar; 3679 } 3680 else static if (C.sizeof == 4) 3681 { 3682 static struct Result 3683 { 3684 private: 3685 Range _input; 3686 size_t _width; 3687 dchar _fillChar; 3688 size_t len; 3689 3690 public: 3691 3692 @property bool empty() 3693 { 3694 return len >= _width && _input.empty; 3695 } 3696 3697 @property C front() 3698 { 3699 return _input.empty ? _fillChar : _input.front; 3700 } 3701 3702 void popFront() 3703 { 3704 ++len; 3705 if (!_input.empty) 3706 _input.popFront(); 3707 } 3708 3709 static if (isForwardRange!Range) 3710 { 3711 @property typeof(this) save() return scope 3712 { 3713 auto ret = this; 3714 ret._input = _input.save; 3715 return ret; 3716 } 3717 } 3718 } 3719 3720 return Result(r, width, fillChar); 3721 } 3722 else 3723 static assert(0); 3724} 3725 3726/// 3727@safe pure @nogc nothrow 3728unittest 3729{ 3730 import std.algorithm.comparison : equal; 3731 import std.utf : byChar; 3732 assert(leftJustifier("hello", 2).equal("hello".byChar)); 3733 assert(leftJustifier("hello", 7).equal("hello ".byChar)); 3734 assert(leftJustifier("hello", 7, 'x').equal("helloxx".byChar)); 3735} 3736 3737auto leftJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ') 3738if (isConvertibleToString!Range) 3739{ 3740 return leftJustifier!(StringTypeOf!Range)(r, width, fillChar); 3741} 3742 3743@safe pure unittest 3744{ 3745 auto r = "hello".leftJustifier(8); 3746 r.popFront(); 3747 auto save = r.save; 3748 r.popFront(); 3749 assert(r.front == 'l'); 3750 assert(save.front == 'e'); 3751} 3752 3753@safe pure unittest 3754{ 3755 assert(testAliasedString!leftJustifier("hello", 2)); 3756} 3757 3758/++ 3759 Right justify $(D s) in a field $(D width) characters wide. $(D fillChar) 3760 is the character that will be used to fill up the space in the field that 3761 $(D s) doesn't fill. 3762 3763 Params: 3764 s = string 3765 width = minimum field width 3766 fillChar = used to pad end up to $(D width) characters 3767 3768 Returns: 3769 GC allocated string 3770 3771 See_Also: 3772 $(LREF rightJustifier), which does not allocate 3773 +/ 3774S rightJustify(S)(S s, size_t width, dchar fillChar = ' ') 3775if (isSomeString!S) 3776{ 3777 import std.array : array; 3778 return rightJustifier(s, width, fillChar).array; 3779} 3780 3781/// 3782@safe pure unittest 3783{ 3784 assert(rightJustify("hello", 7, 'X') == "XXhello"); 3785 assert(rightJustify("hello", 2, 'X') == "hello"); 3786 assert(rightJustify("hello", 9, 'X') == "XXXXhello"); 3787} 3788 3789/++ 3790 Right justify $(D s) in a field $(D width) characters wide. $(D fillChar) 3791 is the character that will be used to fill up the space in the field that 3792 $(D s) doesn't fill. 3793 3794 Params: 3795 r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 3796 of characters 3797 width = minimum field width 3798 fillChar = used to pad end up to $(D width) characters 3799 3800 Returns: 3801 a lazy range of the right justified result 3802 3803 See_Also: 3804 $(LREF leftJustifier) 3805 +/ 3806 3807auto rightJustifier(Range)(Range r, size_t width, dchar fillChar = ' ') 3808if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 3809 !isConvertibleToString!Range) 3810{ 3811 alias C = Unqual!(ElementEncodingType!Range); 3812 3813 static if (C.sizeof == 1) 3814 { 3815 import std.utf : byDchar, byChar; 3816 return rightJustifier(r.byDchar, width, fillChar).byChar; 3817 } 3818 else static if (C.sizeof == 2) 3819 { 3820 import std.utf : byDchar, byWchar; 3821 return rightJustifier(r.byDchar, width, fillChar).byWchar; 3822 } 3823 else static if (C.sizeof == 4) 3824 { 3825 static struct Result 3826 { 3827 private: 3828 Range _input; 3829 size_t _width; 3830 alias nfill = _width; // number of fill characters to prepend 3831 dchar _fillChar; 3832 bool inited; 3833 3834 // Lazy initialization so constructor is trivial and cannot fail 3835 void initialize() 3836 { 3837 // Replace _width with nfill 3838 // (use alias instead of union because CTFE cannot deal with unions) 3839 assert(_width); 3840 static if (hasLength!Range) 3841 { 3842 immutable len = _input.length; 3843 nfill = (_width > len) ? _width - len : 0; 3844 } 3845 else 3846 { 3847 // Lookahead to see now many fill characters are needed 3848 import std.range : take; 3849 import std.range.primitives : walkLength; 3850 nfill = _width - walkLength(_input.save.take(_width), _width); 3851 } 3852 inited = true; 3853 } 3854 3855 public: 3856 this(Range input, size_t width, dchar fillChar) pure nothrow 3857 { 3858 _input = input; 3859 _fillChar = fillChar; 3860 _width = width; 3861 } 3862 3863 @property bool empty() 3864 { 3865 return !nfill && _input.empty; 3866 } 3867 3868 @property C front() 3869 { 3870 if (!nfill) 3871 return _input.front; // fast path 3872 if (!inited) 3873 initialize(); 3874 return nfill ? _fillChar : _input.front; 3875 } 3876 3877 void popFront() 3878 { 3879 if (!nfill) 3880 _input.popFront(); // fast path 3881 else 3882 { 3883 if (!inited) 3884 initialize(); 3885 if (nfill) 3886 --nfill; 3887 else 3888 _input.popFront(); 3889 } 3890 } 3891 3892 @property typeof(this) save() 3893 { 3894 auto ret = this; 3895 ret._input = _input.save; 3896 return ret; 3897 } 3898 } 3899 3900 return Result(r, width, fillChar); 3901 } 3902 else 3903 static assert(0); 3904} 3905 3906/// 3907@safe pure @nogc nothrow 3908unittest 3909{ 3910 import std.algorithm.comparison : equal; 3911 import std.utf : byChar; 3912 assert(rightJustifier("hello", 2).equal("hello".byChar)); 3913 assert(rightJustifier("hello", 7).equal(" hello".byChar)); 3914 assert(rightJustifier("hello", 7, 'x').equal("xxhello".byChar)); 3915} 3916 3917auto rightJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ') 3918if (isConvertibleToString!Range) 3919{ 3920 return rightJustifier!(StringTypeOf!Range)(r, width, fillChar); 3921} 3922 3923@safe pure unittest 3924{ 3925 assert(testAliasedString!rightJustifier("hello", 2)); 3926} 3927 3928@safe pure unittest 3929{ 3930 auto r = "hello"d.rightJustifier(6); 3931 r.popFront(); 3932 auto save = r.save; 3933 r.popFront(); 3934 assert(r.front == 'e'); 3935 assert(save.front == 'h'); 3936 3937 auto t = "hello".rightJustifier(7); 3938 t.popFront(); 3939 assert(t.front == ' '); 3940 t.popFront(); 3941 assert(t.front == 'h'); 3942 3943 auto u = "hello"d.rightJustifier(5); 3944 u.popFront(); 3945 u.popFront(); 3946 u.popFront(); 3947} 3948 3949/++ 3950 Center $(D s) in a field $(D width) characters wide. $(D fillChar) 3951 is the character that will be used to fill up the space in the field that 3952 $(D s) doesn't fill. 3953 3954 Params: 3955 s = The string to center 3956 width = Width of the field to center `s` in 3957 fillChar = The character to use for filling excess space in the field 3958 3959 Returns: 3960 The resulting _center-justified string. The returned string is 3961 GC-allocated. To avoid GC allocation, use $(LREF centerJustifier) 3962 instead. 3963 +/ 3964S center(S)(S s, size_t width, dchar fillChar = ' ') 3965if (isSomeString!S) 3966{ 3967 import std.array : array; 3968 return centerJustifier(s, width, fillChar).array; 3969} 3970 3971/// 3972@safe pure unittest 3973{ 3974 assert(center("hello", 7, 'X') == "XhelloX"); 3975 assert(center("hello", 2, 'X') == "hello"); 3976 assert(center("hello", 9, 'X') == "XXhelloXX"); 3977} 3978 3979@safe pure 3980unittest 3981{ 3982 import std.conv : to; 3983 import std.exception : assertCTFEable; 3984 3985 assertCTFEable!( 3986 { 3987 foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 3988 { 3989 S s = to!S("hello"); 3990 3991 assert(leftJustify(s, 2) == "hello"); 3992 assert(rightJustify(s, 2) == "hello"); 3993 assert(center(s, 2) == "hello"); 3994 3995 assert(leftJustify(s, 7) == "hello "); 3996 assert(rightJustify(s, 7) == " hello"); 3997 assert(center(s, 7) == " hello "); 3998 3999 assert(leftJustify(s, 8) == "hello "); 4000 assert(rightJustify(s, 8) == " hello"); 4001 assert(center(s, 8) == " hello "); 4002 4003 assert(leftJustify(s, 8, '\u0100') == "hello\u0100\u0100\u0100"); 4004 assert(rightJustify(s, 8, '\u0100') == "\u0100\u0100\u0100hello"); 4005 assert(center(s, 8, '\u0100') == "\u0100hello\u0100\u0100"); 4006 4007 assert(leftJustify(s, 8, '��') == "hello������"); 4008 assert(rightJustify(s, 8, '��') == "������hello"); 4009 assert(center(s, 8, '��') == "��hello����"); 4010 } 4011 }); 4012} 4013 4014/++ 4015 Center justify $(D r) in a field $(D width) characters wide. $(D fillChar) 4016 is the character that will be used to fill up the space in the field that 4017 $(D r) doesn't fill. 4018 4019 Params: 4020 r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 4021 of characters 4022 width = minimum field width 4023 fillChar = used to pad end up to $(D width) characters 4024 4025 Returns: 4026 a lazy range of the center justified result 4027 4028 See_Also: 4029 $(LREF leftJustifier) 4030 $(LREF rightJustifier) 4031 +/ 4032 4033auto centerJustifier(Range)(Range r, size_t width, dchar fillChar = ' ') 4034if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 4035 !isConvertibleToString!Range) 4036{ 4037 alias C = Unqual!(ElementEncodingType!Range); 4038 4039 static if (C.sizeof == 1) 4040 { 4041 import std.utf : byDchar, byChar; 4042 return centerJustifier(r.byDchar, width, fillChar).byChar; 4043 } 4044 else static if (C.sizeof == 2) 4045 { 4046 import std.utf : byDchar, byWchar; 4047 return centerJustifier(r.byDchar, width, fillChar).byWchar; 4048 } 4049 else static if (C.sizeof == 4) 4050 { 4051 import std.range : chain, repeat; 4052 import std.range.primitives : walkLength; 4053 4054 auto len = walkLength(r.save, width); 4055 if (len > width) 4056 len = width; 4057 const nleft = (width - len) / 2; 4058 const nright = width - len - nleft; 4059 return chain(repeat(fillChar, nleft), r, repeat(fillChar, nright)); 4060 } 4061 else 4062 static assert(0); 4063} 4064 4065/// 4066@safe pure @nogc nothrow 4067unittest 4068{ 4069 import std.algorithm.comparison : equal; 4070 import std.utf : byChar; 4071 assert(centerJustifier("hello", 2).equal("hello".byChar)); 4072 assert(centerJustifier("hello", 8).equal(" hello ".byChar)); 4073 assert(centerJustifier("hello", 7, 'x').equal("xhellox".byChar)); 4074} 4075 4076auto centerJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ') 4077if (isConvertibleToString!Range) 4078{ 4079 return centerJustifier!(StringTypeOf!Range)(r, width, fillChar); 4080} 4081 4082@safe pure unittest 4083{ 4084 assert(testAliasedString!centerJustifier("hello", 8)); 4085} 4086 4087@system unittest 4088{ 4089 static auto byFwdRange(dstring s) 4090 { 4091 static struct FRange 4092 { 4093 dstring str; 4094 this(dstring s) { str = s; } 4095 @property bool empty() { return str.length == 0; } 4096 @property dchar front() { return str[0]; } 4097 void popFront() { str = str[1 .. $]; } 4098 @property FRange save() { return this; } 4099 } 4100 return FRange(s); 4101 } 4102 4103 auto r = centerJustifier(byFwdRange("hello"d), 6); 4104 r.popFront(); 4105 auto save = r.save; 4106 r.popFront(); 4107 assert(r.front == 'l'); 4108 assert(save.front == 'e'); 4109 4110 auto t = "hello".centerJustifier(7); 4111 t.popFront(); 4112 assert(t.front == 'h'); 4113 t.popFront(); 4114 assert(t.front == 'e'); 4115 4116 auto u = byFwdRange("hello"d).centerJustifier(6); 4117 u.popFront(); 4118 u.popFront(); 4119 u.popFront(); 4120 u.popFront(); 4121 u.popFront(); 4122 u.popFront(); 4123} 4124 4125 4126/++ 4127 Replace each tab character in $(D s) with the number of spaces necessary 4128 to align the following character at the next tab stop. 4129 4130 Params: 4131 s = string 4132 tabSize = distance between tab stops 4133 4134 Returns: 4135 GC allocated string with tabs replaced with spaces 4136 +/ 4137auto detab(Range)(auto ref Range s, size_t tabSize = 8) pure 4138if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) 4139 || __traits(compiles, StringTypeOf!Range)) 4140{ 4141 import std.array : array; 4142 return detabber(s, tabSize).array; 4143} 4144 4145/// 4146@system pure unittest 4147{ 4148 assert(detab(" \n\tx", 9) == " \n x"); 4149} 4150 4151@safe pure unittest 4152{ 4153 static struct TestStruct 4154 { 4155 string s; 4156 alias s this; 4157 } 4158 4159 static struct TestStruct2 4160 { 4161 string s; 4162 alias s this; 4163 @disable this(this); 4164 } 4165 4166 string s = " \n\tx"; 4167 string cmp = " \n x"; 4168 auto t = TestStruct(s); 4169 assert(detab(t, 9) == cmp); 4170 assert(detab(TestStruct(s), 9) == cmp); 4171 assert(detab(TestStruct(s), 9) == detab(TestStruct(s), 9)); 4172 assert(detab(TestStruct2(s), 9) == detab(TestStruct2(s), 9)); 4173 assert(detab(TestStruct2(s), 9) == cmp); 4174} 4175 4176/++ 4177 Replace each tab character in $(D r) with the number of spaces 4178 necessary to align the following character at the next tab stop. 4179 4180 Params: 4181 r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 4182 tabSize = distance between tab stops 4183 4184 Returns: 4185 lazy forward range with tabs replaced with spaces 4186 +/ 4187auto detabber(Range)(Range r, size_t tabSize = 8) 4188if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) && 4189 !isConvertibleToString!Range) 4190{ 4191 import std.uni : lineSep, paraSep, nelSep; 4192 import std.utf : codeUnitLimit, decodeFront; 4193 4194 assert(tabSize > 0); 4195 4196 alias C = Unqual!(ElementEncodingType!(Range)); 4197 4198 static struct Result 4199 { 4200 private: 4201 Range _input; 4202 size_t _tabSize; 4203 size_t nspaces; 4204 int column; 4205 size_t index; 4206 4207 public: 4208 4209 this(Range input, size_t tabSize) 4210 { 4211 _input = input; 4212 _tabSize = tabSize; 4213 } 4214 4215 static if (isInfinite!(Range)) 4216 { 4217 enum bool empty = false; 4218 } 4219 else 4220 { 4221 @property bool empty() 4222 { 4223 return _input.empty && nspaces == 0; 4224 } 4225 } 4226 4227 @property C front() 4228 { 4229 if (nspaces) 4230 return ' '; 4231 static if (isSomeString!(Range)) 4232 C c = _input[0]; 4233 else 4234 C c = _input.front; 4235 if (index) 4236 return c; 4237 dchar dc; 4238 if (c < codeUnitLimit!(immutable(C)[])) 4239 { 4240 dc = c; 4241 index = 1; 4242 } 4243 else 4244 { 4245 auto r = _input.save; 4246 dc = decodeFront(r, index); // lookahead to decode 4247 } 4248 switch (dc) 4249 { 4250 case '\r': 4251 case '\n': 4252 case paraSep: 4253 case lineSep: 4254 case nelSep: 4255 column = 0; 4256 break; 4257 4258 case '\t': 4259 nspaces = _tabSize - (column % _tabSize); 4260 column += nspaces; 4261 c = ' '; 4262 break; 4263 4264 default: 4265 ++column; 4266 break; 4267 } 4268 return c; 4269 } 4270 4271 void popFront() 4272 { 4273 if (!index) 4274 front; 4275 if (nspaces) 4276 --nspaces; 4277 if (!nspaces) 4278 { 4279 static if (isSomeString!(Range)) 4280 _input = _input[1 .. $]; 4281 else 4282 _input.popFront(); 4283 --index; 4284 } 4285 } 4286 4287 @property typeof(this) save() 4288 { 4289 auto ret = this; 4290 ret._input = _input.save; 4291 return ret; 4292 } 4293 } 4294 4295 return Result(r, tabSize); 4296} 4297 4298/// 4299@system pure unittest 4300{ 4301 import std.array : array; 4302 4303 assert(detabber(" \n\tx", 9).array == " \n x"); 4304} 4305 4306auto detabber(Range)(auto ref Range r, size_t tabSize = 8) 4307if (isConvertibleToString!Range) 4308{ 4309 return detabber!(StringTypeOf!Range)(r, tabSize); 4310} 4311 4312@safe pure unittest 4313{ 4314 assert(testAliasedString!detabber( " ab\t asdf ", 8)); 4315} 4316 4317@system pure unittest 4318{ 4319 import std.algorithm.comparison : cmp; 4320 import std.conv : to; 4321 import std.exception : assertCTFEable; 4322 4323 assertCTFEable!( 4324 { 4325 foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring)) 4326 { 4327 S s = to!S("This \tis\t a fofof\tof list"); 4328 assert(cmp(detab(s), "This is a fofof of list") == 0); 4329 4330 assert(detab(cast(S) null) is null); 4331 assert(detab("").empty); 4332 assert(detab("a") == "a"); 4333 assert(detab("\t") == " "); 4334 assert(detab("\t", 3) == " "); 4335 assert(detab("\t", 9) == " "); 4336 assert(detab( " ab\t asdf ") == " ab asdf "); 4337 assert(detab( " \U00010000b\tasdf ") == " \U00010000b asdf "); 4338 assert(detab("\r\t", 9) == "\r "); 4339 assert(detab("\n\t", 9) == "\n "); 4340 assert(detab("\u0085\t", 9) == "\u0085 "); 4341 assert(detab("\u2028\t", 9) == "\u2028 "); 4342 assert(detab(" \u2029\t", 9) == " \u2029 "); 4343 } 4344 }); 4345} 4346 4347/// 4348@system pure unittest 4349{ 4350 import std.array : array; 4351 import std.utf : byChar, byWchar; 4352 4353 assert(detabber(" \u2029\t".byChar, 9).array == " \u2029 "); 4354 auto r = "hel\tx".byWchar.detabber(); 4355 assert(r.front == 'h'); 4356 auto s = r.save; 4357 r.popFront(); 4358 r.popFront(); 4359 assert(r.front == 'l'); 4360 assert(s.front == 'h'); 4361} 4362 4363/++ 4364 Replaces spaces in $(D s) with the optimal number of tabs. 4365 All spaces and tabs at the end of a line are removed. 4366 4367 Params: 4368 s = String to convert. 4369 tabSize = Tab columns are $(D tabSize) spaces apart. 4370 4371 Returns: 4372 GC allocated string with spaces replaced with tabs; 4373 use $(LREF entabber) to not allocate. 4374 4375 See_Also: 4376 $(LREF entabber) 4377 +/ 4378auto entab(Range)(Range s, size_t tabSize = 8) 4379if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) 4380{ 4381 import std.array : array; 4382 return entabber(s, tabSize).array; 4383} 4384 4385/// 4386@safe pure unittest 4387{ 4388 assert(entab(" x \n") == "\tx\n"); 4389} 4390 4391auto entab(Range)(auto ref Range s, size_t tabSize = 8) 4392if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) && 4393 is(StringTypeOf!Range)) 4394{ 4395 return entab!(StringTypeOf!Range)(s, tabSize); 4396} 4397 4398@safe pure unittest 4399{ 4400 assert(testAliasedString!entab(" x \n")); 4401} 4402 4403/++ 4404 Replaces spaces in range $(D r) with the optimal number of tabs. 4405 All spaces and tabs at the end of a line are removed. 4406 4407 Params: 4408 r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) 4409 tabSize = distance between tab stops 4410 4411 Returns: 4412 lazy forward range with spaces replaced with tabs 4413 4414 See_Also: 4415 $(LREF entab) 4416 +/ 4417auto entabber(Range)(Range r, size_t tabSize = 8) 4418if (isForwardRange!Range && !isConvertibleToString!Range) 4419{ 4420 import std.uni : lineSep, paraSep, nelSep; 4421 import std.utf : codeUnitLimit, decodeFront; 4422 4423 assert(tabSize > 0); 4424 alias C = Unqual!(ElementEncodingType!Range); 4425 4426 static struct Result 4427 { 4428 private: 4429 Range _input; 4430 size_t _tabSize; 4431 size_t nspaces; 4432 size_t ntabs; 4433 int column; 4434 size_t index; 4435 4436 @property C getFront() 4437 { 4438 static if (isSomeString!Range) 4439 return _input[0]; // avoid autodecode 4440 else 4441 return _input.front; 4442 } 4443 4444 public: 4445 4446 this(Range input, size_t tabSize) 4447 { 4448 _input = input; 4449 _tabSize = tabSize; 4450 } 4451 4452 @property bool empty() 4453 { 4454 if (ntabs || nspaces) 4455 return false; 4456 4457 /* Since trailing spaces are removed, 4458 * look ahead for anything that is not a trailing space 4459 */ 4460 static if (isSomeString!Range) 4461 { 4462 foreach (c; _input) 4463 { 4464 if (c != ' ' && c != '\t') 4465 return false; 4466 } 4467 return true; 4468 } 4469 else 4470 { 4471 if (_input.empty) 4472 return true; 4473 immutable c = _input.front; 4474 if (c != ' ' && c != '\t') 4475 return false; 4476 auto t = _input.save; 4477 t.popFront(); 4478 foreach (c2; t) 4479 { 4480 if (c2 != ' ' && c2 != '\t') 4481 return false; 4482 } 4483 return true; 4484 } 4485 } 4486 4487 @property C front() 4488 { 4489 //writefln(" front(): ntabs = %s nspaces = %s index = %s front = '%s'", ntabs, nspaces, index, getFront); 4490 if (ntabs) 4491 return '\t'; 4492 if (nspaces) 4493 return ' '; 4494 C c = getFront; 4495 if (index) 4496 return c; 4497 dchar dc; 4498 if (c < codeUnitLimit!(immutable(C)[])) 4499 { 4500 index = 1; 4501 dc = c; 4502 if (c == ' ' || c == '\t') 4503 { 4504 // Consume input until a non-blank is encountered 4505 immutable startcol = column; 4506 C cx; 4507 static if (isSomeString!Range) 4508 { 4509 while (1) 4510 { 4511 assert(_input.length); 4512 cx = _input[0]; 4513 if (cx == ' ') 4514 ++column; 4515 else if (cx == '\t') 4516 column += _tabSize - (column % _tabSize); 4517 else 4518 break; 4519 _input = _input[1 .. $]; 4520 } 4521 } 4522 else 4523 { 4524 while (1) 4525 { 4526 assert(!_input.empty); 4527 cx = _input.front; 4528 if (cx == ' ') 4529 ++column; 4530 else if (cx == '\t') 4531 column += _tabSize - (column % _tabSize); 4532 else 4533 break; 4534 _input.popFront(); 4535 } 4536 } 4537 // Compute ntabs+nspaces to get from startcol to column 4538 immutable n = column - startcol; 4539 if (n == 1) 4540 { 4541 nspaces = 1; 4542 } 4543 else 4544 { 4545 ntabs = column / _tabSize - startcol / _tabSize; 4546 if (ntabs == 0) 4547 nspaces = column - startcol; 4548 else 4549 nspaces = column % _tabSize; 4550 } 4551 //writefln("\tstartcol = %s, column = %s, _tabSize = %s", startcol, column, _tabSize); 4552 //writefln("\tntabs = %s, nspaces = %s", ntabs, nspaces); 4553 if (cx < codeUnitLimit!(immutable(C)[])) 4554 { 4555 dc = cx; 4556 index = 1; 4557 } 4558 else 4559 { 4560 auto r = _input.save; 4561 dc = decodeFront(r, index); // lookahead to decode 4562 } 4563 switch (dc) 4564 { 4565 case '\r': 4566 case '\n': 4567 case paraSep: 4568 case lineSep: 4569 case nelSep: 4570 column = 0; 4571 // Spaces followed by newline are ignored 4572 ntabs = 0; 4573 nspaces = 0; 4574 return cx; 4575 4576 default: 4577 ++column; 4578 break; 4579 } 4580 return ntabs ? '\t' : ' '; 4581 } 4582 } 4583 else 4584 { 4585 auto r = _input.save; 4586 dc = decodeFront(r, index); // lookahead to decode 4587 } 4588 //writefln("dc = x%x", dc); 4589 switch (dc) 4590 { 4591 case '\r': 4592 case '\n': 4593 case paraSep: 4594 case lineSep: 4595 case nelSep: 4596 column = 0; 4597 break; 4598 4599 default: 4600 ++column; 4601 break; 4602 } 4603 return c; 4604 } 4605 4606 void popFront() 4607 { 4608 //writefln("popFront(): ntabs = %s nspaces = %s index = %s front = '%s'", ntabs, nspaces, index, getFront); 4609 if (!index) 4610 front; 4611 if (ntabs) 4612 --ntabs; 4613 else if (nspaces) 4614 --nspaces; 4615 else if (!ntabs && !nspaces) 4616 { 4617 static if (isSomeString!Range) 4618 _input = _input[1 .. $]; 4619 else 4620 _input.popFront(); 4621 --index; 4622 } 4623 } 4624 4625 @property typeof(this) save() 4626 { 4627 auto ret = this; 4628 ret._input = _input.save; 4629 return ret; 4630 } 4631 } 4632 4633 return Result(r, tabSize); 4634} 4635 4636/// 4637@safe pure unittest 4638{ 4639 import std.array : array; 4640 assert(entabber(" x \n").array == "\tx\n"); 4641} 4642 4643auto entabber(Range)(auto ref Range r, size_t tabSize = 8) 4644if (isConvertibleToString!Range) 4645{ 4646 return entabber!(StringTypeOf!Range)(r, tabSize); 4647} 4648 4649@safe pure unittest 4650{ 4651 assert(testAliasedString!entabber(" ab asdf ", 8)); 4652} 4653 4654@safe pure 4655unittest 4656{ 4657 import std.conv : to; 4658 import std.exception : assertCTFEable; 4659 4660 assertCTFEable!( 4661 { 4662 assert(entab(cast(string) null) is null); 4663 assert(entab("").empty); 4664 assert(entab("a") == "a"); 4665 assert(entab(" ") == ""); 4666 assert(entab(" x") == "\tx"); 4667 assert(entab(" ab asdf ") == " ab\tasdf"); 4668 assert(entab(" ab asdf ") == " ab\t asdf"); 4669 assert(entab(" ab \t asdf ") == " ab\t asdf"); 4670 assert(entab("1234567 \ta") == "1234567\t\ta"); 4671 assert(entab("1234567 \ta") == "1234567\t\ta"); 4672 assert(entab("1234567 \ta") == "1234567\t\ta"); 4673 assert(entab("1234567 \ta") == "1234567\t\ta"); 4674 assert(entab("1234567 \ta") == "1234567\t\ta"); 4675 assert(entab("1234567 \ta") == "1234567\t\ta"); 4676 assert(entab("1234567 \ta") == "1234567\t\ta"); 4677 assert(entab("1234567 \ta") == "1234567\t\ta"); 4678 assert(entab("1234567 \ta") == "1234567\t\t\ta"); 4679 4680 assert(entab("a ") == "a"); 4681 assert(entab("a\v") == "a\v"); 4682 assert(entab("a\f") == "a\f"); 4683 assert(entab("a\n") == "a\n"); 4684 assert(entab("a\n\r") == "a\n\r"); 4685 assert(entab("a\r\n") == "a\r\n"); 4686 assert(entab("a\u2028") == "a\u2028"); 4687 assert(entab("a\u2029") == "a\u2029"); 4688 assert(entab("a\u0085") == "a\u0085"); 4689 assert(entab("a ") == "a"); 4690 assert(entab("a\t") == "a"); 4691 assert(entab("\uFF28\uFF45\uFF4C\uFF4C567 \t\uFF4F \t") == 4692 "\uFF28\uFF45\uFF4C\uFF4C567\t\t\uFF4F"); 4693 assert(entab(" \naa") == "\naa"); 4694 assert(entab(" \r aa") == "\r aa"); 4695 assert(entab(" \u2028 aa") == "\u2028 aa"); 4696 assert(entab(" \u2029 aa") == "\u2029 aa"); 4697 assert(entab(" \u0085 aa") == "\u0085 aa"); 4698 }); 4699} 4700 4701@safe pure 4702unittest 4703{ 4704 import std.array : array; 4705 import std.utf : byChar; 4706 assert(entabber(" \u0085 aa".byChar).array == "\u0085 aa"); 4707 assert(entabber(" \u2028\t aa \t".byChar).array == "\u2028\t aa"); 4708 4709 auto r = entabber("1234", 4); 4710 r.popFront(); 4711 auto rsave = r.save; 4712 r.popFront(); 4713 assert(r.front == '3'); 4714 assert(rsave.front == '2'); 4715} 4716 4717 4718/++ 4719 Replaces the characters in $(D str) which are keys in $(D transTable) with 4720 their corresponding values in $(D transTable). $(D transTable) is an AA 4721 where its keys are $(D dchar) and its values are either $(D dchar) or some 4722 type of string. Also, if $(D toRemove) is given, the characters in it are 4723 removed from $(D str) prior to translation. $(D str) itself is unaltered. 4724 A copy with the changes is returned. 4725 4726 See_Also: 4727 $(LREF tr) 4728 $(REF replace, std,array) 4729 4730 Params: 4731 str = The original string. 4732 transTable = The AA indicating which characters to replace and what to 4733 replace them with. 4734 toRemove = The characters to remove from the string. 4735 +/ 4736C1[] translate(C1, C2 = immutable char)(C1[] str, 4737 in dchar[dchar] transTable, 4738 const(C2)[] toRemove = null) @safe pure 4739if (isSomeChar!C1 && isSomeChar!C2) 4740{ 4741 import std.array : appender; 4742 auto buffer = appender!(C1[])(); 4743 translateImpl(str, transTable, toRemove, buffer); 4744 return buffer.data; 4745} 4746 4747/// 4748@safe pure unittest 4749{ 4750 dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q']; 4751 assert(translate("hello world", transTable1) == "h5ll7 w7rld"); 4752 4753 assert(translate("hello world", transTable1, "low") == "h5 rd"); 4754 4755 string[dchar] transTable2 = ['e' : "5", 'o' : "orange"]; 4756 assert(translate("hello world", transTable2) == "h5llorange worangerld"); 4757} 4758 4759@safe pure unittest // issue 13018 4760{ 4761 immutable dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q']; 4762 assert(translate("hello world", transTable1) == "h5ll7 w7rld"); 4763 4764 assert(translate("hello world", transTable1, "low") == "h5 rd"); 4765 4766 immutable string[dchar] transTable2 = ['e' : "5", 'o' : "orange"]; 4767 assert(translate("hello world", transTable2) == "h5llorange worangerld"); 4768} 4769 4770@system pure unittest 4771{ 4772 import std.conv : to; 4773 import std.exception : assertCTFEable; 4774 4775 assertCTFEable!( 4776 { 4777 foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[], 4778 wchar[], const(wchar)[], immutable(wchar)[], 4779 dchar[], const(dchar)[], immutable(dchar)[])) 4780 { 4781 assert(translate(to!S("hello world"), cast(dchar[dchar])['h' : 'q', 'l' : '5']) == 4782 to!S("qe55o wor5d")); 4783 assert(translate(to!S("hello world"), cast(dchar[dchar])['o' : 'l', 'l' : '\U00010143']) == 4784 to!S("he\U00010143\U00010143l wlr\U00010143d")); 4785 assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['h' : 'q', 'l': '5']) == 4786 to!S("qe55o \U00010143 wor5d")); 4787 assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['o' : '0', '\U00010143' : 'o']) == 4788 to!S("hell0 o w0rld")); 4789 assert(translate(to!S("hello world"), cast(dchar[dchar]) null) == to!S("hello world")); 4790 4791 foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[], 4792 wchar[], const(wchar)[], immutable(wchar)[], 4793 dchar[], const(dchar)[], immutable(dchar)[])) 4794 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 4795 foreach (R; AliasSeq!(dchar[dchar], const dchar[dchar], 4796 immutable dchar[dchar])) 4797 { 4798 R tt = ['h' : 'q', 'l' : '5']; 4799 assert(translate(to!S("hello world"), tt, to!T("r")) 4800 == to!S("qe55o wo5d")); 4801 assert(translate(to!S("hello world"), tt, to!T("helo")) 4802 == to!S(" wrd")); 4803 assert(translate(to!S("hello world"), tt, to!T("q5")) 4804 == to!S("qe55o wor5d")); 4805 } 4806 }(); 4807 4808 auto s = to!S("hello world"); 4809 dchar[dchar] transTable = ['h' : 'q', 'l' : '5']; 4810 static assert(is(typeof(s) == typeof(translate(s, transTable)))); 4811 } 4812 }); 4813} 4814 4815/++ Ditto +/ 4816C1[] translate(C1, S, C2 = immutable char)(C1[] str, 4817 in S[dchar] transTable, 4818 const(C2)[] toRemove = null) @safe pure 4819if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2) 4820{ 4821 import std.array : appender; 4822 auto buffer = appender!(C1[])(); 4823 translateImpl(str, transTable, toRemove, buffer); 4824 return buffer.data; 4825} 4826 4827@system pure unittest 4828{ 4829 import std.conv : to; 4830 import std.exception : assertCTFEable; 4831 4832 assertCTFEable!( 4833 { 4834 foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[], 4835 wchar[], const(wchar)[], immutable(wchar)[], 4836 dchar[], const(dchar)[], immutable(dchar)[])) 4837 { 4838 assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"]) == 4839 to!S("yellowe4242o wor42d")); 4840 assert(translate(to!S("hello world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) == 4841 to!S("he\U00010143\U00010143\U00010143\U00010143owl wowlr\U00010143\U00010143d")); 4842 assert(translate(to!S("hello \U00010143 world"), ['h' : "yellow", 'l' : "42"]) == 4843 to!S("yellowe4242o \U00010143 wor42d")); 4844 assert(translate(to!S("hello \U00010143 world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) == 4845 to!S("he\U00010143\U00010143\U00010143\U00010143owl \U00010143 wowlr\U00010143\U00010143d")); 4846 assert(translate(to!S("hello \U00010143 world"), ['h' : ""]) == 4847 to!S("ello \U00010143 world")); 4848 assert(translate(to!S("hello \U00010143 world"), ['\U00010143' : ""]) == 4849 to!S("hello world")); 4850 assert(translate(to!S("hello world"), cast(string[dchar]) null) == to!S("hello world")); 4851 4852 foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[], 4853 wchar[], const(wchar)[], immutable(wchar)[], 4854 dchar[], const(dchar)[], immutable(dchar)[])) 4855 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 4856 4857 foreach (R; AliasSeq!(string[dchar], const string[dchar], 4858 immutable string[dchar])) 4859 { 4860 R tt = ['h' : "yellow", 'l' : "42"]; 4861 assert(translate(to!S("hello world"), tt, to!T("r")) == 4862 to!S("yellowe4242o wo42d")); 4863 assert(translate(to!S("hello world"), tt, to!T("helo")) == 4864 to!S(" wrd")); 4865 assert(translate(to!S("hello world"), tt, to!T("y42")) == 4866 to!S("yellowe4242o wor42d")); 4867 assert(translate(to!S("hello world"), tt, to!T("hello world")) == 4868 to!S("")); 4869 assert(translate(to!S("hello world"), tt, to!T("42")) == 4870 to!S("yellowe4242o wor42d")); 4871 } 4872 }(); 4873 4874 auto s = to!S("hello world"); 4875 string[dchar] transTable = ['h' : "silly", 'l' : "putty"]; 4876 static assert(is(typeof(s) == typeof(translate(s, transTable)))); 4877 } 4878 }); 4879} 4880 4881/++ 4882 This is an overload of $(D translate) which takes an existing buffer to write the contents to. 4883 4884 Params: 4885 str = The original string. 4886 transTable = The AA indicating which characters to replace and what to 4887 replace them with. 4888 toRemove = The characters to remove from the string. 4889 buffer = An output range to write the contents to. 4890 +/ 4891void translate(C1, C2 = immutable char, Buffer)(C1[] str, 4892 in dchar[dchar] transTable, 4893 const(C2)[] toRemove, 4894 Buffer buffer) 4895if (isSomeChar!C1 && isSomeChar!C2 && isOutputRange!(Buffer, C1)) 4896{ 4897 translateImpl(str, transTable, toRemove, buffer); 4898} 4899 4900/// 4901@safe pure unittest 4902{ 4903 import std.array : appender; 4904 dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q']; 4905 auto buffer = appender!(dchar[])(); 4906 translate("hello world", transTable1, null, buffer); 4907 assert(buffer.data == "h5ll7 w7rld"); 4908 4909 buffer.clear(); 4910 translate("hello world", transTable1, "low", buffer); 4911 assert(buffer.data == "h5 rd"); 4912 4913 buffer.clear(); 4914 string[dchar] transTable2 = ['e' : "5", 'o' : "orange"]; 4915 translate("hello world", transTable2, null, buffer); 4916 assert(buffer.data == "h5llorange worangerld"); 4917} 4918 4919@safe pure unittest // issue 13018 4920{ 4921 import std.array : appender; 4922 immutable dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q']; 4923 auto buffer = appender!(dchar[])(); 4924 translate("hello world", transTable1, null, buffer); 4925 assert(buffer.data == "h5ll7 w7rld"); 4926 4927 buffer.clear(); 4928 translate("hello world", transTable1, "low", buffer); 4929 assert(buffer.data == "h5 rd"); 4930 4931 buffer.clear(); 4932 immutable string[dchar] transTable2 = ['e' : "5", 'o' : "orange"]; 4933 translate("hello world", transTable2, null, buffer); 4934 assert(buffer.data == "h5llorange worangerld"); 4935} 4936 4937/++ Ditto +/ 4938void translate(C1, S, C2 = immutable char, Buffer)(C1[] str, 4939 in S[dchar] transTable, 4940 const(C2)[] toRemove, 4941 Buffer buffer) 4942if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2 && isOutputRange!(Buffer, S)) 4943{ 4944 translateImpl(str, transTable, toRemove, buffer); 4945} 4946 4947private void translateImpl(C1, T, C2, Buffer)(C1[] str, 4948 T transTable, 4949 const(C2)[] toRemove, 4950 Buffer buffer) 4951{ 4952 bool[dchar] removeTable; 4953 4954 foreach (dchar c; toRemove) 4955 removeTable[c] = true; 4956 4957 foreach (dchar c; str) 4958 { 4959 if (c in removeTable) 4960 continue; 4961 4962 auto newC = c in transTable; 4963 4964 if (newC) 4965 put(buffer, *newC); 4966 else 4967 put(buffer, c); 4968 } 4969} 4970 4971/++ 4972 This is an $(I $(RED ASCII-only)) overload of $(LREF _translate). It 4973 will $(I not) work with Unicode. It exists as an optimization for the 4974 cases where Unicode processing is not necessary. 4975 4976 Unlike the other overloads of $(LREF _translate), this one does not take 4977 an AA. Rather, it takes a $(D string) generated by $(LREF makeTransTable). 4978 4979 The array generated by $(D makeTransTable) is $(D 256) elements long such that 4980 the index is equal to the ASCII character being replaced and the value is 4981 equal to the character that it's being replaced with. Note that translate 4982 does not decode any of the characters, so you can actually pass it Extended 4983 ASCII characters if you want to (ASCII only actually uses $(D 128) 4984 characters), but be warned that Extended ASCII characters are not valid 4985 Unicode and therefore will result in a $(D UTFException) being thrown from 4986 most other Phobos functions. 4987 4988 Also, because no decoding occurs, it is possible to use this overload to 4989 translate ASCII characters within a proper UTF-8 string without altering the 4990 other, non-ASCII characters. It's replacing any code unit greater than 4991 $(D 127) with another code unit or replacing any code unit with another code 4992 unit greater than $(D 127) which will cause UTF validation issues. 4993 4994 See_Also: 4995 $(LREF tr) 4996 $(REF replace, std,array) 4997 4998 Params: 4999 str = The original string. 5000 transTable = The string indicating which characters to replace and what 5001 to replace them with. It is generated by $(LREF makeTransTable). 5002 toRemove = The characters to remove from the string. 5003 +/ 5004C[] translate(C = immutable char)(in char[] str, in char[] transTable, in char[] toRemove = null) @trusted pure nothrow 5005if (is(Unqual!C == char)) 5006in 5007{ 5008 assert(transTable.length == 256); 5009} 5010body 5011{ 5012 bool[256] remTable = false; 5013 5014 foreach (char c; toRemove) 5015 remTable[c] = true; 5016 5017 size_t count = 0; 5018 foreach (char c; str) 5019 { 5020 if (!remTable[c]) 5021 ++count; 5022 } 5023 5024 auto buffer = new char[count]; 5025 5026 size_t i = 0; 5027 foreach (char c; str) 5028 { 5029 if (!remTable[c]) 5030 buffer[i++] = transTable[c]; 5031 } 5032 5033 return cast(C[])(buffer); 5034} 5035 5036 5037/** 5038 * Do same thing as $(LREF makeTransTable) but allocate the translation table 5039 * on the GC heap. 5040 * 5041 * Use $(LREF makeTransTable) instead. 5042 */ 5043string makeTrans(in char[] from, in char[] to) @trusted pure nothrow 5044{ 5045 return makeTransTable(from, to)[].idup; 5046} 5047 5048/// 5049@safe pure nothrow unittest 5050{ 5051 auto transTable1 = makeTrans("eo5", "57q"); 5052 assert(translate("hello world", transTable1) == "h5ll7 w7rld"); 5053 5054 assert(translate("hello world", transTable1, "low") == "h5 rd"); 5055} 5056 5057/******* 5058 * Construct 256 character translation table, where characters in from[] are replaced 5059 * by corresponding characters in to[]. 5060 * 5061 * Params: 5062 * from = array of chars, less than or equal to 256 in length 5063 * to = corresponding array of chars to translate to 5064 * Returns: 5065 * translation array 5066 */ 5067 5068char[256] makeTransTable(in char[] from, in char[] to) @safe pure nothrow @nogc 5069in 5070{ 5071 import std.ascii : isASCII; 5072 assert(from.length == to.length); 5073 assert(from.length <= 256); 5074 foreach (char c; from) 5075 assert(isASCII(c)); 5076 foreach (char c; to) 5077 assert(isASCII(c)); 5078} 5079body 5080{ 5081 char[256] result = void; 5082 5083 foreach (i; 0 .. result.length) 5084 result[i] = cast(char) i; 5085 foreach (i, c; from) 5086 result[c] = to[i]; 5087 return result; 5088} 5089 5090@safe pure unittest 5091{ 5092 import std.conv : to; 5093 import std.exception : assertCTFEable; 5094 5095 assertCTFEable!( 5096 { 5097 foreach (C; AliasSeq!(char, const char, immutable char)) 5098 { 5099 assert(translate!C("hello world", makeTransTable("hl", "q5")) == to!(C[])("qe55o wor5d")); 5100 5101 auto s = to!(C[])("hello world"); 5102 auto transTable = makeTransTable("hl", "q5"); 5103 static assert(is(typeof(s) == typeof(translate!C(s, transTable)))); 5104 } 5105 5106 foreach (S; AliasSeq!(char[], const(char)[], immutable(char)[])) 5107 { 5108 assert(translate(to!S("hello world"), makeTransTable("hl", "q5")) == to!S("qe55o wor5d")); 5109 assert(translate(to!S("hello \U00010143 world"), makeTransTable("hl", "q5")) == 5110 to!S("qe55o \U00010143 wor5d")); 5111 assert(translate(to!S("hello world"), makeTransTable("ol", "1o")) == to!S("heoo1 w1rod")); 5112 assert(translate(to!S("hello world"), makeTransTable("", "")) == to!S("hello world")); 5113 assert(translate(to!S("hello world"), makeTransTable("12345", "67890")) == to!S("hello world")); 5114 assert(translate(to!S("hello \U00010143 world"), makeTransTable("12345", "67890")) == 5115 to!S("hello \U00010143 world")); 5116 5117 foreach (T; AliasSeq!(char[], const(char)[], immutable(char)[])) 5118 (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 5119 assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("r")) == 5120 to!S("qe55o wo5d")); 5121 assert(translate(to!S("hello \U00010143 world"), makeTransTable("hl", "q5"), to!T("r")) == 5122 to!S("qe55o \U00010143 wo5d")); 5123 assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("helo")) == 5124 to!S(" wrd")); 5125 assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("q5")) == 5126 to!S("qe55o wor5d")); 5127 }(); 5128 } 5129 }); 5130} 5131 5132/++ 5133 This is an $(I $(RED ASCII-only)) overload of $(D translate) which takes an existing buffer to write the contents to. 5134 5135 Params: 5136 str = The original string. 5137 transTable = The string indicating which characters to replace and what 5138 to replace them with. It is generated by $(LREF makeTransTable). 5139 toRemove = The characters to remove from the string. 5140 buffer = An output range to write the contents to. 5141 +/ 5142void translate(C = immutable char, Buffer)(in char[] str, in char[] transTable, 5143 in char[] toRemove, Buffer buffer) @trusted pure 5144if (is(Unqual!C == char) && isOutputRange!(Buffer, char)) 5145in 5146{ 5147 assert(transTable.length == 256); 5148} 5149body 5150{ 5151 bool[256] remTable = false; 5152 5153 foreach (char c; toRemove) 5154 remTable[c] = true; 5155 5156 foreach (char c; str) 5157 { 5158 if (!remTable[c]) 5159 put(buffer, transTable[c]); 5160 } 5161} 5162 5163/// 5164@safe pure unittest 5165{ 5166 import std.array : appender; 5167 auto buffer = appender!(char[])(); 5168 auto transTable1 = makeTransTable("eo5", "57q"); 5169 translate("hello world", transTable1, null, buffer); 5170 assert(buffer.data == "h5ll7 w7rld"); 5171 5172 buffer.clear(); 5173 translate("hello world", transTable1, "low", buffer); 5174 assert(buffer.data == "h5 rd"); 5175} 5176 5177//@@@DEPRECATED_2.086@@@ 5178deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.") 5179bool inPattern(S)(dchar c, in S pattern) @safe pure @nogc 5180if (isSomeString!S) 5181{ 5182 bool result = false; 5183 int range = 0; 5184 dchar lastc; 5185 5186 foreach (size_t i, dchar p; pattern) 5187 { 5188 if (p == '^' && i == 0) 5189 { 5190 result = true; 5191 if (i + 1 == pattern.length) 5192 return (c == p); // or should this be an error? 5193 } 5194 else if (range) 5195 { 5196 range = 0; 5197 if (lastc <= c && c <= p || c == p) 5198 return !result; 5199 } 5200 else if (p == '-' && i > result && i + 1 < pattern.length) 5201 { 5202 range = 1; 5203 continue; 5204 } 5205 else if (c == p) 5206 return !result; 5207 lastc = p; 5208 } 5209 return result; 5210} 5211 5212 5213deprecated 5214@safe pure @nogc unittest 5215{ 5216 import std.conv : to; 5217 import std.exception : assertCTFEable; 5218 5219 assertCTFEable!( 5220 { 5221 assert(inPattern('x', "x") == 1); 5222 assert(inPattern('x', "y") == 0); 5223 assert(inPattern('x', string.init) == 0); 5224 assert(inPattern('x', "^y") == 1); 5225 assert(inPattern('x', "yxxy") == 1); 5226 assert(inPattern('x', "^yxxy") == 0); 5227 assert(inPattern('x', "^abcd") == 1); 5228 assert(inPattern('^', "^^") == 0); 5229 assert(inPattern('^', "^") == 1); 5230 assert(inPattern('^', "a^") == 1); 5231 assert(inPattern('x', "a-z") == 1); 5232 assert(inPattern('x', "A-Z") == 0); 5233 assert(inPattern('x', "^a-z") == 0); 5234 assert(inPattern('x', "^A-Z") == 1); 5235 assert(inPattern('-', "a-") == 1); 5236 assert(inPattern('-', "^A-") == 0); 5237 assert(inPattern('a', "z-a") == 1); 5238 assert(inPattern('z', "z-a") == 1); 5239 assert(inPattern('x', "z-a") == 0); 5240 }); 5241} 5242 5243//@@@DEPRECATED_2.086@@@ 5244deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.") 5245bool inPattern(S)(dchar c, S[] patterns) @safe pure @nogc 5246if (isSomeString!S) 5247{ 5248 foreach (string pattern; patterns) 5249 { 5250 if (!inPattern(c, pattern)) 5251 { 5252 return false; 5253 } 5254 } 5255 return true; 5256} 5257 5258//@@@DEPRECATED_2.086@@@ 5259deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.") 5260size_t countchars(S, S1)(S s, in S1 pattern) @safe pure @nogc 5261if (isSomeString!S && isSomeString!S1) 5262{ 5263 size_t count; 5264 foreach (dchar c; s) 5265 { 5266 count += inPattern(c, pattern); 5267 } 5268 return count; 5269} 5270 5271deprecated 5272@safe pure @nogc unittest 5273{ 5274 import std.conv : to; 5275 import std.exception : assertCTFEable; 5276 5277 assertCTFEable!( 5278 { 5279 assert(countchars("abc", "a-c") == 3); 5280 assert(countchars("hello world", "or") == 3); 5281 }); 5282} 5283 5284//@@@DEPRECATED_2.086@@@ 5285deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.") 5286S removechars(S)(S s, in S pattern) @safe pure 5287if (isSomeString!S) 5288{ 5289 import std.utf : encode; 5290 5291 Unqual!(typeof(s[0]))[] r; 5292 bool changed = false; 5293 5294 foreach (size_t i, dchar c; s) 5295 { 5296 if (inPattern(c, pattern)) 5297 { 5298 if (!changed) 5299 { 5300 changed = true; 5301 r = s[0 .. i].dup; 5302 } 5303 continue; 5304 } 5305 if (changed) 5306 { 5307 encode(r, c); 5308 } 5309 } 5310 if (changed) 5311 return r; 5312 else 5313 return s; 5314} 5315 5316deprecated 5317@safe pure unittest 5318{ 5319 import std.conv : to; 5320 import std.exception : assertCTFEable; 5321 5322 assertCTFEable!( 5323 { 5324 assert(removechars("abc", "a-c").length == 0); 5325 assert(removechars("hello world", "or") == "hell wld"); 5326 assert(removechars("hello world", "d") == "hello worl"); 5327 assert(removechars("hah", "h") == "a"); 5328 }); 5329} 5330 5331deprecated 5332@safe pure unittest 5333{ 5334 assert(removechars("abc", "x") == "abc"); 5335} 5336 5337//@@@DEPRECATED_2.086@@@ 5338deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.") 5339S squeeze(S)(S s, in S pattern = null) 5340{ 5341 import std.utf : encode, stride; 5342 5343 Unqual!(typeof(s[0]))[] r; 5344 dchar lastc; 5345 size_t lasti; 5346 int run; 5347 bool changed; 5348 5349 foreach (size_t i, dchar c; s) 5350 { 5351 if (run && lastc == c) 5352 { 5353 changed = true; 5354 } 5355 else if (pattern is null || inPattern(c, pattern)) 5356 { 5357 run = 1; 5358 if (changed) 5359 { 5360 if (r is null) 5361 r = s[0 .. lasti].dup; 5362 encode(r, c); 5363 } 5364 else 5365 lasti = i + stride(s, i); 5366 lastc = c; 5367 } 5368 else 5369 { 5370 run = 0; 5371 if (changed) 5372 { 5373 if (r is null) 5374 r = s[0 .. lasti].dup; 5375 encode(r, c); 5376 } 5377 } 5378 } 5379 return changed ? ((r is null) ? s[0 .. lasti] : cast(S) r) : s; 5380} 5381 5382deprecated 5383@system pure unittest 5384{ 5385 import std.conv : to; 5386 import std.exception : assertCTFEable; 5387 5388 assertCTFEable!( 5389 { 5390 string s; 5391 5392 assert(squeeze("hello") == "helo"); 5393 5394 s = "abcd"; 5395 assert(squeeze(s) is s); 5396 s = "xyzz"; 5397 assert(squeeze(s).ptr == s.ptr); // should just be a slice 5398 5399 assert(squeeze("hello goodbyee", "oe") == "hello godbye"); 5400 }); 5401} 5402 5403//@@@DEPRECATED_2.086@@@ 5404deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.") 5405S1 munch(S1, S2)(ref S1 s, S2 pattern) @safe pure @nogc 5406{ 5407 size_t j = s.length; 5408 foreach (i, dchar c; s) 5409 { 5410 if (!inPattern(c, pattern)) 5411 { 5412 j = i; 5413 break; 5414 } 5415 } 5416 scope(exit) s = s[j .. $]; 5417 return s[0 .. j]; 5418} 5419 5420/// 5421deprecated 5422@safe pure @nogc unittest 5423{ 5424 string s = "123abc"; 5425 string t = munch(s, "0123456789"); 5426 assert(t == "123" && s == "abc"); 5427 t = munch(s, "0123456789"); 5428 assert(t == "" && s == "abc"); 5429} 5430 5431deprecated 5432@safe pure @nogc unittest 5433{ 5434 string s = "123���abc"; 5435 string t = munch(s, "0123456789"); 5436 assert(t == "123" && s == "���abc"); 5437 t = munch(s, "0123456789"); 5438 assert(t == "" && s == "���abc"); 5439 t = munch(s, "��$�����"); 5440 assert(t == "���" && s == "abc"); 5441} 5442 5443 5444/********************************************** 5445 * Return string that is the 'successor' to s[]. 5446 * If the rightmost character is a-zA-Z0-9, it is incremented within 5447 * its case or digits. If it generates a carry, the process is 5448 * repeated with the one to its immediate left. 5449 */ 5450 5451S succ(S)(S s) @safe pure 5452if (isSomeString!S) 5453{ 5454 import std.ascii : isAlphaNum; 5455 5456 if (s.length && isAlphaNum(s[$ - 1])) 5457 { 5458 auto r = s.dup; 5459 size_t i = r.length - 1; 5460 5461 while (1) 5462 { 5463 dchar c = s[i]; 5464 dchar carry; 5465 5466 switch (c) 5467 { 5468 case '9': 5469 c = '0'; 5470 carry = '1'; 5471 goto Lcarry; 5472 case 'z': 5473 case 'Z': 5474 c -= 'Z' - 'A'; 5475 carry = c; 5476 Lcarry: 5477 r[i] = cast(char) c; 5478 if (i == 0) 5479 { 5480 auto t = new typeof(r[0])[r.length + 1]; 5481 t[0] = cast(char) carry; 5482 t[1 .. $] = r[]; 5483 return t; 5484 } 5485 i--; 5486 break; 5487 5488 default: 5489 if (isAlphaNum(c)) 5490 r[i]++; 5491 return r; 5492 } 5493 } 5494 } 5495 return s; 5496} 5497 5498/// 5499@safe pure unittest 5500{ 5501 assert(succ("1") == "2"); 5502 assert(succ("9") == "10"); 5503 assert(succ("999") == "1000"); 5504 assert(succ("zz99") == "aaa00"); 5505} 5506 5507@safe pure unittest 5508{ 5509 import std.conv : to; 5510 import std.exception : assertCTFEable; 5511 5512 assertCTFEable!( 5513 { 5514 assert(succ(string.init) is null); 5515 assert(succ("!@#$%") == "!@#$%"); 5516 assert(succ("1") == "2"); 5517 assert(succ("9") == "10"); 5518 assert(succ("999") == "1000"); 5519 assert(succ("zz99") == "aaa00"); 5520 }); 5521} 5522 5523 5524/++ 5525 Replaces the characters in $(D str) which are in $(D from) with the 5526 the corresponding characters in $(D to) and returns the resulting string. 5527 5528 $(D tr) is based on 5529 $(HTTP pubs.opengroup.org/onlinepubs/9699919799/utilities/_tr.html, Posix's tr), 5530 though it doesn't do everything that the Posix utility does. 5531 5532 Params: 5533 str = The original string. 5534 from = The characters to replace. 5535 to = The characters to replace with. 5536 modifiers = String containing modifiers. 5537 5538 Modifiers: 5539 $(BOOKTABLE, 5540 $(TR $(TD Modifier) $(TD Description)) 5541 $(TR $(TD $(D 'c')) $(TD Complement the list of characters in $(D from))) 5542 $(TR $(TD $(D 'd')) $(TD Removes matching characters with no corresponding 5543 replacement in $(D to))) 5544 $(TR $(TD $(D 's')) $(TD Removes adjacent duplicates in the replaced 5545 characters)) 5546 ) 5547 5548 If the modifier $(D 'd') is present, then the number of characters in 5549 $(D to) may be only $(D 0) or $(D 1). 5550 5551 If the modifier $(D 'd') is $(I not) present, and $(D to) is empty, then 5552 $(D to) is taken to be the same as $(D from). 5553 5554 If the modifier $(D 'd') is $(I not) present, and $(D to) is shorter than 5555 $(D from), then $(D to) is extended by replicating the last character in 5556 $(D to). 5557 5558 Both $(D from) and $(D to) may contain ranges using the $(D '-') character 5559 (e.g. $(D "a-d") is synonymous with $(D "abcd").) Neither accept a leading 5560 $(D '^') as meaning the complement of the string (use the $(D 'c') modifier 5561 for that). 5562 +/ 5563C1[] tr(C1, C2, C3, C4 = immutable char) 5564 (C1[] str, const(C2)[] from, const(C3)[] to, const(C4)[] modifiers = null) 5565{ 5566 import std.array : appender; 5567 import std.conv : conv_to = to; 5568 import std.utf : decode; 5569 5570 bool mod_c; 5571 bool mod_d; 5572 bool mod_s; 5573 5574 foreach (char c; modifiers) 5575 { 5576 switch (c) 5577 { 5578 case 'c': mod_c = 1; break; // complement 5579 case 'd': mod_d = 1; break; // delete unreplaced chars 5580 case 's': mod_s = 1; break; // squeeze duplicated replaced chars 5581 default: assert(0); 5582 } 5583 } 5584 5585 if (to.empty && !mod_d) 5586 to = conv_to!(typeof(to))(from); 5587 5588 auto result = appender!(C1[])(); 5589 bool modified; 5590 dchar lastc; 5591 5592 foreach (dchar c; str) 5593 { 5594 dchar lastf; 5595 dchar lastt; 5596 dchar newc; 5597 int n = 0; 5598 5599 for (size_t i = 0; i < from.length; ) 5600 { 5601 immutable f = decode(from, i); 5602 if (f == '-' && lastf != dchar.init && i < from.length) 5603 { 5604 immutable nextf = decode(from, i); 5605 if (lastf <= c && c <= nextf) 5606 { 5607 n += c - lastf - 1; 5608 if (mod_c) 5609 goto Lnotfound; 5610 goto Lfound; 5611 } 5612 n += nextf - lastf; 5613 lastf = lastf.init; 5614 continue; 5615 } 5616 5617 if (c == f) 5618 { if (mod_c) 5619 goto Lnotfound; 5620 goto Lfound; 5621 } 5622 lastf = f; 5623 n++; 5624 } 5625 if (!mod_c) 5626 goto Lnotfound; 5627 n = 0; // consider it 'found' at position 0 5628 5629 Lfound: 5630 5631 // Find the nth character in to[] 5632 dchar nextt; 5633 for (size_t i = 0; i < to.length; ) 5634 { 5635 immutable t = decode(to, i); 5636 if (t == '-' && lastt != dchar.init && i < to.length) 5637 { 5638 nextt = decode(to, i); 5639 n -= nextt - lastt; 5640 if (n < 0) 5641 { 5642 newc = nextt + n + 1; 5643 goto Lnewc; 5644 } 5645 lastt = dchar.init; 5646 continue; 5647 } 5648 if (n == 0) 5649 { newc = t; 5650 goto Lnewc; 5651 } 5652 lastt = t; 5653 nextt = t; 5654 n--; 5655 } 5656 if (mod_d) 5657 continue; 5658 newc = nextt; 5659 5660 Lnewc: 5661 if (mod_s && modified && newc == lastc) 5662 continue; 5663 result.put(newc); 5664 assert(newc != dchar.init); 5665 modified = true; 5666 lastc = newc; 5667 continue; 5668 5669 Lnotfound: 5670 result.put(c); 5671 lastc = c; 5672 modified = false; 5673 } 5674 5675 return result.data; 5676} 5677 5678@safe pure unittest 5679{ 5680 import std.algorithm.comparison : equal; 5681 import std.conv : to; 5682 import std.exception : assertCTFEable; 5683 5684 // Complete list of test types; too slow to test'em all 5685 // alias TestTypes = AliasSeq!( 5686 // char[], const( char)[], immutable( char)[], 5687 // wchar[], const(wchar)[], immutable(wchar)[], 5688 // dchar[], const(dchar)[], immutable(dchar)[]); 5689 5690 // Reduced list of test types 5691 alias TestTypes = AliasSeq!(char[], const(wchar)[], immutable(dchar)[]); 5692 5693 assertCTFEable!( 5694 { 5695 foreach (S; TestTypes) 5696 { 5697 foreach (T; TestTypes) 5698 { 5699 foreach (U; TestTypes) 5700 { 5701 assert(equal(tr(to!S("abcdef"), to!T("cd"), to!U("CD")), "abCDef")); 5702 assert(equal(tr(to!S("abcdef"), to!T("b-d"), to!U("B-D")), "aBCDef")); 5703 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-Dx")), "aBCDefgx")); 5704 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-CDx")), "aBCDefgx")); 5705 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-BCDx")), "aBCDefgx")); 5706 assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U("*"), to!S("c")), "****ef")); 5707 assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U(""), to!T("d")), "abcd")); 5708 assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U(""), to!U("s")), "helo godbye")); 5709 assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U("x"), "s"), "hex gxdbye")); 5710 assert(equal(tr(to!S("14-Jul-87"), to!T("a-zA-Z"), to!U(" "), "cs"), " Jul ")); 5711 assert(equal(tr(to!S("Abc"), to!T("AAA"), to!U("XYZ")), "Xbc")); 5712 } 5713 } 5714 5715 auto s = to!S("hello world"); 5716 static assert(is(typeof(s) == typeof(tr(s, "he", "if")))); 5717 } 5718 }); 5719} 5720 5721@system pure unittest 5722{ 5723 import core.exception : AssertError; 5724 import std.exception : assertThrown; 5725 assertThrown!AssertError(tr("abcdef", "cd", "CD", "X")); 5726} 5727 5728/** 5729 * Takes a string $(D s) and determines if it represents a number. This function 5730 * also takes an optional parameter, $(D bAllowSep), which will accept the 5731 * separator characters $(D ',') and $(D '__') within the string. But these 5732 * characters should be stripped from the string before using any 5733 * of the conversion functions like $(D to!int()), $(D to!float()), and etc 5734 * else an error will occur. 5735 * 5736 * Also please note, that no spaces are allowed within the string 5737 * anywhere whether it's a leading, trailing, or embedded space(s), 5738 * thus they too must be stripped from the string before using this 5739 * function, or any of the conversion functions. 5740 * 5741 * Params: 5742 * s = the string or random access range to check 5743 * bAllowSep = accept separator characters or not 5744 * 5745 * Returns: 5746 * $(D bool) 5747 */ 5748bool isNumeric(S)(S s, bool bAllowSep = false) 5749if (isSomeString!S || 5750 (isRandomAccessRange!S && 5751 hasSlicing!S && 5752 isSomeChar!(ElementType!S) && 5753 !isInfinite!S)) 5754{ 5755 import std.algorithm.comparison : among; 5756 import std.ascii : isASCII; 5757 5758 // ASCII only case insensitive comparison with two ranges 5759 static bool asciiCmp(S1)(S1 a, string b) 5760 { 5761 import std.algorithm.comparison : equal; 5762 import std.algorithm.iteration : map; 5763 import std.ascii : toLower; 5764 import std.utf : byChar; 5765 return a.map!toLower.equal(b.byChar.map!toLower); 5766 } 5767 5768 // auto-decoding special case, we're only comparing characters 5769 // in the ASCII range so there's no reason to decode 5770 static if (isSomeString!S) 5771 { 5772 import std.utf : byCodeUnit; 5773 auto codeUnits = s.byCodeUnit; 5774 } 5775 else 5776 { 5777 alias codeUnits = s; 5778 } 5779 5780 if (codeUnits.empty) 5781 return false; 5782 5783 // Check for NaN (Not a Number) and for Infinity 5784 if (codeUnits.among!((a, b) => asciiCmp(a.save, b)) 5785 ("nan", "nani", "nan+nani", "inf", "-inf")) 5786 return true; 5787 5788 immutable frontResult = codeUnits.front; 5789 if (frontResult == '-' || frontResult == '+') 5790 codeUnits.popFront; 5791 5792 immutable iLen = codeUnits.length; 5793 bool bDecimalPoint, bExponent, bComplex, sawDigits; 5794 5795 for (size_t i = 0; i < iLen; i++) 5796 { 5797 immutable c = codeUnits[i]; 5798 5799 if (!c.isASCII) 5800 return false; 5801 5802 // Digits are good, skip to the next character 5803 if (c >= '0' && c <= '9') 5804 { 5805 sawDigits = true; 5806 continue; 5807 } 5808 5809 // Check for the complex type, and if found 5810 // reset the flags for checking the 2nd number. 5811 if (c == '+') 5812 { 5813 if (!i) 5814 return false; 5815 bDecimalPoint = false; 5816 bExponent = false; 5817 bComplex = true; 5818 sawDigits = false; 5819 continue; 5820 } 5821 5822 // Allow only one exponent per number 5823 if (c == 'e' || c == 'E') 5824 { 5825 // A 2nd exponent found, return not a number 5826 if (bExponent || i + 1 >= iLen) 5827 return false; 5828 // Look forward for the sign, and if 5829 // missing then this is not a number. 5830 if (codeUnits[i + 1] != '-' && codeUnits[i + 1] != '+') 5831 return false; 5832 bExponent = true; 5833 i++; 5834 continue; 5835 } 5836 // Allow only one decimal point per number to be used 5837 if (c == '.') 5838 { 5839 // A 2nd decimal point found, return not a number 5840 if (bDecimalPoint) 5841 return false; 5842 bDecimalPoint = true; 5843 continue; 5844 } 5845 // Check for ending literal characters: "f,u,l,i,ul,fi,li", 5846 // and whether they're being used with the correct datatype. 5847 if (i == iLen - 2) 5848 { 5849 if (!sawDigits) 5850 return false; 5851 // Integer Whole Number 5852 if (asciiCmp(codeUnits[i .. iLen], "ul") && 5853 (!bDecimalPoint && !bExponent && !bComplex)) 5854 return true; 5855 // Floating-Point Number 5856 if (codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b))("fi", "li") && 5857 (bDecimalPoint || bExponent || bComplex)) 5858 return true; 5859 if (asciiCmp(codeUnits[i .. iLen], "ul") && 5860 (bDecimalPoint || bExponent || bComplex)) 5861 return false; 5862 // Could be a Integer or a Float, thus 5863 // all these suffixes are valid for both 5864 return codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b)) 5865 ("ul", "fi", "li") != 0; 5866 } 5867 if (i == iLen - 1) 5868 { 5869 if (!sawDigits) 5870 return false; 5871 // Integer Whole Number 5872 if (c.among!('u', 'l', 'U', 'L')() && 5873 (!bDecimalPoint && !bExponent && !bComplex)) 5874 return true; 5875 // Check to see if the last character in the string 5876 // is the required 'i' character 5877 if (bComplex) 5878 return c.among!('i', 'I')() != 0; 5879 // Floating-Point Number 5880 return c.among!('l', 'L', 'f', 'F', 'i', 'I')() != 0; 5881 } 5882 // Check if separators are allowed to be in the numeric string 5883 if (!bAllowSep || !c.among!('_', ',')()) 5884 return false; 5885 } 5886 5887 return sawDigits; 5888} 5889 5890/** 5891 * Integer Whole Number: (byte, ubyte, short, ushort, int, uint, long, and ulong) 5892 * ['+'|'-']digit(s)[U|L|UL] 5893 */ 5894@safe @nogc pure nothrow unittest 5895{ 5896 assert(isNumeric("123")); 5897 assert(isNumeric("123UL")); 5898 assert(isNumeric("123L")); 5899 assert(isNumeric("+123U")); 5900 assert(isNumeric("-123L")); 5901} 5902 5903/** 5904 * Floating-Point Number: (float, double, real, ifloat, idouble, and ireal) 5905 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]] 5906 * or [nan|nani|inf|-inf] 5907 */ 5908@safe @nogc pure nothrow unittest 5909{ 5910 assert(isNumeric("+123")); 5911 assert(isNumeric("-123.01")); 5912 assert(isNumeric("123.3e-10f")); 5913 assert(isNumeric("123.3e-10fi")); 5914 assert(isNumeric("123.3e-10L")); 5915 5916 assert(isNumeric("nan")); 5917 assert(isNumeric("nani")); 5918 assert(isNumeric("-inf")); 5919} 5920 5921/** 5922 * Floating-Point Number: (cfloat, cdouble, and creal) 5923 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+] 5924 * [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]] 5925 * or [nan|nani|nan+nani|inf|-inf] 5926 */ 5927@safe @nogc pure nothrow unittest 5928{ 5929 assert(isNumeric("-123e-1+456.9e-10Li")); 5930 assert(isNumeric("+123e+10+456i")); 5931 assert(isNumeric("123+456")); 5932} 5933 5934@safe @nogc pure nothrow unittest 5935{ 5936 assert(!isNumeric("F")); 5937 assert(!isNumeric("L")); 5938 assert(!isNumeric("U")); 5939 assert(!isNumeric("i")); 5940 assert(!isNumeric("fi")); 5941 assert(!isNumeric("ul")); 5942 assert(!isNumeric("li")); 5943 assert(!isNumeric(".")); 5944 assert(!isNumeric("-")); 5945 assert(!isNumeric("+")); 5946 assert(!isNumeric("e-")); 5947 assert(!isNumeric("e+")); 5948 assert(!isNumeric(".f")); 5949 assert(!isNumeric("e+f")); 5950 assert(!isNumeric("++1")); 5951 assert(!isNumeric("")); 5952 assert(!isNumeric("1E+1E+1")); 5953 assert(!isNumeric("1E1")); 5954 assert(!isNumeric("\x81")); 5955} 5956 5957// Test string types 5958@safe unittest 5959{ 5960 import std.conv : to; 5961 5962 foreach (T; AliasSeq!(string, char[], wstring, wchar[], dstring, dchar[])) 5963 { 5964 assert("123".to!T.isNumeric()); 5965 assert("123UL".to!T.isNumeric()); 5966 assert("123fi".to!T.isNumeric()); 5967 assert("123li".to!T.isNumeric()); 5968 assert(!"--123L".to!T.isNumeric()); 5969 } 5970} 5971 5972// test ranges 5973@system pure unittest 5974{ 5975 import std.range : refRange; 5976 import std.utf : byCodeUnit; 5977 5978 assert("123".byCodeUnit.isNumeric()); 5979 assert("123UL".byCodeUnit.isNumeric()); 5980 assert("123fi".byCodeUnit.isNumeric()); 5981 assert("123li".byCodeUnit.isNumeric()); 5982 assert(!"--123L".byCodeUnit.isNumeric()); 5983 5984 dstring z = "0"; 5985 assert(isNumeric(refRange(&z))); 5986 5987 dstring nani = "nani"; 5988 assert(isNumeric(refRange(&nani))); 5989} 5990 5991/// isNumeric works with CTFE 5992@safe pure unittest 5993{ 5994 enum a = isNumeric("123.00E-5+1234.45E-12Li"); 5995 enum b = isNumeric("12345xxxx890"); 5996 5997 static assert( a); 5998 static assert(!b); 5999} 6000 6001@system unittest 6002{ 6003 import std.conv : to; 6004 import std.exception : assertCTFEable; 6005 6006 assertCTFEable!( 6007 { 6008 // Test the isNumeric(in string) function 6009 assert(isNumeric("1") == true ); 6010 assert(isNumeric("1.0") == true ); 6011 assert(isNumeric("1e-1") == true ); 6012 assert(isNumeric("12345xxxx890") == false ); 6013 assert(isNumeric("567L") == true ); 6014 assert(isNumeric("23UL") == true ); 6015 assert(isNumeric("-123..56f") == false ); 6016 assert(isNumeric("12.3.5.6") == false ); 6017 assert(isNumeric(" 12.356") == false ); 6018 assert(isNumeric("123 5.6") == false ); 6019 assert(isNumeric("1233E-1+1.0e-1i") == true ); 6020 6021 assert(isNumeric("123.00E-5+1234.45E-12Li") == true); 6022 assert(isNumeric("123.00e-5+1234.45E-12iL") == false); 6023 assert(isNumeric("123.00e-5+1234.45e-12uL") == false); 6024 assert(isNumeric("123.00E-5+1234.45e-12lu") == false); 6025 6026 assert(isNumeric("123fi") == true); 6027 assert(isNumeric("123li") == true); 6028 assert(isNumeric("--123L") == false); 6029 assert(isNumeric("+123.5UL") == false); 6030 assert(isNumeric("123f") == true); 6031 assert(isNumeric("123.u") == false); 6032 6033 // @@@BUG@@ to!string(float) is not CTFEable. 6034 // Related: formatValue(T) if (is(FloatingPointTypeOf!T)) 6035 if (!__ctfe) 6036 { 6037 assert(isNumeric(to!string(real.nan)) == true); 6038 assert(isNumeric(to!string(-real.infinity)) == true); 6039 assert(isNumeric(to!string(123e+2+1234.78Li)) == true); 6040 } 6041 6042 string s = "$250.99-"; 6043 assert(isNumeric(s[1 .. s.length - 2]) == true); 6044 assert(isNumeric(s) == false); 6045 assert(isNumeric(s[0 .. s.length - 1]) == false); 6046 }); 6047 6048 assert(!isNumeric("-")); 6049 assert(!isNumeric("+")); 6050} 6051 6052/***************************** 6053 * Soundex algorithm. 6054 * 6055 * The Soundex algorithm converts a word into 4 characters 6056 * based on how the word sounds phonetically. The idea is that 6057 * two spellings that sound alike will have the same Soundex 6058 * value, which means that Soundex can be used for fuzzy matching 6059 * of names. 6060 * 6061 * Params: 6062 * str = String or InputRange to convert to Soundex representation. 6063 * 6064 * Returns: 6065 * The four character array with the Soundex result in it. 6066 * The array has zero's in it if there is no Soundex representation for the string. 6067 * 6068 * See_Also: 6069 * $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia), 6070 * $(LUCKY The Soundex Indexing System) 6071 * $(LREF soundex) 6072 * 6073 * Bugs: 6074 * Only works well with English names. 6075 * There are other arguably better Soundex algorithms, 6076 * but this one is the standard one. 6077 */ 6078char[4] soundexer(Range)(Range str) 6079if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) && 6080 !isConvertibleToString!Range) 6081{ 6082 alias C = Unqual!(ElementEncodingType!Range); 6083 6084 static immutable dex = 6085 // ABCDEFGHIJKLMNOPQRSTUVWXYZ 6086 "01230120022455012623010202"; 6087 6088 char[4] result = void; 6089 size_t b = 0; 6090 C lastc; 6091 foreach (C c; str) 6092 { 6093 if (c >= 'a' && c <= 'z') 6094 c -= 'a' - 'A'; 6095 else if (c >= 'A' && c <= 'Z') 6096 { 6097 } 6098 else 6099 { 6100 lastc = lastc.init; 6101 continue; 6102 } 6103 if (b == 0) 6104 { 6105 result[0] = cast(char) c; 6106 b++; 6107 lastc = dex[c - 'A']; 6108 } 6109 else 6110 { 6111 if (c == 'H' || c == 'W') 6112 continue; 6113 if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U') 6114 lastc = lastc.init; 6115 c = dex[c - 'A']; 6116 if (c != '0' && c != lastc) 6117 { 6118 result[b] = cast(char) c; 6119 b++; 6120 lastc = c; 6121 } 6122 if (b == 4) 6123 goto Lret; 6124 } 6125 } 6126 if (b == 0) 6127 result[] = 0; 6128 else 6129 result[b .. 4] = '0'; 6130 Lret: 6131 return result; 6132} 6133 6134char[4] soundexer(Range)(auto ref Range str) 6135if (isConvertibleToString!Range) 6136{ 6137 return soundexer!(StringTypeOf!Range)(str); 6138} 6139 6140/***************************** 6141 * Like $(LREF soundexer), but with different parameters 6142 * and return value. 6143 * 6144 * Params: 6145 * str = String to convert to Soundex representation. 6146 * buffer = Optional 4 char array to put the resulting Soundex 6147 * characters into. If null, the return value 6148 * buffer will be allocated on the heap. 6149 * Returns: 6150 * The four character array with the Soundex result in it. 6151 * Returns null if there is no Soundex representation for the string. 6152 * See_Also: 6153 * $(LREF soundexer) 6154 */ 6155char[] soundex(const(char)[] str, char[] buffer = null) 6156 @safe pure nothrow 6157in 6158{ 6159 assert(buffer is null || buffer.length >= 4); 6160} 6161out (result) 6162{ 6163 if (result !is null) 6164 { 6165 assert(result.length == 4); 6166 assert(result[0] >= 'A' && result[0] <= 'Z'); 6167 foreach (char c; result[1 .. 4]) 6168 assert(c >= '0' && c <= '6'); 6169 } 6170} 6171body 6172{ 6173 char[4] result = soundexer(str); 6174 if (result[0] == 0) 6175 return null; 6176 if (buffer is null) 6177 buffer = new char[4]; 6178 buffer[] = result[]; 6179 return buffer; 6180} 6181 6182 6183@safe pure nothrow unittest 6184{ 6185 import std.exception : assertCTFEable; 6186 assertCTFEable!( 6187 { 6188 char[4] buffer; 6189 6190 assert(soundex(null) == null); 6191 assert(soundex("") == null); 6192 assert(soundex("0123^&^^**&^") == null); 6193 assert(soundex("Euler") == "E460"); 6194 assert(soundex(" Ellery ") == "E460"); 6195 assert(soundex("Gauss") == "G200"); 6196 assert(soundex("Ghosh") == "G200"); 6197 assert(soundex("Hilbert") == "H416"); 6198 assert(soundex("Heilbronn") == "H416"); 6199 assert(soundex("Knuth") == "K530"); 6200 assert(soundex("Kant", buffer) == "K530"); 6201 assert(soundex("Lloyd") == "L300"); 6202 assert(soundex("Ladd") == "L300"); 6203 assert(soundex("Lukasiewicz", buffer) == "L222"); 6204 assert(soundex("Lissajous") == "L222"); 6205 assert(soundex("Robert") == "R163"); 6206 assert(soundex("Rupert") == "R163"); 6207 assert(soundex("Rubin") == "R150"); 6208 assert(soundex("Washington") == "W252"); 6209 assert(soundex("Lee") == "L000"); 6210 assert(soundex("Gutierrez") == "G362"); 6211 assert(soundex("Pfister") == "P236"); 6212 assert(soundex("Jackson") == "J250"); 6213 assert(soundex("Tymczak") == "T522"); 6214 assert(soundex("Ashcraft") == "A261"); 6215 6216 assert(soundex("Woo") == "W000"); 6217 assert(soundex("Pilgrim") == "P426"); 6218 assert(soundex("Flingjingwaller") == "F452"); 6219 assert(soundex("PEARSE") == "P620"); 6220 assert(soundex("PIERCE") == "P620"); 6221 assert(soundex("Price") == "P620"); 6222 assert(soundex("CATHY") == "C300"); 6223 assert(soundex("KATHY") == "K300"); 6224 assert(soundex("Jones") == "J520"); 6225 assert(soundex("johnsons") == "J525"); 6226 assert(soundex("Hardin") == "H635"); 6227 assert(soundex("Martinez") == "M635"); 6228 6229 import std.utf : byChar, byDchar, byWchar; 6230 assert(soundexer("Martinez".byChar ) == "M635"); 6231 assert(soundexer("Martinez".byWchar) == "M635"); 6232 assert(soundexer("Martinez".byDchar) == "M635"); 6233 }); 6234} 6235 6236@safe pure unittest 6237{ 6238 assert(testAliasedString!soundexer("Martinez")); 6239} 6240 6241 6242/*************************************************** 6243 * Construct an associative array consisting of all 6244 * abbreviations that uniquely map to the strings in values. 6245 * 6246 * This is useful in cases where the user is expected to type 6247 * in one of a known set of strings, and the program will helpfully 6248 * auto-complete the string once sufficient characters have been 6249 * entered that uniquely identify it. 6250 */ 6251 6252string[string] abbrev(string[] values) @safe pure 6253{ 6254 import std.algorithm.sorting : sort; 6255 6256 string[string] result; 6257 6258 // Make a copy when sorting so we follow COW principles. 6259 values = values.dup; 6260 sort(values); 6261 6262 size_t values_length = values.length; 6263 size_t lasti = values_length; 6264 size_t nexti; 6265 6266 string nv; 6267 string lv; 6268 6269 for (size_t i = 0; i < values_length; i = nexti) 6270 { 6271 string value = values[i]; 6272 6273 // Skip dups 6274 for (nexti = i + 1; nexti < values_length; nexti++) 6275 { 6276 nv = values[nexti]; 6277 if (value != values[nexti]) 6278 break; 6279 } 6280 6281 import std.utf : stride; 6282 6283 for (size_t j = 0; j < value.length; j += stride(value, j)) 6284 { 6285 string v = value[0 .. j]; 6286 6287 if ((nexti == values_length || j > nv.length || v != nv[0 .. j]) && 6288 (lasti == values_length || j > lv.length || v != lv[0 .. j])) 6289 { 6290 result[v] = value; 6291 } 6292 } 6293 result[value] = value; 6294 lasti = i; 6295 lv = value; 6296 } 6297 6298 return result; 6299} 6300 6301/// 6302@safe unittest 6303{ 6304 import std.string; 6305 6306 static string[] list = [ "food", "foxy" ]; 6307 auto abbrevs = abbrev(list); 6308 assert(abbrevs == ["fox": "foxy", "food": "food", 6309 "foxy": "foxy", "foo": "food"]); 6310} 6311 6312 6313@system pure unittest 6314{ 6315 import std.algorithm.sorting : sort; 6316 import std.conv : to; 6317 import std.exception : assertCTFEable; 6318 6319 assertCTFEable!( 6320 { 6321 string[] values; 6322 values ~= "hello"; 6323 values ~= "hello"; 6324 values ~= "he"; 6325 6326 string[string] r; 6327 6328 r = abbrev(values); 6329 auto keys = r.keys.dup; 6330 sort(keys); 6331 6332 assert(keys.length == 4); 6333 assert(keys[0] == "he"); 6334 assert(keys[1] == "hel"); 6335 assert(keys[2] == "hell"); 6336 assert(keys[3] == "hello"); 6337 6338 assert(r[keys[0]] == "he"); 6339 assert(r[keys[1]] == "hello"); 6340 assert(r[keys[2]] == "hello"); 6341 assert(r[keys[3]] == "hello"); 6342 }); 6343} 6344 6345 6346/****************************************** 6347 * Compute _column number at the end of the printed form of the string, 6348 * assuming the string starts in the leftmost _column, which is numbered 6349 * starting from 0. 6350 * 6351 * Tab characters are expanded into enough spaces to bring the _column number 6352 * to the next multiple of tabsize. 6353 * If there are multiple lines in the string, the _column number of the last 6354 * line is returned. 6355 * 6356 * Params: 6357 * str = string or InputRange to be analyzed 6358 * tabsize = number of columns a tab character represents 6359 * 6360 * Returns: 6361 * column number 6362 */ 6363 6364size_t column(Range)(Range str, in size_t tabsize = 8) 6365if ((isInputRange!Range && isSomeChar!(Unqual!(ElementEncodingType!Range)) || 6366 isNarrowString!Range) && 6367 !isConvertibleToString!Range) 6368{ 6369 static if (is(Unqual!(ElementEncodingType!Range) == char)) 6370 { 6371 // decoding needed for chars 6372 import std.utf : byDchar; 6373 6374 return str.byDchar.column(tabsize); 6375 } 6376 else 6377 { 6378 // decoding not needed for wchars and dchars 6379 import std.uni : lineSep, paraSep, nelSep; 6380 6381 size_t column; 6382 6383 foreach (const c; str) 6384 { 6385 switch (c) 6386 { 6387 case '\t': 6388 column = (column + tabsize) / tabsize * tabsize; 6389 break; 6390 6391 case '\r': 6392 case '\n': 6393 case paraSep: 6394 case lineSep: 6395 case nelSep: 6396 column = 0; 6397 break; 6398 6399 default: 6400 column++; 6401 break; 6402 } 6403 } 6404 return column; 6405 } 6406} 6407 6408/// 6409@safe pure unittest 6410{ 6411 import std.utf : byChar, byWchar, byDchar; 6412 6413 assert(column("1234 ") == 5); 6414 assert(column("1234 "w) == 5); 6415 assert(column("1234 "d) == 5); 6416 6417 assert(column("1234 ".byChar()) == 5); 6418 assert(column("1234 "w.byWchar()) == 5); 6419 assert(column("1234 "d.byDchar()) == 5); 6420 6421 // Tab stops are set at 8 spaces by default; tab characters insert enough 6422 // spaces to bring the column position to the next multiple of 8. 6423 assert(column("\t") == 8); 6424 assert(column("1\t") == 8); 6425 assert(column("\t1") == 9); 6426 assert(column("123\t") == 8); 6427 6428 // Other tab widths are possible by specifying it explicitly: 6429 assert(column("\t", 4) == 4); 6430 assert(column("1\t", 4) == 4); 6431 assert(column("\t1", 4) == 5); 6432 assert(column("123\t", 4) == 4); 6433 6434 // New lines reset the column number. 6435 assert(column("abc\n") == 0); 6436 assert(column("abc\n1") == 1); 6437 assert(column("abcdefg\r1234") == 4); 6438 assert(column("abc\u20281") == 1); 6439 assert(column("abc\u20291") == 1); 6440 assert(column("abc\u00851") == 1); 6441 assert(column("abc\u00861") == 5); 6442} 6443 6444size_t column(Range)(auto ref Range str, in size_t tabsize = 8) 6445if (isConvertibleToString!Range) 6446{ 6447 return column!(StringTypeOf!Range)(str, tabsize); 6448} 6449 6450@safe pure unittest 6451{ 6452 assert(testAliasedString!column("abc\u00861")); 6453} 6454 6455@safe @nogc unittest 6456{ 6457 import std.conv : to; 6458 import std.exception : assertCTFEable; 6459 6460 assertCTFEable!( 6461 { 6462 assert(column(string.init) == 0); 6463 assert(column("") == 0); 6464 assert(column("\t") == 8); 6465 assert(column("abc\t") == 8); 6466 assert(column("12345678\t") == 16); 6467 }); 6468} 6469 6470/****************************************** 6471 * Wrap text into a paragraph. 6472 * 6473 * The input text string s is formed into a paragraph 6474 * by breaking it up into a sequence of lines, delineated 6475 * by \n, such that the number of columns is not exceeded 6476 * on each line. 6477 * The last line is terminated with a \n. 6478 * Params: 6479 * s = text string to be wrapped 6480 * columns = maximum number of _columns in the paragraph 6481 * firstindent = string used to _indent first line of the paragraph 6482 * indent = string to use to _indent following lines of the paragraph 6483 * tabsize = column spacing of tabs in firstindent[] and indent[] 6484 * Returns: 6485 * resulting paragraph as an allocated string 6486 */ 6487 6488S wrap(S)(S s, in size_t columns = 80, S firstindent = null, 6489S indent = null, in size_t tabsize = 8) 6490if (isSomeString!S) 6491{ 6492 import std.uni : isWhite; 6493 typeof(s.dup) result; 6494 bool inword; 6495 bool first = true; 6496 size_t wordstart; 6497 6498 const indentcol = column(indent, tabsize); 6499 6500 result.length = firstindent.length + s.length; 6501 result.length = firstindent.length; 6502 result[] = firstindent[]; 6503 auto col = column(firstindent, tabsize); 6504 foreach (size_t i, dchar c; s) 6505 { 6506 if (isWhite(c)) 6507 { 6508 if (inword) 6509 { 6510 if (first) 6511 { 6512 } 6513 else if (col + 1 + (i - wordstart) > columns) 6514 { 6515 result ~= '\n'; 6516 result ~= indent; 6517 col = indentcol; 6518 } 6519 else 6520 { 6521 result ~= ' '; 6522 col += 1; 6523 } 6524 result ~= s[wordstart .. i]; 6525 col += i - wordstart; 6526 inword = false; 6527 first = false; 6528 } 6529 } 6530 else 6531 { 6532 if (!inword) 6533 { 6534 wordstart = i; 6535 inword = true; 6536 } 6537 } 6538 } 6539 6540 if (inword) 6541 { 6542 if (col + 1 + (s.length - wordstart) >= columns) 6543 { 6544 result ~= '\n'; 6545 result ~= indent; 6546 } 6547 else if (result.length != firstindent.length) 6548 result ~= ' '; 6549 result ~= s[wordstart .. s.length]; 6550 } 6551 result ~= '\n'; 6552 6553 return result; 6554} 6555 6556/// 6557@safe pure unittest 6558{ 6559 assert(wrap("a short string", 7) == "a short\nstring\n"); 6560 6561 // wrap will not break inside of a word, but at the next space 6562 assert(wrap("a short string", 4) == "a\nshort\nstring\n"); 6563 6564 assert(wrap("a short string", 7, "\t") == "\ta\nshort\nstring\n"); 6565 assert(wrap("a short string", 7, "\t", " ") == "\ta\n short\n string\n"); 6566} 6567 6568@safe pure unittest 6569{ 6570 import std.conv : to; 6571 import std.exception : assertCTFEable; 6572 6573 assertCTFEable!( 6574 { 6575 assert(wrap(string.init) == "\n"); 6576 assert(wrap(" a b df ") == "a b df\n"); 6577 assert(wrap(" a b df ", 3) == "a b\ndf\n"); 6578 assert(wrap(" a bc df ", 3) == "a\nbc\ndf\n"); 6579 assert(wrap(" abcd df ", 3) == "abcd\ndf\n"); 6580 assert(wrap("x") == "x\n"); 6581 assert(wrap("u u") == "u u\n"); 6582 assert(wrap("abcd", 3) == "\nabcd\n"); 6583 assert(wrap("a de", 10, "\t", " ", 8) == "\ta\n de\n"); 6584 }); 6585} 6586 6587/****************************************** 6588 * Removes one level of indentation from a multi-line string. 6589 * 6590 * This uniformly outdents the text as much as possible. 6591 * Whitespace-only lines are always converted to blank lines. 6592 * 6593 * Does not allocate memory if it does not throw. 6594 * 6595 * Params: 6596 * str = multi-line string 6597 * 6598 * Returns: 6599 * outdented string 6600 * 6601 * Throws: 6602 * StringException if indentation is done with different sequences 6603 * of whitespace characters. 6604 */ 6605S outdent(S)(S str) @safe pure 6606if (isSomeString!S) 6607{ 6608 return str.splitLines(Yes.keepTerminator).outdent().join(); 6609} 6610 6611/// 6612@safe pure unittest 6613{ 6614 enum pretty = q{ 6615 import std.stdio; 6616 void main() { 6617 writeln("Hello"); 6618 } 6619 }.outdent(); 6620 6621 enum ugly = q{ 6622import std.stdio; 6623void main() { 6624 writeln("Hello"); 6625} 6626}; 6627 6628 assert(pretty == ugly); 6629} 6630 6631 6632/****************************************** 6633 * Removes one level of indentation from an array of single-line strings. 6634 * 6635 * This uniformly outdents the text as much as possible. 6636 * Whitespace-only lines are always converted to blank lines. 6637 * 6638 * Params: 6639 * lines = array of single-line strings 6640 * 6641 * Returns: 6642 * lines[] is rewritten in place with outdented lines 6643 * 6644 * Throws: 6645 * StringException if indentation is done with different sequences 6646 * of whitespace characters. 6647 */ 6648S[] outdent(S)(S[] lines) @safe pure 6649if (isSomeString!S) 6650{ 6651 import std.algorithm.searching : startsWith; 6652 6653 if (lines.empty) 6654 { 6655 return null; 6656 } 6657 6658 static S leadingWhiteOf(S str) 6659 { 6660 return str[ 0 .. $ - stripLeft(str).length ]; 6661 } 6662 6663 S shortestIndent; 6664 foreach (ref line; lines) 6665 { 6666 const stripped = line.stripLeft(); 6667 6668 if (stripped.empty) 6669 { 6670 line = line[line.chomp().length .. $]; 6671 } 6672 else 6673 { 6674 const indent = leadingWhiteOf(line); 6675 6676 // Comparing number of code units instead of code points is OK here 6677 // because this function throws upon inconsistent indentation. 6678 if (shortestIndent is null || indent.length < shortestIndent.length) 6679 { 6680 if (indent.empty) 6681 return lines; 6682 shortestIndent = indent; 6683 } 6684 } 6685 } 6686 6687 foreach (ref line; lines) 6688 { 6689 const stripped = line.stripLeft(); 6690 6691 if (stripped.empty) 6692 { 6693 // Do nothing 6694 } 6695 else if (line.startsWith(shortestIndent)) 6696 { 6697 line = line[shortestIndent.length .. $]; 6698 } 6699 else 6700 { 6701 throw new StringException("outdent: Inconsistent indentation"); 6702 } 6703 } 6704 6705 return lines; 6706} 6707 6708@safe pure unittest 6709{ 6710 import std.conv : to; 6711 import std.exception : assertCTFEable; 6712 6713 template outdent_testStr(S) 6714 { 6715 enum S outdent_testStr = 6716" 6717 \t\tX 6718 \t\U00010143X 6719 \t\t 6720 6721 \t\t\tX 6722\t "; 6723 } 6724 6725 template outdent_expected(S) 6726 { 6727 enum S outdent_expected = 6728" 6729\tX 6730\U00010143X 6731 6732 6733\t\tX 6734"; 6735 } 6736 6737 assertCTFEable!( 6738 { 6739 6740 foreach (S; AliasSeq!(string, wstring, dstring)) 6741 { 6742 enum S blank = ""; 6743 assert(blank.outdent() == blank); 6744 static assert(blank.outdent() == blank); 6745 6746 enum S testStr1 = " \n \t\n "; 6747 enum S expected1 = "\n\n"; 6748 assert(testStr1.outdent() == expected1); 6749 static assert(testStr1.outdent() == expected1); 6750 6751 assert(testStr1[0..$-1].outdent() == expected1); 6752 static assert(testStr1[0..$-1].outdent() == expected1); 6753 6754 enum S testStr2 = "a\n \t\nb"; 6755 assert(testStr2.outdent() == testStr2); 6756 static assert(testStr2.outdent() == testStr2); 6757 6758 enum S testStr3 = 6759" 6760 \t\tX 6761 \t\U00010143X 6762 \t\t 6763 6764 \t\t\tX 6765\t "; 6766 6767 enum S expected3 = 6768" 6769\tX 6770\U00010143X 6771 6772 6773\t\tX 6774"; 6775 assert(testStr3.outdent() == expected3); 6776 static assert(testStr3.outdent() == expected3); 6777 6778 enum testStr4 = " X\r X\n X\r\n X\u2028 X\u2029 X"; 6779 enum expected4 = "X\rX\nX\r\nX\u2028X\u2029X"; 6780 assert(testStr4.outdent() == expected4); 6781 static assert(testStr4.outdent() == expected4); 6782 6783 enum testStr5 = testStr4[0..$-1]; 6784 enum expected5 = expected4[0..$-1]; 6785 assert(testStr5.outdent() == expected5); 6786 static assert(testStr5.outdent() == expected5); 6787 6788 enum testStr6 = " \r \n \r\n \u2028 \u2029"; 6789 enum expected6 = "\r\n\r\n\u2028\u2029"; 6790 assert(testStr6.outdent() == expected6); 6791 static assert(testStr6.outdent() == expected6); 6792 6793 enum testStr7 = " a \n b "; 6794 enum expected7 = "a \nb "; 6795 assert(testStr7.outdent() == expected7); 6796 static assert(testStr7.outdent() == expected7); 6797 } 6798 }); 6799} 6800 6801@safe pure unittest 6802{ 6803 import std.exception : assertThrown; 6804 auto bad = " a\n\tb\n c"; 6805 assertThrown!StringException(bad.outdent); 6806} 6807 6808/** Assume the given array of integers $(D arr) is a well-formed UTF string and 6809return it typed as a UTF string. 6810 6811$(D ubyte) becomes $(D char), $(D ushort) becomes $(D wchar) and $(D uint) 6812becomes $(D dchar). Type qualifiers are preserved. 6813 6814When compiled with debug mode, this function performs an extra check to make 6815sure the return value is a valid Unicode string. 6816 6817Params: 6818 arr = array of bytes, ubytes, shorts, ushorts, ints, or uints 6819 6820Returns: 6821 arr retyped as an array of chars, wchars, or dchars 6822 6823See_Also: $(LREF representation) 6824*/ 6825auto assumeUTF(T)(T[] arr) pure 6826if (staticIndexOf!(Unqual!T, ubyte, ushort, uint) != -1) 6827{ 6828 import std.traits : ModifyTypePreservingTQ; 6829 import std.utf : validate; 6830 alias ToUTFType(U) = AliasSeq!(char, wchar, dchar)[U.sizeof / 2]; 6831 auto asUTF = cast(ModifyTypePreservingTQ!(ToUTFType, T)[])arr; 6832 debug validate(asUTF); 6833 return asUTF; 6834} 6835 6836/// 6837@safe pure unittest 6838{ 6839 string a = "H��lo World"; 6840 immutable(ubyte)[] b = a.representation; 6841 string c = b.assumeUTF; 6842 6843 assert(a == c); 6844} 6845 6846pure @system unittest 6847{ 6848 import std.algorithm.comparison : equal; 6849 foreach (T; AliasSeq!(char[], wchar[], dchar[])) 6850 { 6851 immutable T jti = "Hello World"; 6852 T jt = jti.dup; 6853 6854 static if (is(T == char[])) 6855 { 6856 auto gt = cast(ubyte[]) jt; 6857 auto gtc = cast(const(ubyte)[])jt; 6858 auto gti = cast(immutable(ubyte)[])jt; 6859 } 6860 else static if (is(T == wchar[])) 6861 { 6862 auto gt = cast(ushort[]) jt; 6863 auto gtc = cast(const(ushort)[])jt; 6864 auto gti = cast(immutable(ushort)[])jt; 6865 } 6866 else static if (is(T == dchar[])) 6867 { 6868 auto gt = cast(uint[]) jt; 6869 auto gtc = cast(const(uint)[])jt; 6870 auto gti = cast(immutable(uint)[])jt; 6871 } 6872 6873 auto ht = assumeUTF(gt); 6874 auto htc = assumeUTF(gtc); 6875 auto hti = assumeUTF(gti); 6876 assert(equal(jt, ht)); 6877 assert(equal(jt, htc)); 6878 assert(equal(jt, hti)); 6879 } 6880} 6881