1/* $NetBSD: lstrlib.c,v 1.21 2023/06/08 21:12:08 nikita Exp $ */ 2 3/* 4** Id: lstrlib.c 5** Standard library for string operations and pattern-matching 6** See Copyright Notice in lua.h 7*/ 8 9#define lstrlib_c 10#define LUA_LIB 11 12#include "lprefix.h" 13 14 15#ifndef _KERNEL 16#include <ctype.h> 17#include <float.h> 18#include <limits.h> 19#include <locale.h> 20#include <math.h> 21#include <stddef.h> 22#include <stdio.h> 23#include <stdlib.h> 24#include <string.h> 25#endif /* _KERNEL */ 26 27#include "lua.h" 28 29#include "lauxlib.h" 30#include "lualib.h" 31 32 33/* 34** maximum number of captures that a pattern can do during 35** pattern-matching. This limit is arbitrary, but must fit in 36** an unsigned char. 37*/ 38#if !defined(LUA_MAXCAPTURES) 39#define LUA_MAXCAPTURES 32 40#endif 41 42 43/* macro to 'unsign' a character */ 44#define uchar(c) ((unsigned char)(c)) 45 46 47/* 48** Some sizes are better limited to fit in 'int', but must also fit in 49** 'size_t'. (We assume that 'lua_Integer' cannot be smaller than 'int'.) 50*/ 51#define MAX_SIZET ((size_t)(~(size_t)0)) 52 53#define MAXSIZE \ 54 (sizeof(size_t) < sizeof(int) ? MAX_SIZET : (size_t)(INT_MAX)) 55 56 57 58 59static int str_len (lua_State *L) { 60 size_t l; 61 luaL_checklstring(L, 1, &l); 62 lua_pushinteger(L, (lua_Integer)l); 63 return 1; 64} 65 66 67/* 68** translate a relative initial string position 69** (negative means back from end): clip result to [1, inf). 70** The length of any string in Lua must fit in a lua_Integer, 71** so there are no overflows in the casts. 72** The inverted comparison avoids a possible overflow 73** computing '-pos'. 74*/ 75static size_t posrelatI (lua_Integer pos, size_t len) { 76 if (pos > 0) 77 return (size_t)pos; 78 else if (pos == 0) 79 return 1; 80 else if (pos < -(lua_Integer)len) /* inverted comparison */ 81 return 1; /* clip to 1 */ 82 else return len + (size_t)pos + 1; 83} 84 85 86/* 87** Gets an optional ending string position from argument 'arg', 88** with default value 'def'. 89** Negative means back from end: clip result to [0, len] 90*/ 91static size_t getendpos (lua_State *L, int arg, lua_Integer def, 92 size_t len) { 93 lua_Integer pos = luaL_optinteger(L, arg, def); 94 if (pos > (lua_Integer)len) 95 return len; 96 else if (pos >= 0) 97 return (size_t)pos; 98 else if (pos < -(lua_Integer)len) 99 return 0; 100 else return len + (size_t)pos + 1; 101} 102 103 104static int str_sub (lua_State *L) { 105 size_t l; 106 const char *s = luaL_checklstring(L, 1, &l); 107 size_t start = posrelatI(luaL_checkinteger(L, 2), l); 108 size_t end = getendpos(L, 3, -1, l); 109 if (start <= end) 110 lua_pushlstring(L, s + start - 1, (end - start) + 1); 111 else lua_pushliteral(L, ""); 112 return 1; 113} 114 115 116static int str_reverse (lua_State *L) { 117 size_t l, i; 118 luaL_Buffer b; 119 const char *s = luaL_checklstring(L, 1, &l); 120 char *p = luaL_buffinitsize(L, &b, l); 121 for (i = 0; i < l; i++) 122 p[i] = s[l - i - 1]; 123 luaL_pushresultsize(&b, l); 124 return 1; 125} 126 127 128static int str_lower (lua_State *L) { 129 size_t l; 130 size_t i; 131 luaL_Buffer b; 132 const char *s = luaL_checklstring(L, 1, &l); 133 char *p = luaL_buffinitsize(L, &b, l); 134 for (i=0; i<l; i++) 135 p[i] = tolower(uchar(s[i])); 136 luaL_pushresultsize(&b, l); 137 return 1; 138} 139 140 141static int str_upper (lua_State *L) { 142 size_t l; 143 size_t i; 144 luaL_Buffer b; 145 const char *s = luaL_checklstring(L, 1, &l); 146 char *p = luaL_buffinitsize(L, &b, l); 147 for (i=0; i<l; i++) 148 p[i] = toupper(uchar(s[i])); 149 luaL_pushresultsize(&b, l); 150 return 1; 151} 152 153 154static int str_rep (lua_State *L) { 155 size_t l, lsep; 156 const char *s = luaL_checklstring(L, 1, &l); 157 lua_Integer n = luaL_checkinteger(L, 2); 158 const char *sep = luaL_optlstring(L, 3, "", &lsep); 159 if (n <= 0) 160 lua_pushliteral(L, ""); 161 else if (l_unlikely(l + lsep < l || l + lsep > MAXSIZE / n)) 162 return luaL_error(L, "resulting string too large"); 163 else { 164 size_t totallen = (size_t)n * l + (size_t)(n - 1) * lsep; 165 luaL_Buffer b; 166 char *p = luaL_buffinitsize(L, &b, totallen); 167 while (n-- > 1) { /* first n-1 copies (followed by separator) */ 168 memcpy(p, s, l * sizeof(char)); p += l; 169 if (lsep > 0) { /* empty 'memcpy' is not that cheap */ 170 memcpy(p, sep, lsep * sizeof(char)); 171 p += lsep; 172 } 173 } 174 memcpy(p, s, l * sizeof(char)); /* last copy (not followed by separator) */ 175 luaL_pushresultsize(&b, totallen); 176 } 177 return 1; 178} 179 180 181static int str_byte (lua_State *L) { 182 size_t l; 183 const char *s = luaL_checklstring(L, 1, &l); 184 lua_Integer pi = luaL_optinteger(L, 2, 1); 185 size_t posi = posrelatI(pi, l); 186 size_t pose = getendpos(L, 3, pi, l); 187 int n, i; 188 if (posi > pose) return 0; /* empty interval; return no values */ 189 if (l_unlikely(pose - posi >= (size_t)INT_MAX)) /* arithmetic overflow? */ 190 return luaL_error(L, "string slice too long"); 191 n = (int)(pose - posi) + 1; 192 luaL_checkstack(L, n, "string slice too long"); 193 for (i=0; i<n; i++) 194 lua_pushinteger(L, uchar(s[posi+i-1])); 195 return n; 196} 197 198 199static int str_char (lua_State *L) { 200 int n = lua_gettop(L); /* number of arguments */ 201 int i; 202 luaL_Buffer b; 203 char *p = luaL_buffinitsize(L, &b, n); 204 for (i=1; i<=n; i++) { 205 lua_Unsigned c = (lua_Unsigned)luaL_checkinteger(L, i); 206 luaL_argcheck(L, c <= (lua_Unsigned)UCHAR_MAX, i, "value out of range"); 207 p[i - 1] = uchar(c); 208 } 209 luaL_pushresultsize(&b, n); 210 return 1; 211} 212 213 214/* 215** Buffer to store the result of 'string.dump'. It must be initialized 216** after the call to 'lua_dump', to ensure that the function is on the 217** top of the stack when 'lua_dump' is called. ('luaL_buffinit' might 218** push stuff.) 219*/ 220struct str_Writer { 221 int init; /* true iff buffer has been initialized */ 222 luaL_Buffer B; 223}; 224 225 226static int writer (lua_State *L, const void *b, size_t size, void *ud) { 227 struct str_Writer *state = (struct str_Writer *)ud; 228 if (!state->init) { 229 state->init = 1; 230 luaL_buffinit(L, &state->B); 231 } 232 luaL_addlstring(&state->B, (const char *)b, size); 233 return 0; 234} 235 236 237static int str_dump (lua_State *L) { 238 struct str_Writer state; 239 int strip = lua_toboolean(L, 2); 240 luaL_checktype(L, 1, LUA_TFUNCTION); 241 lua_settop(L, 1); /* ensure function is on the top of the stack */ 242 state.init = 0; 243 if (l_unlikely(lua_dump(L, writer, &state, strip) != 0)) 244 return luaL_error(L, "unable to dump given function"); 245 luaL_pushresult(&state.B); 246 return 1; 247} 248 249 250 251/* 252** {====================================================== 253** METAMETHODS 254** ======================================================= 255*/ 256 257#if defined(LUA_NOCVTS2N) /* { */ 258 259/* no coercion from strings to numbers */ 260 261static const luaL_Reg stringmetamethods[] = { 262 {"__index", NULL}, /* placeholder */ 263 {NULL, NULL} 264}; 265 266#else /* }{ */ 267 268static int tonum (lua_State *L, int arg) { 269 if (lua_type(L, arg) == LUA_TNUMBER) { /* already a number? */ 270 lua_pushvalue(L, arg); 271 return 1; 272 } 273 else { /* check whether it is a numerical string */ 274 size_t len; 275 const char *s = lua_tolstring(L, arg, &len); 276 return (s != NULL && lua_stringtonumber(L, s) == len + 1); 277 } 278} 279 280 281static void trymt (lua_State *L, const char *mtname) { 282 lua_settop(L, 2); /* back to the original arguments */ 283 if (l_unlikely(lua_type(L, 2) == LUA_TSTRING || 284 !luaL_getmetafield(L, 2, mtname))) 285 luaL_error(L, "attempt to %s a '%s' with a '%s'", mtname + 2, 286 luaL_typename(L, -2), luaL_typename(L, -1)); 287 lua_insert(L, -3); /* put metamethod before arguments */ 288 lua_call(L, 2, 1); /* call metamethod */ 289} 290 291 292static int arith (lua_State *L, int op, const char *mtname) { 293 if (tonum(L, 1) && tonum(L, 2)) 294 lua_arith(L, op); /* result will be on the top */ 295 else 296 trymt(L, mtname); 297 return 1; 298} 299 300 301static int arith_add (lua_State *L) { 302 return arith(L, LUA_OPADD, "__add"); 303} 304 305static int arith_sub (lua_State *L) { 306 return arith(L, LUA_OPSUB, "__sub"); 307} 308 309static int arith_mul (lua_State *L) { 310 return arith(L, LUA_OPMUL, "__mul"); 311} 312 313static int arith_mod (lua_State *L) { 314 return arith(L, LUA_OPMOD, "__mod"); 315} 316 317#ifndef _KERNEL 318static int arith_pow (lua_State *L) { 319 return arith(L, LUA_OPPOW, "__pow"); 320} 321 322static int arith_div (lua_State *L) { 323 return arith(L, LUA_OPDIV, "__div"); 324} 325#endif /* _KERNEL */ 326 327static int arith_idiv (lua_State *L) { 328 return arith(L, LUA_OPIDIV, "__idiv"); 329} 330 331static int arith_unm (lua_State *L) { 332 return arith(L, LUA_OPUNM, "__unm"); 333} 334 335 336static const luaL_Reg stringmetamethods[] = { 337 {"__add", arith_add}, 338 {"__sub", arith_sub}, 339 {"__mul", arith_mul}, 340 {"__mod", arith_mod}, 341#ifndef _KERNEL 342 {"__pow", arith_pow}, 343 {"__div", arith_div}, 344#endif /* _KERNEL */ 345 {"__idiv", arith_idiv}, 346 {"__unm", arith_unm}, 347 {"__index", NULL}, /* placeholder */ 348 {NULL, NULL} 349}; 350 351#endif /* } */ 352 353/* }====================================================== */ 354 355/* 356** {====================================================== 357** PATTERN MATCHING 358** ======================================================= 359*/ 360 361 362#define CAP_UNFINISHED (-1) 363#define CAP_POSITION (-2) 364 365 366typedef struct MatchState { 367 const char *src_init; /* init of source string */ 368 const char *src_end; /* end ('\0') of source string */ 369 const char *p_end; /* end ('\0') of pattern */ 370 lua_State *L; 371 int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ 372 unsigned char level; /* total number of captures (finished or unfinished) */ 373 struct { 374 const char *init; 375 ptrdiff_t len; 376 } capture[LUA_MAXCAPTURES]; 377} MatchState; 378 379 380/* recursive function */ 381static const char *match (MatchState *ms, const char *s, const char *p); 382 383 384/* maximum recursion depth for 'match' */ 385#if !defined(MAXCCALLS) 386#define MAXCCALLS 200 387#endif 388 389 390#define L_ESC '%' 391#define SPECIALS "^$*+?.([%-" 392 393 394static int check_capture (MatchState *ms, int l) { 395 l -= '1'; 396 if (l_unlikely(l < 0 || l >= ms->level || 397 ms->capture[l].len == CAP_UNFINISHED)) 398 return luaL_error(ms->L, "invalid capture index %%%d", l + 1); 399 return l; 400} 401 402 403static int capture_to_close (MatchState *ms) { 404 int level = ms->level; 405 for (level--; level>=0; level--) 406 if (ms->capture[level].len == CAP_UNFINISHED) return level; 407 return luaL_error(ms->L, "invalid pattern capture"); 408} 409 410 411static const char *classend (MatchState *ms, const char *p) { 412 switch (*p++) { 413 case L_ESC: { 414 if (l_unlikely(p == ms->p_end)) 415 luaL_error(ms->L, "malformed pattern (ends with '%%')"); 416 return p+1; 417 } 418 case '[': { 419 if (*p == '^') p++; 420 do { /* look for a ']' */ 421 if (l_unlikely(p == ms->p_end)) 422 luaL_error(ms->L, "malformed pattern (missing ']')"); 423 if (*(p++) == L_ESC && p < ms->p_end) 424 p++; /* skip escapes (e.g. '%]') */ 425 } while (*p != ']'); 426 return p+1; 427 } 428 default: { 429 return p; 430 } 431 } 432} 433 434 435static int match_class (int c, int cl) { 436 int res; 437 switch (tolower(cl)) { 438 case 'a' : res = isalpha(c); break; 439 case 'c' : res = iscntrl(c); break; 440 case 'd' : res = isdigit(c); break; 441 case 'g' : res = isgraph(c); break; 442 case 'l' : res = islower(c); break; 443 case 'p' : res = ispunct(c); break; 444 case 's' : res = isspace(c); break; 445 case 'u' : res = isupper(c); break; 446 case 'w' : res = isalnum(c); break; 447 case 'x' : res = isxdigit(c); break; 448 case 'z' : res = (c == 0); break; /* deprecated option */ 449 default: return (cl == c); 450 } 451 return (islower(cl) ? res : !res); 452} 453 454 455static int matchbracketclass (int c, const char *p, const char *ec) { 456 int sig = 1; 457 if (*(p+1) == '^') { 458 sig = 0; 459 p++; /* skip the '^' */ 460 } 461 while (++p < ec) { 462 if (*p == L_ESC) { 463 p++; 464 if (match_class(c, uchar(*p))) 465 return sig; 466 } 467 else if ((*(p+1) == '-') && (p+2 < ec)) { 468 p+=2; 469 if (uchar(*(p-2)) <= c && c <= uchar(*p)) 470 return sig; 471 } 472 else if (uchar(*p) == c) return sig; 473 } 474 return !sig; 475} 476 477 478static int singlematch (MatchState *ms, const char *s, const char *p, 479 const char *ep) { 480 if (s >= ms->src_end) 481 return 0; 482 else { 483 int c = uchar(*s); 484 switch (*p) { 485 case '.': return 1; /* matches any char */ 486 case L_ESC: return match_class(c, uchar(*(p+1))); 487 case '[': return matchbracketclass(c, p, ep-1); 488 default: return (uchar(*p) == c); 489 } 490 } 491} 492 493 494static const char *matchbalance (MatchState *ms, const char *s, 495 const char *p) { 496 if (l_unlikely(p >= ms->p_end - 1)) 497 luaL_error(ms->L, "malformed pattern (missing arguments to '%%b')"); 498 if (*s != *p) return NULL; 499 else { 500 int b = *p; 501 int e = *(p+1); 502 int cont = 1; 503 while (++s < ms->src_end) { 504 if (*s == e) { 505 if (--cont == 0) return s+1; 506 } 507 else if (*s == b) cont++; 508 } 509 } 510 return NULL; /* string ends out of balance */ 511} 512 513 514static const char *max_expand (MatchState *ms, const char *s, 515 const char *p, const char *ep) { 516 ptrdiff_t i = 0; /* counts maximum expand for item */ 517 while (singlematch(ms, s + i, p, ep)) 518 i++; 519 /* keeps trying to match with the maximum repetitions */ 520 while (i>=0) { 521 const char *res = match(ms, (s+i), ep+1); 522 if (res) return res; 523 i--; /* else didn't match; reduce 1 repetition to try again */ 524 } 525 return NULL; 526} 527 528 529static const char *min_expand (MatchState *ms, const char *s, 530 const char *p, const char *ep) { 531 for (;;) { 532 const char *res = match(ms, s, ep+1); 533 if (res != NULL) 534 return res; 535 else if (singlematch(ms, s, p, ep)) 536 s++; /* try with one more repetition */ 537 else return NULL; 538 } 539} 540 541 542static const char *start_capture (MatchState *ms, const char *s, 543 const char *p, int what) { 544 const char *res; 545 int level = ms->level; 546 if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures"); 547 ms->capture[level].init = s; 548 ms->capture[level].len = what; 549 ms->level = level+1; 550 if ((res=match(ms, s, p)) == NULL) /* match failed? */ 551 ms->level--; /* undo capture */ 552 return res; 553} 554 555 556static const char *end_capture (MatchState *ms, const char *s, 557 const char *p) { 558 int l = capture_to_close(ms); 559 const char *res; 560 ms->capture[l].len = s - ms->capture[l].init; /* close capture */ 561 if ((res = match(ms, s, p)) == NULL) /* match failed? */ 562 ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ 563 return res; 564} 565 566 567static const char *match_capture (MatchState *ms, const char *s, int l) { 568 size_t len; 569 l = check_capture(ms, l); 570 len = ms->capture[l].len; 571 if ((size_t)(ms->src_end-s) >= len && 572 memcmp(ms->capture[l].init, s, len) == 0) 573 return s+len; 574 else return NULL; 575} 576 577 578static const char *match (MatchState *ms, const char *s, const char *p) { 579 if (l_unlikely(ms->matchdepth-- == 0)) 580 luaL_error(ms->L, "pattern too complex"); 581 init: /* using goto to optimize tail recursion */ 582 if (p != ms->p_end) { /* end of pattern? */ 583 switch (*p) { 584 case '(': { /* start capture */ 585 if (*(p + 1) == ')') /* position capture? */ 586 s = start_capture(ms, s, p + 2, CAP_POSITION); 587 else 588 s = start_capture(ms, s, p + 1, CAP_UNFINISHED); 589 break; 590 } 591 case ')': { /* end capture */ 592 s = end_capture(ms, s, p + 1); 593 break; 594 } 595 case '$': { 596 if ((p + 1) != ms->p_end) /* is the '$' the last char in pattern? */ 597 goto dflt; /* no; go to default */ 598 s = (s == ms->src_end) ? s : NULL; /* check end of string */ 599 break; 600 } 601 case L_ESC: { /* escaped sequences not in the format class[*+?-]? */ 602 switch (*(p + 1)) { 603 case 'b': { /* balanced string? */ 604 s = matchbalance(ms, s, p + 2); 605 if (s != NULL) { 606 p += 4; goto init; /* return match(ms, s, p + 4); */ 607 } /* else fail (s == NULL) */ 608 break; 609 } 610 case 'f': { /* frontier? */ 611 const char *ep; char previous; 612 p += 2; 613 if (l_unlikely(*p != '[')) 614 luaL_error(ms->L, "missing '[' after '%%f' in pattern"); 615 ep = classend(ms, p); /* points to what is next */ 616 previous = (s == ms->src_init) ? '\0' : *(s - 1); 617 if (!matchbracketclass(uchar(previous), p, ep - 1) && 618 matchbracketclass(uchar(*s), p, ep - 1)) { 619 p = ep; goto init; /* return match(ms, s, ep); */ 620 } 621 s = NULL; /* match failed */ 622 break; 623 } 624 case '0': case '1': case '2': case '3': 625 case '4': case '5': case '6': case '7': 626 case '8': case '9': { /* capture results (%0-%9)? */ 627 s = match_capture(ms, s, uchar(*(p + 1))); 628 if (s != NULL) { 629 p += 2; goto init; /* return match(ms, s, p + 2) */ 630 } 631 break; 632 } 633 default: goto dflt; 634 } 635 break; 636 } 637 default: dflt: { /* pattern class plus optional suffix */ 638 const char *ep = classend(ms, p); /* points to optional suffix */ 639 /* does not match at least once? */ 640 if (!singlematch(ms, s, p, ep)) { 641 if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? */ 642 p = ep + 1; goto init; /* return match(ms, s, ep + 1); */ 643 } 644 else /* '+' or no suffix */ 645 s = NULL; /* fail */ 646 } 647 else { /* matched once */ 648 switch (*ep) { /* handle optional suffix */ 649 case '?': { /* optional */ 650 const char *res; 651 if ((res = match(ms, s + 1, ep + 1)) != NULL) 652 s = res; 653 else { 654 p = ep + 1; goto init; /* else return match(ms, s, ep + 1); */ 655 } 656 break; 657 } 658 case '+': /* 1 or more repetitions */ 659 s++; /* 1 match already done */ 660 /* FALLTHROUGH */ 661 case '*': /* 0 or more repetitions */ 662 s = max_expand(ms, s, p, ep); 663 break; 664 case '-': /* 0 or more repetitions (minimum) */ 665 s = min_expand(ms, s, p, ep); 666 break; 667 default: /* no suffix */ 668 s++; p = ep; goto init; /* return match(ms, s + 1, ep); */ 669 } 670 } 671 break; 672 } 673 } 674 } 675 ms->matchdepth++; 676 return s; 677} 678 679 680 681static const char *lmemfind (const char *s1, size_t l1, 682 const char *s2, size_t l2) { 683 if (l2 == 0) return s1; /* empty strings are everywhere */ 684 else if (l2 > l1) return NULL; /* avoids a negative 'l1' */ 685 else { 686 const char *init; /* to search for a '*s2' inside 's1' */ 687 l2--; /* 1st char will be checked by 'memchr' */ 688 l1 = l1-l2; /* 's2' cannot be found after that */ 689 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { 690 init++; /* 1st char is already checked */ 691 if (memcmp(init, s2+1, l2) == 0) 692 return init-1; 693 else { /* correct 'l1' and 's1' to try again */ 694 l1 -= init-s1; 695 s1 = init; 696 } 697 } 698 return NULL; /* not found */ 699 } 700} 701 702 703/* 704** get information about the i-th capture. If there are no captures 705** and 'i==0', return information about the whole match, which 706** is the range 's'..'e'. If the capture is a string, return 707** its length and put its address in '*cap'. If it is an integer 708** (a position), push it on the stack and return CAP_POSITION. 709*/ 710static size_t get_onecapture (MatchState *ms, int i, const char *s, 711 const char *e, const char **cap) { 712 if (i >= ms->level) { 713 if (l_unlikely(i != 0)) 714 luaL_error(ms->L, "invalid capture index %%%d", i + 1); 715 *cap = s; 716 return e - s; 717 } 718 else { 719 ptrdiff_t capl = ms->capture[i].len; 720 *cap = ms->capture[i].init; 721 if (l_unlikely(capl == CAP_UNFINISHED)) 722 luaL_error(ms->L, "unfinished capture"); 723 else if (capl == CAP_POSITION) 724 lua_pushinteger(ms->L, (ms->capture[i].init - ms->src_init) + 1); 725 return capl; 726 } 727} 728 729 730/* 731** Push the i-th capture on the stack. 732*/ 733static void push_onecapture (MatchState *ms, int i, const char *s, 734 const char *e) { 735 const char *cap; 736 ptrdiff_t l = get_onecapture(ms, i, s, e, &cap); 737 if (l != CAP_POSITION) 738 lua_pushlstring(ms->L, cap, l); 739 /* else position was already pushed */ 740} 741 742 743static int push_captures (MatchState *ms, const char *s, const char *e) { 744 int i; 745 int nlevels = (ms->level == 0 && s) ? 1 : ms->level; 746 luaL_checkstack(ms->L, nlevels, "too many captures"); 747 for (i = 0; i < nlevels; i++) 748 push_onecapture(ms, i, s, e); 749 return nlevels; /* number of strings pushed */ 750} 751 752 753/* check whether pattern has no special characters */ 754static int nospecials (const char *p, size_t l) { 755 size_t upto = 0; 756 do { 757 if (strpbrk(p + upto, SPECIALS)) 758 return 0; /* pattern has a special character */ 759 upto += strlen(p + upto) + 1; /* may have more after \0 */ 760 } while (upto <= l); 761 return 1; /* no special chars found */ 762} 763 764 765static void prepstate (MatchState *ms, lua_State *L, 766 const char *s, size_t ls, const char *p, size_t lp) { 767 ms->L = L; 768 ms->matchdepth = MAXCCALLS; 769 ms->src_init = s; 770 ms->src_end = s + ls; 771 ms->p_end = p + lp; 772} 773 774 775static void reprepstate (MatchState *ms) { 776 ms->level = 0; 777 lua_assert(ms->matchdepth == MAXCCALLS); 778} 779 780 781static int str_find_aux (lua_State *L, int find) { 782 size_t ls, lp; 783 const char *s = luaL_checklstring(L, 1, &ls); 784 const char *p = luaL_checklstring(L, 2, &lp); 785 size_t init = posrelatI(luaL_optinteger(L, 3, 1), ls) - 1; 786 if (init > ls) { /* start after string's end? */ 787 luaL_pushfail(L); /* cannot find anything */ 788 return 1; 789 } 790 /* explicit request or no special characters? */ 791 if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) { 792 /* do a plain search */ 793 const char *s2 = lmemfind(s + init, ls - init, p, lp); 794 if (s2) { 795 lua_pushinteger(L, (s2 - s) + 1); 796 lua_pushinteger(L, (s2 - s) + lp); 797 return 2; 798 } 799 } 800 else { 801 MatchState ms; 802 const char *s1 = s + init; 803 int anchor = (*p == '^'); 804 if (anchor) { 805 p++; lp--; /* skip anchor character */ 806 } 807 prepstate(&ms, L, s, ls, p, lp); 808 do { 809 const char *res; 810 reprepstate(&ms); 811 if ((res=match(&ms, s1, p)) != NULL) { 812 if (find) { 813 lua_pushinteger(L, (s1 - s) + 1); /* start */ 814 lua_pushinteger(L, res - s); /* end */ 815 return push_captures(&ms, NULL, 0) + 2; 816 } 817 else 818 return push_captures(&ms, s1, res); 819 } 820 } while (s1++ < ms.src_end && !anchor); 821 } 822 luaL_pushfail(L); /* not found */ 823 return 1; 824} 825 826 827static int str_find (lua_State *L) { 828 return str_find_aux(L, 1); 829} 830 831 832static int str_match (lua_State *L) { 833 return str_find_aux(L, 0); 834} 835 836 837/* state for 'gmatch' */ 838typedef struct GMatchState { 839 const char *src; /* current position */ 840 const char *p; /* pattern */ 841 const char *lastmatch; /* end of last match */ 842 MatchState ms; /* match state */ 843} GMatchState; 844 845 846static int gmatch_aux (lua_State *L) { 847 GMatchState *gm = (GMatchState *)lua_touserdata(L, lua_upvalueindex(3)); 848 const char *src; 849 gm->ms.L = L; 850 for (src = gm->src; src <= gm->ms.src_end; src++) { 851 const char *e; 852 reprepstate(&gm->ms); 853 if ((e = match(&gm->ms, src, gm->p)) != NULL && e != gm->lastmatch) { 854 gm->src = gm->lastmatch = e; 855 return push_captures(&gm->ms, src, e); 856 } 857 } 858 return 0; /* not found */ 859} 860 861 862static int gmatch (lua_State *L) { 863 size_t ls, lp; 864 const char *s = luaL_checklstring(L, 1, &ls); 865 const char *p = luaL_checklstring(L, 2, &lp); 866 size_t init = posrelatI(luaL_optinteger(L, 3, 1), ls) - 1; 867 GMatchState *gm; 868 lua_settop(L, 2); /* keep strings on closure to avoid being collected */ 869 gm = (GMatchState *)lua_newuserdatauv(L, sizeof(GMatchState), 0); 870 if (init > ls) /* start after string's end? */ 871 init = ls + 1; /* avoid overflows in 's + init' */ 872 prepstate(&gm->ms, L, s, ls, p, lp); 873 gm->src = s + init; gm->p = p; gm->lastmatch = NULL; 874 lua_pushcclosure(L, gmatch_aux, 3); 875 return 1; 876} 877 878 879static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, 880 const char *e) { 881 size_t l; 882 lua_State *L = ms->L; 883 const char *news = lua_tolstring(L, 3, &l); 884 const char *p; 885 while ((p = (char *)memchr(news, L_ESC, l)) != NULL) { 886 luaL_addlstring(b, news, p - news); 887 p++; /* skip ESC */ 888 if (*p == L_ESC) /* '%%' */ 889 luaL_addchar(b, *p); 890 else if (*p == '0') /* '%0' */ 891 luaL_addlstring(b, s, e - s); 892 else if (isdigit(uchar(*p))) { /* '%n' */ 893 const char *cap; 894 ptrdiff_t resl = get_onecapture(ms, *p - '1', s, e, &cap); 895 if (resl == CAP_POSITION) 896 luaL_addvalue(b); /* add position to accumulated result */ 897 else 898 luaL_addlstring(b, cap, resl); 899 } 900 else 901 luaL_error(L, "invalid use of '%c' in replacement string", L_ESC); 902 l -= p + 1 - news; 903 news = p + 1; 904 } 905 luaL_addlstring(b, news, l); 906} 907 908 909/* 910** Add the replacement value to the string buffer 'b'. 911** Return true if the original string was changed. (Function calls and 912** table indexing resulting in nil or false do not change the subject.) 913*/ 914static int add_value (MatchState *ms, luaL_Buffer *b, const char *s, 915 const char *e, int tr) { 916 lua_State *L = ms->L; 917 switch (tr) { 918 case LUA_TFUNCTION: { /* call the function */ 919 int n; 920 lua_pushvalue(L, 3); /* push the function */ 921 n = push_captures(ms, s, e); /* all captures as arguments */ 922 lua_call(L, n, 1); /* call it */ 923 break; 924 } 925 case LUA_TTABLE: { /* index the table */ 926 push_onecapture(ms, 0, s, e); /* first capture is the index */ 927 lua_gettable(L, 3); 928 break; 929 } 930 default: { /* LUA_TNUMBER or LUA_TSTRING */ 931 add_s(ms, b, s, e); /* add value to the buffer */ 932 return 1; /* something changed */ 933 } 934 } 935 if (!lua_toboolean(L, -1)) { /* nil or false? */ 936 lua_pop(L, 1); /* remove value */ 937 luaL_addlstring(b, s, e - s); /* keep original text */ 938 return 0; /* no changes */ 939 } 940 else if (l_unlikely(!lua_isstring(L, -1))) 941 return luaL_error(L, "invalid replacement value (a %s)", 942 luaL_typename(L, -1)); 943 else { 944 luaL_addvalue(b); /* add result to accumulator */ 945 return 1; /* something changed */ 946 } 947} 948 949 950static int str_gsub (lua_State *L) { 951 size_t srcl, lp; 952 const char *src = luaL_checklstring(L, 1, &srcl); /* subject */ 953 const char *p = luaL_checklstring(L, 2, &lp); /* pattern */ 954 const char *lastmatch = NULL; /* end of last match */ 955 int tr = lua_type(L, 3); /* replacement type */ 956 lua_Integer max_s = luaL_optinteger(L, 4, srcl + 1); /* max replacements */ 957 int anchor = (*p == '^'); 958 lua_Integer n = 0; /* replacement count */ 959 int changed = 0; /* change flag */ 960 MatchState ms; 961 luaL_Buffer b; 962 luaL_argexpected(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || 963 tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, 964 "string/function/table"); 965 luaL_buffinit(L, &b); 966 if (anchor) { 967 p++; lp--; /* skip anchor character */ 968 } 969 prepstate(&ms, L, src, srcl, p, lp); 970 while (n < max_s) { 971 const char *e; 972 reprepstate(&ms); /* (re)prepare state for new match */ 973 if ((e = match(&ms, src, p)) != NULL && e != lastmatch) { /* match? */ 974 n++; 975 changed = add_value(&ms, &b, src, e, tr) | changed; 976 src = lastmatch = e; 977 } 978 else if (src < ms.src_end) /* otherwise, skip one character */ 979 luaL_addchar(&b, *src++); 980 else break; /* end of subject */ 981 if (anchor) break; 982 } 983 if (!changed) /* no changes? */ 984 lua_pushvalue(L, 1); /* return original string */ 985 else { /* something changed */ 986 luaL_addlstring(&b, src, ms.src_end-src); 987 luaL_pushresult(&b); /* create and return new string */ 988 } 989 lua_pushinteger(L, n); /* number of substitutions */ 990 return 2; 991} 992 993/* }====================================================== */ 994 995 996 997/* 998** {====================================================== 999** STRING FORMAT 1000** ======================================================= 1001*/ 1002 1003#if !defined(lua_number2strx) /* { */ 1004 1005/* 1006** Hexadecimal floating-point formatter 1007*/ 1008 1009#define SIZELENMOD (sizeof(LUA_NUMBER_FRMLEN)/sizeof(char)) 1010 1011 1012/* 1013** Number of bits that goes into the first digit. It can be any value 1014** between 1 and 4; the following definition tries to align the number 1015** to nibble boundaries by making what is left after that first digit a 1016** multiple of 4. 1017*/ 1018#define L_NBFD ((l_floatatt(MANT_DIG) - 1)%4 + 1) 1019 1020 1021/* 1022** Add integer part of 'x' to buffer and return new 'x' 1023*/ 1024static lua_Number adddigit (char *buff, int n, lua_Number x) { 1025 lua_Number dd = l_mathop(floor)(x); /* get integer part from 'x' */ 1026 int d = (int)dd; 1027 buff[n] = (d < 10 ? d + '0' : d - 10 + 'a'); /* add to buffer */ 1028 return x - dd; /* return what is left */ 1029} 1030 1031 1032static int num2straux (char *buff, int sz, lua_Number x) { 1033 /* if 'inf' or 'NaN', format it like '%g' */ 1034 if (x != x || x == (lua_Number)HUGE_VAL || x == -(lua_Number)HUGE_VAL) 1035 return l_sprintf(buff, sz, LUA_NUMBER_FMT, (LUAI_UACNUMBER)x); 1036 else if (x == 0) { /* can be -0... */ 1037 /* create "0" or "-0" followed by exponent */ 1038 return l_sprintf(buff, sz, LUA_NUMBER_FMT "x0p+0", (LUAI_UACNUMBER)x); 1039 } 1040 else { 1041 int e; 1042 lua_Number m = l_mathop(frexp)(x, &e); /* 'x' fraction and exponent */ 1043 int n = 0; /* character count */ 1044 if (m < 0) { /* is number negative? */ 1045 buff[n++] = '-'; /* add sign */ 1046 m = -m; /* make it positive */ 1047 } 1048 buff[n++] = '0'; buff[n++] = 'x'; /* add "0x" */ 1049 m = adddigit(buff, n++, m * (1 << L_NBFD)); /* add first digit */ 1050 e -= L_NBFD; /* this digit goes before the radix point */ 1051 if (m > 0) { /* more digits? */ 1052 buff[n++] = lua_getlocaledecpoint(); /* add radix point */ 1053 do { /* add as many digits as needed */ 1054 m = adddigit(buff, n++, m * 16); 1055 } while (m > 0); 1056 } 1057 n += l_sprintf(buff + n, sz - n, "p%+d", e); /* add exponent */ 1058 lua_assert(n < sz); 1059 return n; 1060 } 1061} 1062 1063 1064static int lua_number2strx (lua_State *L, char *buff, int sz, 1065 const char *fmt, lua_Number x) { 1066 int n = num2straux(buff, sz, x); 1067 if (fmt[SIZELENMOD] == 'A') { 1068 int i; 1069 for (i = 0; i < n; i++) 1070 buff[i] = toupper(uchar(buff[i])); 1071 } 1072 else if (l_unlikely(fmt[SIZELENMOD] != 'a')) 1073 return luaL_error(L, "modifiers for format '%%a'/'%%A' not implemented"); 1074 return n; 1075} 1076 1077#endif /* } */ 1078 1079 1080/* 1081** Maximum size for items formatted with '%f'. This size is produced 1082** by format('%.99f', -maxfloat), and is equal to 99 + 3 ('-', '.', 1083** and '\0') + number of decimal digits to represent maxfloat (which 1084** is maximum exponent + 1). (99+3+1, adding some extra, 110) 1085*/ 1086#define MAX_ITEMF (110 + l_floatatt(MAX_10_EXP)) 1087 1088 1089/* 1090** All formats except '%f' do not need that large limit. The other 1091** float formats use exponents, so that they fit in the 99 limit for 1092** significant digits; 's' for large strings and 'q' add items directly 1093** to the buffer; all integer formats also fit in the 99 limit. The 1094** worst case are floats: they may need 99 significant digits, plus 1095** '0x', '-', '.', 'e+XXXX', and '\0'. Adding some extra, 120. 1096*/ 1097#define MAX_ITEM 120 1098 1099 1100/* valid flags in a format specification */ 1101#if !defined(L_FMTFLAGSF) 1102 1103/* valid flags for a, A, e, E, f, F, g, and G conversions */ 1104#define L_FMTFLAGSF "-+#0 " 1105 1106/* valid flags for o, x, and X conversions */ 1107#define L_FMTFLAGSX "-#0" 1108 1109/* valid flags for d and i conversions */ 1110#define L_FMTFLAGSI "-+0 " 1111 1112/* valid flags for u conversions */ 1113#define L_FMTFLAGSU "-0" 1114 1115/* valid flags for c, p, and s conversions */ 1116#define L_FMTFLAGSC "-" 1117 1118#endif 1119 1120 1121/* 1122** Maximum size of each format specification (such as "%-099.99d"): 1123** Initial '%', flags (up to 5), width (2), period, precision (2), 1124** length modifier (8), conversion specifier, and final '\0', plus some 1125** extra. 1126*/ 1127#define MAX_FORMAT 32 1128 1129 1130static void addquoted (luaL_Buffer *b, const char *s, size_t len) { 1131 luaL_addchar(b, '"'); 1132 while (len--) { 1133 if (*s == '"' || *s == '\\' || *s == '\n') { 1134 luaL_addchar(b, '\\'); 1135 luaL_addchar(b, *s); 1136 } 1137 else if (iscntrl(uchar(*s))) { 1138 char buff[10]; 1139 if (!isdigit(uchar(*(s+1)))) 1140 l_sprintf(buff, sizeof(buff), "\\%d", (int)uchar(*s)); 1141 else 1142 l_sprintf(buff, sizeof(buff), "\\%03d", (int)uchar(*s)); 1143 luaL_addstring(b, buff); 1144 } 1145 else 1146 luaL_addchar(b, *s); 1147 s++; 1148 } 1149 luaL_addchar(b, '"'); 1150} 1151 1152 1153#ifndef _KERNEL 1154/* 1155** Serialize a floating-point number in such a way that it can be 1156** scanned back by Lua. Use hexadecimal format for "common" numbers 1157** (to preserve precision); inf, -inf, and NaN are handled separately. 1158** (NaN cannot be expressed as a numeral, so we write '(0/0)' for it.) 1159*/ 1160static int quotefloat (lua_State *L, char *buff, lua_Number n) { 1161 const char *s; /* for the fixed representations */ 1162 if (n == (lua_Number)HUGE_VAL) /* inf? */ 1163 s = "1e9999"; 1164 else if (n == -(lua_Number)HUGE_VAL) /* -inf? */ 1165 s = "-1e9999"; 1166 else if (n != n) /* NaN? */ 1167 s = "(0/0)"; 1168 else { /* format number as hexadecimal */ 1169 int nb = lua_number2strx(L, buff, MAX_ITEM, 1170 "%" LUA_NUMBER_FRMLEN "a", n); 1171 /* ensures that 'buff' string uses a dot as the radix character */ 1172 if (memchr(buff, '.', nb) == NULL) { /* no dot? */ 1173 char point = lua_getlocaledecpoint(); /* try locale point */ 1174 char *ppoint = (char *)memchr(buff, point, nb); 1175 if (ppoint) *ppoint = '.'; /* change it to a dot */ 1176 } 1177 return nb; 1178 } 1179 /* for the fixed representations */ 1180 return l_sprintf(buff, MAX_ITEM, "%s", s); 1181} 1182#endif /* _KERNEL */ 1183 1184 1185static void addliteral (lua_State *L, luaL_Buffer *b, int arg) { 1186 switch (lua_type(L, arg)) { 1187 case LUA_TSTRING: { 1188 size_t len; 1189 const char *s = lua_tolstring(L, arg, &len); 1190 addquoted(b, s, len); 1191 break; 1192 } 1193 case LUA_TNUMBER: { 1194 char *buff = luaL_prepbuffsize(b, MAX_ITEM); 1195 int nb; 1196#ifndef _KERNEL 1197 if (!lua_isinteger(L, arg)) /* float? */ 1198 nb = quotefloat(L, buff, lua_tonumber(L, arg)); 1199 else { /* integers */ 1200#endif /* _KERNEL */ 1201 lua_Integer n = lua_tointeger(L, arg); 1202 const char *format = (n == LUA_MININTEGER) /* corner case? */ 1203 ? "0x%" LUA_INTEGER_FRMLEN "x" /* use hex */ 1204 : LUA_INTEGER_FMT; /* else use default format */ 1205 nb = l_sprintf(buff, MAX_ITEM, format, (LUAI_UACINT)n); 1206#ifndef _KERNEL 1207 } 1208#endif /* _KERNEL */ 1209 luaL_addsize(b, nb); 1210 break; 1211 } 1212 case LUA_TNIL: case LUA_TBOOLEAN: { 1213 luaL_tolstring(L, arg, NULL); 1214 luaL_addvalue(b); 1215 break; 1216 } 1217 default: { 1218 luaL_argerror(L, arg, "value has no literal form"); 1219 } 1220 } 1221} 1222 1223 1224static const char *get2digits (const char *s) { 1225 if (isdigit(uchar(*s))) { 1226 s++; 1227 if (isdigit(uchar(*s))) s++; /* (2 digits at most) */ 1228 } 1229 return s; 1230} 1231 1232 1233/* 1234** Check whether a conversion specification is valid. When called, 1235** first character in 'form' must be '%' and last character must 1236** be a valid conversion specifier. 'flags' are the accepted flags; 1237** 'precision' signals whether to accept a precision. 1238*/ 1239static void checkformat (lua_State *L, const char *form, const char *flags, 1240 int precision) { 1241 const char *spec = form + 1; /* skip '%' */ 1242 spec += strspn(spec, flags); /* skip flags */ 1243 if (*spec != '0') { /* a width cannot start with '0' */ 1244 spec = get2digits(spec); /* skip width */ 1245 if (*spec == '.' && precision) { 1246 spec++; 1247 spec = get2digits(spec); /* skip precision */ 1248 } 1249 } 1250 if (!isalpha(uchar(*spec))) /* did not go to the end? */ 1251 luaL_error(L, "invalid conversion specification: '%s'", form); 1252} 1253 1254 1255/* 1256** Get a conversion specification and copy it to 'form'. 1257** Return the address of its last character. 1258*/ 1259static const char *getformat (lua_State *L, const char *strfrmt, 1260 char *form) { 1261 /* spans flags, width, and precision ('0' is included as a flag) */ 1262 size_t len = strspn(strfrmt, L_FMTFLAGSF "123456789."); 1263 len++; /* adds following character (should be the specifier) */ 1264 /* still needs space for '%', '\0', plus a length modifier */ 1265 if (len >= MAX_FORMAT - 10) 1266 luaL_error(L, "invalid format (too long)"); 1267 *(form++) = '%'; 1268 memcpy(form, strfrmt, len * sizeof(char)); 1269 *(form + len) = '\0'; 1270 return strfrmt + len - 1; 1271} 1272 1273 1274/* 1275** add length modifier into formats 1276*/ 1277static void addlenmod (char *form, const char *lenmod) { 1278 size_t l = strlen(form); 1279 size_t lm = strlen(lenmod); 1280 char spec = form[l - 1]; 1281 strcpy(form + l - 1, lenmod); 1282 form[l + lm - 1] = spec; 1283 form[l + lm] = '\0'; 1284} 1285 1286 1287static int str_format (lua_State *L) { 1288 int top = lua_gettop(L); 1289 int arg = 1; 1290 size_t sfl; 1291 const char *strfrmt = luaL_checklstring(L, arg, &sfl); 1292 const char *strfrmt_end = strfrmt+sfl; 1293 const char *flags; 1294 luaL_Buffer b; 1295 luaL_buffinit(L, &b); 1296 while (strfrmt < strfrmt_end) { 1297 if (*strfrmt != L_ESC) 1298 luaL_addchar(&b, *strfrmt++); 1299 else if (*++strfrmt == L_ESC) 1300 luaL_addchar(&b, *strfrmt++); /* %% */ 1301 else { /* format item */ 1302 char form[MAX_FORMAT]; /* to store the format ('%...') */ 1303 int maxitem = MAX_ITEM; /* maximum length for the result */ 1304 char *buff = luaL_prepbuffsize(&b, maxitem); /* to put result */ 1305 int nb = 0; /* number of bytes in result */ 1306 if (++arg > top) 1307 return luaL_argerror(L, arg, "no value"); 1308 strfrmt = getformat(L, strfrmt, form); 1309 switch (*strfrmt++) { 1310 case 'c': { 1311 checkformat(L, form, L_FMTFLAGSC, 0); 1312 nb = l_sprintf(buff, maxitem, form, (int)luaL_checkinteger(L, arg)); 1313 break; 1314 } 1315 case 'd': case 'i': 1316 flags = L_FMTFLAGSI; 1317 goto intcase; 1318 case 'u': 1319 flags = L_FMTFLAGSU; 1320 goto intcase; 1321 case 'o': case 'x': case 'X': 1322 flags = L_FMTFLAGSX; 1323 intcase: { 1324 lua_Integer n = luaL_checkinteger(L, arg); 1325 checkformat(L, form, flags, 1); 1326 addlenmod(form, LUA_INTEGER_FRMLEN); 1327 nb = l_sprintf(buff, maxitem, form, (LUAI_UACINT)n); 1328 break; 1329 } 1330#ifndef _KERNEL 1331 case 'a': case 'A': 1332 checkformat(L, form, L_FMTFLAGSF, 1); 1333 addlenmod(form, LUA_NUMBER_FRMLEN); 1334 nb = lua_number2strx(L, buff, maxitem, form, 1335 luaL_checknumber(L, arg)); 1336 break; 1337 case 'f': 1338 maxitem = MAX_ITEMF; /* extra space for '%f' */ 1339 buff = luaL_prepbuffsize(&b, maxitem); 1340 /* FALLTHROUGH */ 1341 case 'e': case 'E': case 'g': case 'G': { 1342 lua_Number n = luaL_checknumber(L, arg); 1343 checkformat(L, form, L_FMTFLAGSF, 1); 1344 addlenmod(form, LUA_NUMBER_FRMLEN); 1345 nb = l_sprintf(buff, maxitem, form, (LUAI_UACNUMBER)n); 1346 break; 1347 } 1348 case 'p': { 1349 const void *p = lua_topointer(L, arg); 1350 checkformat(L, form, L_FMTFLAGSC, 0); 1351 if (p == NULL) { /* avoid calling 'printf' with argument NULL */ 1352 p = "(null)"; /* result */ 1353 form[strlen(form) - 1] = 's'; /* format it as a string */ 1354 } 1355 nb = l_sprintf(buff, maxitem, form, p); 1356 break; 1357 } 1358#endif /* _KERNEL */ 1359 case 'q': { 1360 if (form[2] != '\0') /* modifiers? */ 1361 return luaL_error(L, "specifier '%%q' cannot have modifiers"); 1362 addliteral(L, &b, arg); 1363 break; 1364 } 1365 case 's': { 1366 size_t l; 1367 const char *s = luaL_tolstring(L, arg, &l); 1368 if (form[2] == '\0') /* no modifiers? */ 1369 luaL_addvalue(&b); /* keep entire string */ 1370 else { 1371 luaL_argcheck(L, l == strlen(s), arg, "string contains zeros"); 1372 checkformat(L, form, L_FMTFLAGSC, 1); 1373 if (strchr(form, '.') == NULL && l >= 100) { 1374 /* no precision and string is too long to be formatted */ 1375 luaL_addvalue(&b); /* keep entire string */ 1376 } 1377 else { /* format the string into 'buff' */ 1378 nb = l_sprintf(buff, maxitem, form, s); 1379 lua_pop(L, 1); /* remove result from 'luaL_tolstring' */ 1380 } 1381 } 1382 break; 1383 } 1384 default: { /* also treat cases 'pnLlh' */ 1385 return luaL_error(L, "invalid conversion '%s' to 'format'", form); 1386 } 1387 } 1388 lua_assert(nb < maxitem); 1389 luaL_addsize(&b, nb); 1390 } 1391 } 1392 luaL_pushresult(&b); 1393 return 1; 1394} 1395 1396/* }====================================================== */ 1397 1398 1399/* 1400** {====================================================== 1401** PACK/UNPACK 1402** ======================================================= 1403*/ 1404 1405 1406/* value used for padding */ 1407#if !defined(LUAL_PACKPADBYTE) 1408#define LUAL_PACKPADBYTE 0x00 1409#endif 1410 1411/* maximum size for the binary representation of an integer */ 1412#define MAXINTSIZE 16 1413 1414/* number of bits in a character */ 1415#define NB CHAR_BIT 1416 1417/* mask for one character (NB 1's) */ 1418#define MC ((1 << NB) - 1) 1419 1420/* size of a lua_Integer */ 1421#define SZINT ((int)sizeof(lua_Integer)) 1422 1423 1424/* dummy union to get native endianness */ 1425static const union { 1426 int dummy; 1427 char little; /* true iff machine is little endian */ 1428} nativeendian = {1}; 1429 1430 1431/* 1432** information to pack/unpack stuff 1433*/ 1434typedef struct Header { 1435 lua_State *L; 1436 int islittle; 1437 int maxalign; 1438} Header; 1439 1440 1441/* 1442** options for pack/unpack 1443*/ 1444typedef enum KOption { 1445 Kint, /* signed integers */ 1446 Kuint, /* unsigned integers */ 1447#ifndef _KERNEL 1448 Kfloat, /* single-precision floating-point numbers */ 1449 Knumber, /* Lua "native" floating-point numbers */ 1450 Kdouble, /* double-precision floating-point numbers */ 1451#endif /* _KERNEL */ 1452 Kchar, /* fixed-length strings */ 1453 Kstring, /* strings with prefixed length */ 1454 Kzstr, /* zero-terminated strings */ 1455 Kpadding, /* padding */ 1456 Kpaddalign, /* padding for alignment */ 1457 Knop /* no-op (configuration or spaces) */ 1458} KOption; 1459 1460 1461/* 1462** Read an integer numeral from string 'fmt' or return 'df' if 1463** there is no numeral 1464*/ 1465static int digit (int c) { return '0' <= c && c <= '9'; } 1466 1467static int getnum (const char **fmt, int df) { 1468 if (!digit(**fmt)) /* no number? */ 1469 return df; /* return default value */ 1470 else { 1471 int a = 0; 1472 do { 1473 a = a*10 + (*((*fmt)++) - '0'); 1474 } while (digit(**fmt) && a <= ((int)MAXSIZE - 9)/10); 1475 return a; 1476 } 1477} 1478 1479 1480/* 1481** Read an integer numeral and raises an error if it is larger 1482** than the maximum size for integers. 1483*/ 1484static int getnumlimit (Header *h, const char **fmt, int df) { 1485 int sz = getnum(fmt, df); 1486 if (l_unlikely(sz > MAXINTSIZE || sz <= 0)) 1487 return luaL_error(h->L, "integral size (%d) out of limits [1,%d]", 1488 sz, MAXINTSIZE); 1489 return sz; 1490} 1491 1492 1493/* 1494** Initialize Header 1495*/ 1496static void initheader (lua_State *L, Header *h) { 1497 h->L = L; 1498 h->islittle = nativeendian.little; 1499 h->maxalign = 1; 1500} 1501 1502 1503/* 1504** Read and classify next option. 'size' is filled with option's size. 1505*/ 1506static KOption getoption (Header *h, const char **fmt, int *size) { 1507 /* dummy structure to get native alignment requirements */ 1508 struct cD { char c; union { LUAI_MAXALIGN; } u; }; 1509 int opt = *((*fmt)++); 1510 *size = 0; /* default */ 1511 switch (opt) { 1512 case 'b': *size = sizeof(char); return Kint; 1513 case 'B': *size = sizeof(char); return Kuint; 1514 case 'h': *size = sizeof(short); return Kint; 1515 case 'H': *size = sizeof(short); return Kuint; 1516 case 'l': *size = sizeof(long); return Kint; 1517 case 'L': *size = sizeof(long); return Kuint; 1518 case 'j': *size = sizeof(lua_Integer); return Kint; 1519 case 'J': *size = sizeof(lua_Integer); return Kuint; 1520 case 'T': *size = sizeof(size_t); return Kuint; 1521#ifndef _KERNEL 1522 case 'f': *size = sizeof(float); return Kfloat; 1523 case 'd': *size = sizeof(double); return Kdouble; 1524#endif 1525#ifndef _KERNEL 1526 case 'n': *size = sizeof(lua_Number); return Knumber; 1527#else /* _KERNEL */ 1528 case 'n': *size = sizeof(lua_Number); return Kint; 1529#endif 1530 case 'i': *size = getnumlimit(h, fmt, sizeof(int)); return Kint; 1531 case 'I': *size = getnumlimit(h, fmt, sizeof(int)); return Kuint; 1532 case 's': *size = getnumlimit(h, fmt, sizeof(size_t)); return Kstring; 1533 case 'c': 1534 *size = getnum(fmt, -1); 1535 if (l_unlikely(*size == -1)) 1536 luaL_error(h->L, "missing size for format option 'c'"); 1537 return Kchar; 1538 case 'z': return Kzstr; 1539 case 'x': *size = 1; return Kpadding; 1540 case 'X': return Kpaddalign; 1541 case ' ': break; 1542 case '<': h->islittle = 1; break; 1543 case '>': h->islittle = 0; break; 1544 case '=': h->islittle = nativeendian.little; break; 1545 case '!': { 1546 const int maxalign = offsetof(struct cD, u); 1547 h->maxalign = getnumlimit(h, fmt, maxalign); 1548 break; 1549 } 1550 default: luaL_error(h->L, "invalid format option '%c'", opt); 1551 } 1552 return Knop; 1553} 1554 1555 1556/* 1557** Read, classify, and fill other details about the next option. 1558** 'psize' is filled with option's size, 'notoalign' with its 1559** alignment requirements. 1560** Local variable 'size' gets the size to be aligned. (Kpadal option 1561** always gets its full alignment, other options are limited by 1562** the maximum alignment ('maxalign'). Kchar option needs no alignment 1563** despite its size. 1564*/ 1565static KOption getdetails (Header *h, size_t totalsize, 1566 const char **fmt, int *psize, int *ntoalign) { 1567 KOption opt = getoption(h, fmt, psize); 1568 int align = *psize; /* usually, alignment follows size */ 1569 if (opt == Kpaddalign) { /* 'X' gets alignment from following option */ 1570 if (**fmt == '\0' || getoption(h, fmt, &align) == Kchar || align == 0) 1571 luaL_argerror(h->L, 1, "invalid next option for option 'X'"); 1572 } 1573 if (align <= 1 || opt == Kchar) /* need no alignment? */ 1574 *ntoalign = 0; 1575 else { 1576 if (align > h->maxalign) /* enforce maximum alignment */ 1577 align = h->maxalign; 1578 if (l_unlikely((align & (align - 1)) != 0)) /* not a power of 2? */ 1579 luaL_argerror(h->L, 1, "format asks for alignment not power of 2"); 1580 *ntoalign = (align - (int)(totalsize & (align - 1))) & (align - 1); 1581 } 1582 return opt; 1583} 1584 1585 1586/* 1587** Pack integer 'n' with 'size' bytes and 'islittle' endianness. 1588** The final 'if' handles the case when 'size' is larger than 1589** the size of a Lua integer, correcting the extra sign-extension 1590** bytes if necessary (by default they would be zeros). 1591*/ 1592static void packint (luaL_Buffer *b, lua_Unsigned n, 1593 int islittle, int size, int neg) { 1594 char *buff = luaL_prepbuffsize(b, size); 1595 int i; 1596 buff[islittle ? 0 : size - 1] = (char)(n & MC); /* first byte */ 1597 for (i = 1; i < size; i++) { 1598 n >>= NB; 1599 buff[islittle ? i : size - 1 - i] = (char)(n & MC); 1600 } 1601 if (neg && size > SZINT) { /* negative number need sign extension? */ 1602 for (i = SZINT; i < size; i++) /* correct extra bytes */ 1603 buff[islittle ? i : size - 1 - i] = (char)MC; 1604 } 1605 luaL_addsize(b, size); /* add result to buffer */ 1606} 1607 1608 1609#ifndef _KERNEL 1610/* 1611** Copy 'size' bytes from 'src' to 'dest', correcting endianness if 1612** given 'islittle' is different from native endianness. 1613*/ 1614static void copywithendian (char *dest, const char *src, 1615 int size, int islittle) { 1616 if (islittle == nativeendian.little) 1617 memcpy(dest, src, size); 1618 else { 1619 dest += size - 1; 1620 while (size-- != 0) 1621 *(dest--) = *(src++); 1622 } 1623} 1624#endif /* _KERNEL */ 1625 1626 1627static int str_pack (lua_State *L) { 1628 luaL_Buffer b; 1629 Header h; 1630 const char *fmt = luaL_checkstring(L, 1); /* format string */ 1631 int arg = 1; /* current argument to pack */ 1632 size_t totalsize = 0; /* accumulate total size of result */ 1633 initheader(L, &h); 1634 lua_pushnil(L); /* mark to separate arguments from string buffer */ 1635 luaL_buffinit(L, &b); 1636 while (*fmt != '\0') { 1637 int size, ntoalign; 1638 KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign); 1639 totalsize += ntoalign + size; 1640 while (ntoalign-- > 0) 1641 luaL_addchar(&b, LUAL_PACKPADBYTE); /* fill alignment */ 1642 arg++; 1643 switch (opt) { 1644 case Kint: { /* signed integers */ 1645 lua_Integer n = luaL_checkinteger(L, arg); 1646 if (size < SZINT) { /* need overflow check? */ 1647 lua_Integer lim = (lua_Integer)1 << ((size * NB) - 1); 1648 luaL_argcheck(L, -lim <= n && n < lim, arg, "integer overflow"); 1649 } 1650 packint(&b, (lua_Unsigned)n, h.islittle, size, (n < 0)); 1651 break; 1652 } 1653 case Kuint: { /* unsigned integers */ 1654 lua_Integer n = luaL_checkinteger(L, arg); 1655 if (size < SZINT) /* need overflow check? */ 1656 luaL_argcheck(L, (lua_Unsigned)n < ((lua_Unsigned)1 << (size * NB)), 1657 arg, "unsigned overflow"); 1658 packint(&b, (lua_Unsigned)n, h.islittle, size, 0); 1659 break; 1660 } 1661#ifndef _KERNEL 1662 case Kfloat: { /* C float */ 1663 float f = (float)luaL_checknumber(L, arg); /* get argument */ 1664 char *buff = luaL_prepbuffsize(&b, sizeof(f)); 1665 /* move 'f' to final result, correcting endianness if needed */ 1666 copywithendian(buff, (char *)&f, sizeof(f), h.islittle); 1667 luaL_addsize(&b, size); 1668 break; 1669 } 1670 case Knumber: { /* Lua float */ 1671 lua_Number f = luaL_checknumber(L, arg); /* get argument */ 1672 char *buff = luaL_prepbuffsize(&b, sizeof(f)); 1673 /* move 'f' to final result, correcting endianness if needed */ 1674 copywithendian(buff, (char *)&f, sizeof(f), h.islittle); 1675 luaL_addsize(&b, size); 1676 break; 1677 } 1678 case Kdouble: { /* C double */ 1679 double f = (double)luaL_checknumber(L, arg); /* get argument */ 1680 char *buff = luaL_prepbuffsize(&b, sizeof(f)); 1681 /* move 'f' to final result, correcting endianness if needed */ 1682 copywithendian(buff, (char *)&f, sizeof(f), h.islittle); 1683 luaL_addsize(&b, size); 1684 break; 1685 } 1686#endif /* _KERNEL */ 1687 case Kchar: { /* fixed-size string */ 1688 size_t len; 1689 const char *s = luaL_checklstring(L, arg, &len); 1690 luaL_argcheck(L, len <= (size_t)size, arg, 1691 "string longer than given size"); 1692 luaL_addlstring(&b, s, len); /* add string */ 1693 while (len++ < (size_t)size) /* pad extra space */ 1694 luaL_addchar(&b, LUAL_PACKPADBYTE); 1695 break; 1696 } 1697 case Kstring: { /* strings with length count */ 1698 size_t len; 1699 const char *s = luaL_checklstring(L, arg, &len); 1700 luaL_argcheck(L, size >= (int)sizeof(size_t) || 1701 len < ((size_t)1 << (size * NB)), 1702 arg, "string length does not fit in given size"); 1703 packint(&b, (lua_Unsigned)len, h.islittle, size, 0); /* pack length */ 1704 luaL_addlstring(&b, s, len); 1705 totalsize += len; 1706 break; 1707 } 1708 case Kzstr: { /* zero-terminated string */ 1709 size_t len; 1710 const char *s = luaL_checklstring(L, arg, &len); 1711 luaL_argcheck(L, strlen(s) == len, arg, "string contains zeros"); 1712 luaL_addlstring(&b, s, len); 1713 luaL_addchar(&b, '\0'); /* add zero at the end */ 1714 totalsize += len + 1; 1715 break; 1716 } 1717 case Kpadding: luaL_addchar(&b, LUAL_PACKPADBYTE); /* FALLTHROUGH */ 1718 case Kpaddalign: case Knop: 1719 arg--; /* undo increment */ 1720 break; 1721 } 1722 } 1723 luaL_pushresult(&b); 1724 return 1; 1725} 1726 1727 1728static int str_packsize (lua_State *L) { 1729 Header h; 1730 const char *fmt = luaL_checkstring(L, 1); /* format string */ 1731 size_t totalsize = 0; /* accumulate total size of result */ 1732 initheader(L, &h); 1733 while (*fmt != '\0') { 1734 int size, ntoalign; 1735 KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign); 1736 luaL_argcheck(L, opt != Kstring && opt != Kzstr, 1, 1737 "variable-length format"); 1738 size += ntoalign; /* total space used by option */ 1739 luaL_argcheck(L, totalsize <= MAXSIZE - size, 1, 1740 "format result too large"); 1741 totalsize += size; 1742 } 1743 lua_pushinteger(L, (lua_Integer)totalsize); 1744 return 1; 1745} 1746 1747 1748/* 1749** Unpack an integer with 'size' bytes and 'islittle' endianness. 1750** If size is smaller than the size of a Lua integer and integer 1751** is signed, must do sign extension (propagating the sign to the 1752** higher bits); if size is larger than the size of a Lua integer, 1753** it must check the unread bytes to see whether they do not cause an 1754** overflow. 1755*/ 1756static lua_Integer unpackint (lua_State *L, const char *str, 1757 int islittle, int size, int issigned) { 1758 lua_Unsigned res = 0; 1759 int i; 1760 int limit = (size <= SZINT) ? size : SZINT; 1761 for (i = limit - 1; i >= 0; i--) { 1762 res <<= NB; 1763 res |= (lua_Unsigned)(unsigned char)str[islittle ? i : size - 1 - i]; 1764 } 1765 if (size < SZINT) { /* real size smaller than lua_Integer? */ 1766 if (issigned) { /* needs sign extension? */ 1767 lua_Unsigned mask = (lua_Unsigned)1 << (size*NB - 1); 1768 res = ((res ^ mask) - mask); /* do sign extension */ 1769 } 1770 } 1771 else if (size > SZINT) { /* must check unread bytes */ 1772 int mask = (!issigned || (lua_Integer)res >= 0) ? 0 : MC; 1773 for (i = limit; i < size; i++) { 1774 if (l_unlikely((unsigned char)str[islittle ? i : size - 1 - i] != mask)) 1775 luaL_error(L, "%d-byte integer does not fit into Lua Integer", size); 1776 } 1777 } 1778 return (lua_Integer)res; 1779} 1780 1781 1782static int str_unpack (lua_State *L) { 1783 Header h; 1784 const char *fmt = luaL_checkstring(L, 1); 1785 size_t ld; 1786 const char *data = luaL_checklstring(L, 2, &ld); 1787 size_t pos = posrelatI(luaL_optinteger(L, 3, 1), ld) - 1; 1788 int n = 0; /* number of results */ 1789 luaL_argcheck(L, pos <= ld, 3, "initial position out of string"); 1790 initheader(L, &h); 1791 while (*fmt != '\0') { 1792 int size, ntoalign; 1793 KOption opt = getdetails(&h, pos, &fmt, &size, &ntoalign); 1794 luaL_argcheck(L, (size_t)ntoalign + size <= ld - pos, 2, 1795 "data string too short"); 1796 pos += ntoalign; /* skip alignment */ 1797 /* stack space for item + next position */ 1798 luaL_checkstack(L, 2, "too many results"); 1799 n++; 1800 switch (opt) { 1801 case Kint: 1802 case Kuint: { 1803 lua_Integer res = unpackint(L, data + pos, h.islittle, size, 1804 (opt == Kint)); 1805 lua_pushinteger(L, res); 1806 break; 1807 } 1808#ifndef _KERNEL 1809 case Kfloat: { 1810 float f; 1811 copywithendian((char *)&f, data + pos, sizeof(f), h.islittle); 1812 lua_pushnumber(L, (lua_Number)f); 1813 break; 1814 } 1815 case Knumber: { 1816 lua_Number f; 1817 copywithendian((char *)&f, data + pos, sizeof(f), h.islittle); 1818 lua_pushnumber(L, f); 1819 break; 1820 } 1821 case Kdouble: { 1822 double f; 1823 copywithendian((char *)&f, data + pos, sizeof(f), h.islittle); 1824 lua_pushnumber(L, (lua_Number)f); 1825 break; 1826 } 1827#endif /* _KERNEL */ 1828 case Kchar: { 1829 lua_pushlstring(L, data + pos, size); 1830 break; 1831 } 1832 case Kstring: { 1833 size_t len = (size_t)unpackint(L, data + pos, h.islittle, size, 0); 1834 luaL_argcheck(L, len <= ld - pos - size, 2, "data string too short"); 1835 lua_pushlstring(L, data + pos + size, len); 1836 pos += len; /* skip string */ 1837 break; 1838 } 1839 case Kzstr: { 1840 size_t len = strlen(data + pos); 1841 luaL_argcheck(L, pos + len < ld, 2, 1842 "unfinished string for format 'z'"); 1843 lua_pushlstring(L, data + pos, len); 1844 pos += len + 1; /* skip string plus final '\0' */ 1845 break; 1846 } 1847 case Kpaddalign: case Kpadding: case Knop: 1848 n--; /* undo increment */ 1849 break; 1850 } 1851 pos += size; 1852 } 1853 lua_pushinteger(L, pos + 1); /* next position */ 1854 return n + 1; 1855} 1856 1857/* }====================================================== */ 1858 1859 1860static const luaL_Reg strlib[] = { 1861 {"byte", str_byte}, 1862 {"char", str_char}, 1863 {"dump", str_dump}, 1864 {"find", str_find}, 1865 {"format", str_format}, 1866 {"gmatch", gmatch}, 1867 {"gsub", str_gsub}, 1868 {"len", str_len}, 1869 {"lower", str_lower}, 1870 {"match", str_match}, 1871 {"rep", str_rep}, 1872 {"reverse", str_reverse}, 1873 {"sub", str_sub}, 1874 {"upper", str_upper}, 1875 {"pack", str_pack}, 1876 {"packsize", str_packsize}, 1877 {"unpack", str_unpack}, 1878 {NULL, NULL} 1879}; 1880 1881 1882static void createmetatable (lua_State *L) { 1883 /* table to be metatable for strings */ 1884 luaL_newlibtable(L, stringmetamethods); 1885 luaL_setfuncs(L, stringmetamethods, 0); 1886 lua_pushliteral(L, ""); /* dummy string */ 1887 lua_pushvalue(L, -2); /* copy table */ 1888 lua_setmetatable(L, -2); /* set table as metatable for strings */ 1889 lua_pop(L, 1); /* pop dummy string */ 1890 lua_pushvalue(L, -2); /* get string library */ 1891 lua_setfield(L, -2, "__index"); /* metatable.__index = string */ 1892 lua_pop(L, 1); /* pop metatable */ 1893} 1894 1895 1896/* 1897** Open string library 1898*/ 1899LUAMOD_API int luaopen_string (lua_State *L) { 1900 luaL_newlib(L, strlib); 1901 createmetatable(L); 1902 return 1; 1903} 1904 1905