1/* 2 * $Id: encoding.xs,v 0.3 2002/04/21 22:14:41 dankogai Exp $ 3 */ 4 5#define PERL_NO_GET_CONTEXT 6#include "EXTERN.h" 7#include "perl.h" 8#include "XSUB.h" 9#define U8 U8 10 11#define OUR_DEFAULT_FB "Encode::PERLQQ" 12 13#if defined(USE_PERLIO) && !defined(USE_SFIO) 14 15/* Define an encoding "layer" in the perliol.h sense. 16 17 The layer defined here "inherits" in an object-oriented sense from 18 the "perlio" layer with its PerlIOBuf_* "methods". The 19 implementation is particularly efficient as until Encode settles 20 down there is no point in tryint to tune it. 21 22 The layer works by overloading the "fill" and "flush" methods. 23 24 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO 25 perl API to convert the encoded data to UTF-8 form, then copies it 26 back to the buffer. The "base class's" read methods then see the 27 UTF-8 data. 28 29 "flush" transforms the UTF-8 data deposited by the "base class's 30 write method in the buffer back into the encoded form using the 31 encode OO perl API, then copies data back into the buffer and calls 32 "SUPER::flush. 33 34 Note that "flush" is _also_ called for read mode - we still do the 35 (back)-translate so that the base class's "flush" sees the 36 correct number of encoded chars for positioning the seek 37 pointer. (This double translation is the worst performance issue - 38 particularly with all-perl encode engine.) 39 40*/ 41 42#include "perliol.h" 43 44typedef struct { 45 PerlIOBuf base; /* PerlIOBuf stuff */ 46 SV *bufsv; /* buffer seen by layers above */ 47 SV *dataSV; /* data we have read from layer below */ 48 SV *enc; /* the encoding object */ 49 SV *chk; /* CHECK in Encode methods */ 50 int flags; /* Flags currently just needs lines */ 51} PerlIOEncode; 52 53#define NEEDS_LINES 1 54 55SV * 56PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags) 57{ 58 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); 59 SV *sv = &PL_sv_undef; 60 if (e->enc) { 61 dSP; 62 /* Not 100% sure stack swap is right thing to do during dup ... */ 63 PUSHSTACKi(PERLSI_MAGIC); 64 SPAGAIN; 65 ENTER; 66 SAVETMPS; 67 PUSHMARK(sp); 68 XPUSHs(e->enc); 69 PUTBACK; 70 if (call_method("name", G_SCALAR) == 1) { 71 SPAGAIN; 72 sv = newSVsv(POPs); 73 PUTBACK; 74 } 75 FREETMPS; 76 LEAVE; 77 POPSTACK; 78 } 79 return sv; 80} 81 82IV 83PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg, PerlIO_funcs *tab) 84{ 85 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); 86 dSP; 87 IV code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv,tab); 88 SV *result = Nullsv; 89 90 PUSHSTACKi(PERLSI_MAGIC); 91 SPAGAIN; 92 93 ENTER; 94 SAVETMPS; 95 96 PUSHMARK(sp); 97 XPUSHs(arg); 98 PUTBACK; 99 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) { 100 /* should never happen */ 101 Perl_die(aTHX_ "Encode::find_encoding did not return a value"); 102 return -1; 103 } 104 SPAGAIN; 105 result = POPs; 106 PUTBACK; 107 108 if (!SvROK(result) || !SvOBJECT(SvRV(result))) { 109 e->enc = Nullsv; 110 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"", 111 arg); 112 errno = EINVAL; 113 code = -1; 114 } 115 else { 116 117 /* $enc->renew */ 118 PUSHMARK(sp); 119 XPUSHs(result); 120 PUTBACK; 121 if (call_method("renew",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) { 122 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support renew method", 123 arg); 124 } 125 else { 126 SPAGAIN; 127 result = POPs; 128 PUTBACK; 129 } 130 e->enc = newSVsv(result); 131 PUSHMARK(sp); 132 XPUSHs(e->enc); 133 PUTBACK; 134 if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) { 135 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines", 136 arg); 137 } 138 else { 139 SPAGAIN; 140 result = POPs; 141 PUTBACK; 142 if (SvTRUE(result)) { 143 e->flags |= NEEDS_LINES; 144 } 145 } 146 PerlIOBase(f)->flags |= PERLIO_F_UTF8; 147 } 148 149 e->chk = newSVsv(get_sv("PerlIO::encoding::fallback", 0)); 150 151 FREETMPS; 152 LEAVE; 153 POPSTACK; 154 return code; 155} 156 157IV 158PerlIOEncode_popped(pTHX_ PerlIO * f) 159{ 160 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); 161 if (e->enc) { 162 SvREFCNT_dec(e->enc); 163 e->enc = Nullsv; 164 } 165 if (e->bufsv) { 166 SvREFCNT_dec(e->bufsv); 167 e->bufsv = Nullsv; 168 } 169 if (e->dataSV) { 170 SvREFCNT_dec(e->dataSV); 171 e->dataSV = Nullsv; 172 } 173 if (e->chk) { 174 SvREFCNT_dec(e->chk); 175 e->chk = Nullsv; 176 } 177 return 0; 178} 179 180STDCHAR * 181PerlIOEncode_get_base(pTHX_ PerlIO * f) 182{ 183 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); 184 if (!e->base.bufsiz) 185 e->base.bufsiz = 1024; 186 if (!e->bufsv) { 187 e->bufsv = newSV(e->base.bufsiz); 188 sv_setpvn(e->bufsv, "", 0); 189 } 190 e->base.buf = (STDCHAR *) SvPVX(e->bufsv); 191 if (!e->base.ptr) 192 e->base.ptr = e->base.buf; 193 if (!e->base.end) 194 e->base.end = e->base.buf; 195 if (e->base.ptr < e->base.buf 196 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { 197 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, 198 e->base.buf + SvLEN(e->bufsv)); 199 abort(); 200 } 201 if (SvLEN(e->bufsv) < e->base.bufsiz) { 202 SSize_t poff = e->base.ptr - e->base.buf; 203 SSize_t eoff = e->base.end - e->base.buf; 204 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz); 205 e->base.ptr = e->base.buf + poff; 206 e->base.end = e->base.buf + eoff; 207 } 208 if (e->base.ptr < e->base.buf 209 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { 210 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, 211 e->base.buf + SvLEN(e->bufsv)); 212 abort(); 213 } 214 return e->base.buf; 215} 216 217IV 218PerlIOEncode_fill(pTHX_ PerlIO * f) 219{ 220 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); 221 dSP; 222 IV code = 0; 223 PerlIO *n; 224 SSize_t avail; 225 226 if (PerlIO_flush(f) != 0) 227 return -1; 228 n = PerlIONext(f); 229 if (!PerlIO_fast_gets(n)) { 230 /* Things get too messy if we don't have a buffer layer 231 push a :perlio to do the job */ 232 char mode[8]; 233 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv); 234 if (!n) { 235 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f); 236 } 237 } 238 PUSHSTACKi(PERLSI_MAGIC); 239 SPAGAIN; 240 ENTER; 241 SAVETMPS; 242 retry: 243 avail = PerlIO_get_cnt(n); 244 if (avail <= 0) { 245 avail = PerlIO_fill(n); 246 if (avail == 0) { 247 avail = PerlIO_get_cnt(n); 248 } 249 else { 250 if (!PerlIO_error(n) && PerlIO_eof(n)) 251 avail = 0; 252 } 253 } 254 if (avail > 0 || (e->flags & NEEDS_LINES)) { 255 STDCHAR *ptr = PerlIO_get_ptr(n); 256 SSize_t use = (avail >= 0) ? avail : 0; 257 SV *uni; 258 char *s; 259 STRLEN len = 0; 260 e->base.ptr = e->base.end = (STDCHAR *) Nullch; 261 (void) PerlIOEncode_get_base(aTHX_ f); 262 if (!e->dataSV) 263 e->dataSV = newSV(0); 264 if (SvTYPE(e->dataSV) < SVt_PV) { 265 sv_upgrade(e->dataSV,SVt_PV); 266 } 267 if (e->flags & NEEDS_LINES) { 268 /* Encoding needs whole lines (e.g. iso-2022-*) 269 search back from end of available data for 270 and line marker 271 */ 272 STDCHAR *nl = ptr+use-1; 273 while (nl >= ptr) { 274 if (*nl == '\n') { 275 break; 276 } 277 nl--; 278 } 279 if (nl >= ptr && *nl == '\n') { 280 /* found a line - take up to and including that */ 281 use = (nl+1)-ptr; 282 } 283 else if (avail > 0) { 284 /* No line, but not EOF - append avail to the pending data */ 285 sv_catpvn(e->dataSV, (char*)ptr, use); 286 PerlIO_set_ptrcnt(n, ptr+use, 0); 287 goto retry; 288 } 289 else if (!SvCUR(e->dataSV)) { 290 goto end_of_file; 291 } 292 } 293 if (SvCUR(e->dataSV)) { 294 /* something left over from last time - create a normal 295 SV with new data appended 296 */ 297 if (use + SvCUR(e->dataSV) > e->base.bufsiz) { 298 if (e->flags & NEEDS_LINES) { 299 /* Have to grow buffer */ 300 e->base.bufsiz = use + SvCUR(e->dataSV); 301 PerlIOEncode_get_base(aTHX_ f); 302 } 303 else { 304 use = e->base.bufsiz - SvCUR(e->dataSV); 305 } 306 } 307 sv_catpvn(e->dataSV,(char*)ptr,use); 308 } 309 else { 310 /* Create a "dummy" SV to represent the available data from layer below */ 311 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) { 312 Safefree(SvPVX(e->dataSV)); 313 } 314 if (use > (SSize_t)e->base.bufsiz) { 315 if (e->flags & NEEDS_LINES) { 316 /* Have to grow buffer */ 317 e->base.bufsiz = use; 318 PerlIOEncode_get_base(aTHX_ f); 319 } 320 else { 321 use = e->base.bufsiz; 322 } 323 } 324 SvPVX(e->dataSV) = (char *) ptr; 325 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */ 326 SvCUR_set(e->dataSV,use); 327 SvPOK_only(e->dataSV); 328 } 329 SvUTF8_off(e->dataSV); 330 PUSHMARK(sp); 331 XPUSHs(e->enc); 332 XPUSHs(e->dataSV); 333 XPUSHs(e->chk); 334 PUTBACK; 335 if (call_method("decode", G_SCALAR) != 1) { 336 Perl_die(aTHX_ "panic: decode did not return a value"); 337 } 338 SPAGAIN; 339 uni = POPs; 340 PUTBACK; 341 /* Now get translated string (forced to UTF-8) and use as buffer */ 342 if (SvPOK(uni)) { 343 s = SvPVutf8(uni, len); 344#ifdef PARANOID_ENCODE_CHECKS 345 if (len && !is_utf8_string((U8*)s,len)) { 346 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s); 347 } 348#endif 349 } 350 if (len > 0) { 351 /* Got _something */ 352 /* if decode gave us back dataSV then data may vanish when 353 we do ptrcnt adjust - so take our copy now. 354 (The copy is a pain - need a put-it-here option for decode.) 355 */ 356 sv_setpvn(e->bufsv,s,len); 357 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv); 358 e->base.end = e->base.ptr + SvCUR(e->bufsv); 359 PerlIOBase(f)->flags |= PERLIO_F_RDBUF; 360 SvUTF8_on(e->bufsv); 361 362 /* Adjust ptr/cnt not taking anything which 363 did not translate - not clear this is a win */ 364 /* compute amount we took */ 365 use -= SvCUR(e->dataSV); 366 PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); 367 /* and as we did not take it it isn't pending */ 368 SvCUR_set(e->dataSV,0); 369 } else { 370 /* Got nothing - assume partial character so we need some more */ 371 /* Make sure e->dataSV is a normal SV before re-filling as 372 buffer alias will change under us 373 */ 374 s = SvPV(e->dataSV,len); 375 sv_setpvn(e->dataSV,s,len); 376 PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); 377 goto retry; 378 } 379 } 380 else { 381 end_of_file: 382 code = -1; 383 if (avail == 0) 384 PerlIOBase(f)->flags |= PERLIO_F_EOF; 385 else 386 PerlIOBase(f)->flags |= PERLIO_F_ERROR; 387 } 388 FREETMPS; 389 LEAVE; 390 POPSTACK; 391 return code; 392} 393 394IV 395PerlIOEncode_flush(pTHX_ PerlIO * f) 396{ 397 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); 398 IV code = 0; 399 400 if (e->bufsv) { 401 dSP; 402 SV *str; 403 char *s; 404 STRLEN len; 405 SSize_t count = 0; 406 if ((PerlIOBase(f)->flags & PERLIO_F_WRBUF) && (e->base.ptr > e->base.buf)) { 407 /* Write case - encode the buffer and write() to layer below */ 408 PUSHSTACKi(PERLSI_MAGIC); 409 SPAGAIN; 410 ENTER; 411 SAVETMPS; 412 PUSHMARK(sp); 413 XPUSHs(e->enc); 414 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); 415 SvUTF8_on(e->bufsv); 416 XPUSHs(e->bufsv); 417 XPUSHs(e->chk); 418 PUTBACK; 419 if (call_method("encode", G_SCALAR) != 1) { 420 Perl_die(aTHX_ "panic: encode did not return a value"); 421 } 422 SPAGAIN; 423 str = POPs; 424 PUTBACK; 425 s = SvPV(str, len); 426 count = PerlIO_write(PerlIONext(f),s,len); 427 if ((STRLEN)count != len) { 428 code = -1; 429 } 430 FREETMPS; 431 LEAVE; 432 POPSTACK; 433 if (PerlIO_flush(PerlIONext(f)) != 0) { 434 code = -1; 435 } 436 if (SvCUR(e->bufsv)) { 437 /* Did not all translate */ 438 e->base.ptr = e->base.buf+SvCUR(e->bufsv); 439 return code; 440 } 441 } 442 else if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF)) { 443 /* read case */ 444 /* if we have any untranslated stuff then unread that first */ 445 /* FIXME - unread is fragile is there a better way ? */ 446 if (e->dataSV && SvCUR(e->dataSV)) { 447 s = SvPV(e->dataSV, len); 448 count = PerlIO_unread(PerlIONext(f),s,len); 449 if ((STRLEN)count != len) { 450 code = -1; 451 } 452 SvCUR_set(e->dataSV,0); 453 } 454 /* See if there is anything left in the buffer */ 455 if (e->base.ptr < e->base.end) { 456 /* Bother - have unread data. 457 re-encode and unread() to layer below 458 */ 459 PUSHSTACKi(PERLSI_MAGIC); 460 SPAGAIN; 461 ENTER; 462 SAVETMPS; 463 str = sv_newmortal(); 464 sv_upgrade(str, SVt_PV); 465 SvPVX(str) = (char*)e->base.ptr; 466 SvLEN(str) = 0; 467 SvCUR_set(str, e->base.end - e->base.ptr); 468 SvPOK_only(str); 469 SvUTF8_on(str); 470 PUSHMARK(sp); 471 XPUSHs(e->enc); 472 XPUSHs(str); 473 XPUSHs(e->chk); 474 PUTBACK; 475 if (call_method("encode", G_SCALAR) != 1) { 476 Perl_die(aTHX_ "panic: encode did not return a value"); 477 } 478 SPAGAIN; 479 str = POPs; 480 PUTBACK; 481 s = SvPV(str, len); 482 count = PerlIO_unread(PerlIONext(f),s,len); 483 if ((STRLEN)count != len) { 484 code = -1; 485 } 486 FREETMPS; 487 LEAVE; 488 POPSTACK; 489 } 490 } 491 e->base.ptr = e->base.end = e->base.buf; 492 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); 493 } 494 return code; 495} 496 497IV 498PerlIOEncode_close(pTHX_ PerlIO * f) 499{ 500 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); 501 IV code; 502 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) { 503 /* Discard partial character */ 504 if (e->dataSV) { 505 SvCUR_set(e->dataSV,0); 506 } 507 /* Don't back decode and unread any pending data */ 508 e->base.ptr = e->base.end = e->base.buf; 509 } 510 code = PerlIOBase_close(aTHX_ f); 511 if (e->bufsv) { 512 /* This should only fire for write case */ 513 if (e->base.buf && e->base.ptr > e->base.buf) { 514 Perl_croak(aTHX_ "Close with partial character"); 515 } 516 SvREFCNT_dec(e->bufsv); 517 e->bufsv = Nullsv; 518 } 519 e->base.buf = NULL; 520 e->base.ptr = NULL; 521 e->base.end = NULL; 522 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); 523 return code; 524} 525 526Off_t 527PerlIOEncode_tell(pTHX_ PerlIO * f) 528{ 529 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf); 530 /* Unfortunately the only way to get a postion is to (re-)translate, 531 the UTF8 we have in bufefr and then ask layer below 532 */ 533 PerlIO_flush(f); 534 if (b->buf && b->ptr > b->buf) { 535 Perl_croak(aTHX_ "Cannot tell at partial character"); 536 } 537 return PerlIO_tell(PerlIONext(f)); 538} 539 540PerlIO * 541PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o, 542 CLONE_PARAMS * params, int flags) 543{ 544 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) { 545 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode); 546 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode); 547 if (oe->enc) { 548 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params); 549 } 550 } 551 return f; 552} 553 554SSize_t 555PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count) 556{ 557 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); 558 if (e->flags & NEEDS_LINES) { 559 SSize_t done = 0; 560 const char *ptr = (const char *) vbuf; 561 const char *end = ptr+count; 562 while (ptr < end) { 563 const char *nl = ptr; 564 while (nl < end && *nl++ != '\n') /* empty body */; 565 done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr); 566 if (done != nl-ptr) { 567 if (done > 0) { 568 ptr += done; 569 } 570 break; 571 } 572 ptr += done; 573 if (ptr[-1] == '\n') { 574 if (PerlIOEncode_flush(aTHX_ f) != 0) { 575 break; 576 } 577 } 578 } 579 return (SSize_t) (ptr - (const char *) vbuf); 580 } 581 else { 582 return PerlIOBuf_write(aTHX_ f, vbuf, count); 583 } 584} 585 586PerlIO_funcs PerlIO_encode = { 587 sizeof(PerlIO_funcs), 588 "encoding", 589 sizeof(PerlIOEncode), 590 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT, 591 PerlIOEncode_pushed, 592 PerlIOEncode_popped, 593 PerlIOBuf_open, 594 NULL, /* binmode - always pop */ 595 PerlIOEncode_getarg, 596 PerlIOBase_fileno, 597 PerlIOEncode_dup, 598 PerlIOBuf_read, 599 PerlIOBuf_unread, 600 PerlIOEncode_write, 601 PerlIOBuf_seek, 602 PerlIOEncode_tell, 603 PerlIOEncode_close, 604 PerlIOEncode_flush, 605 PerlIOEncode_fill, 606 PerlIOBase_eof, 607 PerlIOBase_error, 608 PerlIOBase_clearerr, 609 PerlIOBase_setlinebuf, 610 PerlIOEncode_get_base, 611 PerlIOBuf_bufsiz, 612 PerlIOBuf_get_ptr, 613 PerlIOBuf_get_cnt, 614 PerlIOBuf_set_ptrcnt, 615}; 616#endif /* encode layer */ 617 618MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding 619 620PROTOTYPES: ENABLE 621 622BOOT: 623{ 624 SV *chk = get_sv("PerlIO::encoding::fallback", GV_ADD|GV_ADDMULTI); 625 /* 626 * we now "use Encode ()" here instead of 627 * PerlIO/encoding.pm. This avoids SEGV when ":encoding()" 628 * is invoked without prior "use Encode". -- dankogai 629 */ 630 PUSHSTACKi(PERLSI_MAGIC); 631 SPAGAIN; 632 if (!get_cv(OUR_DEFAULT_FB, 0)) { 633#if 0 634 /* This would just be an irritant now loading works */ 635 Perl_warner(aTHX_ packWARN(WARN_IO), ":encoding without 'use Encode'"); 636#endif 637 ENTER; 638 /* Encode needs a lot of stack - it is likely to move ... */ 639 PUTBACK; 640 /* The SV is magically freed by load_module */ 641 load_module(PERL_LOADMOD_NOIMPORT, newSVpvn("Encode", 6), Nullsv, Nullsv); 642 SPAGAIN; 643 LEAVE; 644 } 645 PUSHMARK(sp); 646 PUTBACK; 647 if (call_pv(OUR_DEFAULT_FB, G_SCALAR) != 1) { 648 /* should never happen */ 649 Perl_die(aTHX_ "%s did not return a value",OUR_DEFAULT_FB); 650 } 651 SPAGAIN; 652 sv_setsv(chk, POPs); 653 PUTBACK; 654#ifdef PERLIO_LAYERS 655 PerlIO_define_layer(aTHX_ &PerlIO_encode); 656#endif 657 POPSTACK; 658} 659