1 /* 2 $Id: Encode.xs,v 2.4 2005/05/16 18:46:36 dankogai Exp dankogai $ 3 */ 4 5 #define PERL_NO_GET_CONTEXT 6 #include "EXTERN.h" 7 #include "perl.h" 8 #include "XSUB.h" 9 #define U8 U8 10 #include "encode.h" 11 12 # define PERLIO_MODNAME "PerlIO::encoding" 13 # define PERLIO_FILENAME "PerlIO/encoding.pm" 14 15 /* set 1 or more to profile. t/encoding.t dumps core because of 16 Perl_warner and PerlIO don't work well */ 17 #define ENCODE_XS_PROFILE 0 18 19 /* set 0 to disable floating point to calculate buffer size for 20 encode_method(). 1 is recommended. 2 restores NI-S original */ 21 #define ENCODE_XS_USEFP 1 22 23 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ 24 Perl_croak(aTHX_ "panic_unimplemented"); \ 25 return (y)0; /* fool picky compilers */ \ 26 } 27 /**/ 28 29 ###### UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) 30 ###### UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) 31 32 #define UTF8_ALLOW_STRICT 0 33 #define UTF8_ALLOW_NONSTRICT (UTF8_ALLOW_ANY & \ 34 ~(UTF8_ALLOW_CONTINUATION | \ 35 UTF8_ALLOW_NON_CONTINUATION | \ 36 UTF8_ALLOW_LONG)) 37 38 void 39 Encode_XSEncoding(pTHX_ encode_t * enc) 40 148 { 41 148 dSP; 42 148 HV *stash = gv_stashpv("Encode::XS", TRUE); 43 148 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); 44 148 int i = 0; 45 148 PUSHMARK(sp); 46 148 XPUSHs(sv); 47 296 while (enc->name[i]) { 48 148 const char *name = enc->name[i++]; 49 148 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); 50 } 51 148 PUTBACK; 52 148 call_pv("Encode::define_encoding", G_DISCARD); 53 148 SvREFCNT_dec(sv); 54 } 55 56 void 57 call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) 58 ###### { 59 /* Exists for breakpointing */ 60 } 61 62 63 #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" 64 #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" 65 66 static SV * 67 encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, 68 int check, STRLEN * offset, SV * term, int * retcode) 69 39206 { 70 39206 STRLEN slen; 71 39206 U8 *s = (U8 *) SvPV(src, slen); 72 39206 STRLEN tlen = slen; 73 39206 STRLEN ddone = 0; 74 39206 STRLEN sdone = 0; 75 76 /* We allocate slen+1. 77 PerlIO dumps core if this value is smaller than this. */ 78 39206 SV *dst = sv_2mortal(newSV(slen+1)); 79 39206 U8 *d = (U8 *)SvPVX(dst); 80 39206 STRLEN dlen = SvLEN(dst)-1; 81 39206 int code = 0; 82 39206 STRLEN trmlen = 0; 83 39206 U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL; 84 85 39206 if (offset) { 86 223 s += *offset; 87 223 if (slen > *offset){ /* safeguard against slen overflow */ 88 223 slen -= *offset; 89 }else{ 90 ###### slen = 0; 91 } 92 223 tlen = slen; 93 } 94 95 39206 if (slen == 0){ 96 46 SvCUR_set(dst, 0); 97 46 SvPOK_only(dst); 98 46 goto ENCODE_END; 99 } 100 101 40946 while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check, 102 trm, trmlen)) ) 103 { 104 2164 SvCUR_set(dst, dlen+ddone); 105 2164 SvPOK_only(dst); 106 107 2164 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL || 108 code == ENCODE_FOUND_TERM) { 109 1828 break; 110 } 111 1828 switch (code) { 112 case ENCODE_NOSPACE: 113 { 114 1139 STRLEN more = 0; /* make sure you initialize! */ 115 1139 STRLEN sleft; 116 1139 sdone += slen; 117 1139 ddone += dlen; 118 1139 sleft = tlen - sdone; 119 #if ENCODE_XS_PROFILE >= 2 120 Perl_warn(aTHX_ 121 "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n", 122 more, sdone, sleft, SvLEN(dst)); 123 #endif 124 1139 if (sdone != 0) { /* has src ever been processed ? */ 125 #if ENCODE_XS_USEFP == 2 126 more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone 127 - SvLEN(dst); 128 #elif ENCODE_XS_USEFP 129 1139 more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft); 130 #else 131 /* safe until SvLEN(dst) == MAX_INT/16 */ 132 more = (16*SvLEN(dst)+1)/sdone/16 * sleft; 133 #endif 134 } 135 1139 more += UTF8_MAXLEN; /* insurance policy */ 136 1139 d = (U8 *) SvGROW(dst, SvLEN(dst) + more); 137 /* dst need to grow need MORE bytes! */ 138 1139 if (ddone >= SvLEN(dst)) { 139 ###### Perl_croak(aTHX_ "Destination couldn't be grown."); 140 } 141 1139 dlen = SvLEN(dst)-ddone-1; 142 1139 d += ddone; 143 1139 s += slen; 144 1139 slen = tlen-sdone; 145 1139 continue; 146 } 147 case ENCODE_NOREP: 148 /* encoding */ 149 689 if (dir == enc->f_utf8) { 150 645 STRLEN clen; 151 645 UV ch = 152 utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), 153 645 &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); 154 645 if (check & ENCODE_DIE_ON_ERR) { 155 1 Perl_croak(aTHX_ ERR_ENCODE_NOMAP, 156 (UV)ch, enc->name[0]); 157 644 return &PL_sv_undef; /* never reaches but be safe */ 158 } 159 644 if (check & ENCODE_WARN_ON_ERR){ 160 130 Perl_warner(aTHX_ packWARN(WARN_UTF8), 161 ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); 162 } 163 644 if (check & ENCODE_RETURN_ON_ERR){ 164 2 goto ENCODE_SET_SRC; 165 } 166 642 if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 167 386 SV* subchar = 168 newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" : 169 check & ENCODE_HTMLCREF ? "&#%" UVuf ";" : 170 386 "&#x%" UVxf ";", (UV)ch); 171 386 sdone += slen + clen; 172 386 ddone += dlen + SvCUR(subchar); 173 386 sv_catsv(dst, subchar); 174 386 SvREFCNT_dec(subchar); 175 } else { 176 /* fallback char */ 177 256 sdone += slen + clen; 178 256 ddone += dlen + enc->replen; 179 256 sv_catpvn(dst, (char*)enc->rep, enc->replen); 180 } 181 } 182 /* decoding */ 183 else { 184 44 if (check & ENCODE_DIE_ON_ERR){ 185 2 Perl_croak(aTHX_ ERR_DECODE_NOMAP, 186 enc->name[0], (UV)s[slen]); 187 42 return &PL_sv_undef; /* never reaches but be safe */ 188 } 189 42 if (check & ENCODE_WARN_ON_ERR){ 190 ###### Perl_warner( 191 aTHX_ packWARN(WARN_UTF8), 192 ERR_DECODE_NOMAP, 193 enc->name[0], (UV)s[slen]); 194 } 195 42 if (check & ENCODE_RETURN_ON_ERR){ 196 37 goto ENCODE_SET_SRC; 197 } 198 5 if (check & 199 (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 200 1 SV* subchar = newSVpvf("\\x%02" UVXf, (UV)s[slen]); 201 1 sdone += slen + 1; 202 1 ddone += dlen + SvCUR(subchar); 203 1 sv_catsv(dst, subchar); 204 1 SvREFCNT_dec(subchar); 205 } else { 206 4 sdone += slen + 1; 207 4 ddone += dlen + strlen(FBCHAR_UTF8); 208 4 sv_catpv(dst, FBCHAR_UTF8); 209 } 210 } 211 /* settle variables when fallback */ 212 647 d = (U8 *)SvEND(dst); 213 647 dlen = SvLEN(dst) - ddone - 1; 214 647 s = (U8*)SvPVX(src) + sdone; 215 647 slen = tlen - sdone; 216 647 break; 217 218 default: 219 ###### Perl_croak(aTHX_ "Unexpected code %d converting %s %s", 220 code, (dir == enc->f_utf8) ? "to" : "from", 221 enc->name[0]); 222 39157 return &PL_sv_undef; 223 } 224 } 225 ENCODE_SET_SRC: 226 39157 if (check && !(check & ENCODE_LEAVE_SRC)){ 227 16830 sdone = SvCUR(src) - (slen+sdone); 228 16830 if (sdone) { 229 152 sv_setpvn(src, (char*)s+slen, sdone); 230 } 231 16830 SvCUR_set(src, sdone); 232 } 233 /* warn("check = 0x%X, code = 0x%d\n", check, code); */ 234 235 39157 SvCUR_set(dst, dlen+ddone); 236 39157 SvPOK_only(dst); 237 238 #if ENCODE_XS_PROFILE 239 if (SvCUR(dst) > SvCUR(src)){ 240 Perl_warn(aTHX_ 241 "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n", 242 SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), 243 (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); 244 } 245 #endif 246 247 39157 if (offset) 248 223 *offset += sdone + slen; 249 250 ENCODE_END: 251 39203 *SvEND(dst) = '\0'; 252 39203 if (retcode) *retcode = code; 253 39203 return dst; 254 } 255 256 static bool 257 strict_utf8(pTHX_ SV* sv) 258 352 { 259 352 HV* hv; 260 352 SV** svp; 261 352 sv = SvRV(sv); 262 352 if (!sv || SvTYPE(sv) != SVt_PVHV) 263 ###### return 0; 264 352 hv = (HV*)sv; 265 352 svp = hv_fetch(hv, "strict_utf8", 11, 0); 266 352 if (!svp) 267 270 return 0; 268 82 return SvTRUE(*svp); 269 } 270 271 static U8* 272 process_utf8(pTHX_ SV* dst, U8* s, U8* e, int check, 273 bool encode, bool strict, bool stop_at_partial) 274 335 { 275 335 UV uv; 276 335 STRLEN ulen; 277 278 335 SvPOK_only(dst); 279 335 SvCUR_set(dst,0); 280 281 65860 while (s < e) { 282 65563 if (UTF8_IS_INVARIANT(*s)) { 283 17432 sv_catpvn(dst, (char *)s, 1); 284 17432 s++; 285 17432 continue; 286 } 287 288 48131 if (UTF8_IS_START(*s)) { 289 47778 U8 skip = UTF8SKIP(s); 290 47778 if ((s + skip) > e) { 291 /* Partial character */ 292 /* XXX could check that rest of bytes are UTF8_IS_CONTINUATION(ch) */ 293 19 if (stop_at_partial) 294 8 break; 295 296 47759 goto malformed_byte; 297 } 298 299 47759 uv = utf8n_to_uvuni(s, e - s, &ulen, 300 UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT : 301 UTF8_ALLOW_NONSTRICT) 302 ); 303 #if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */ 304 47759 if (strict && uv > PERL_UNICODE_MAX) 305 6 ulen = -1; 306 #endif 307 47759 if (ulen == -1) { 308 316 if (strict) { 309 15 uv = utf8n_to_uvuni(s, e - s, &ulen, 310 UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT); 311 15 if (ulen == -1) 312 1 goto malformed_byte; 313 47443 goto malformed; 314 } 315 47443 goto malformed_byte; 316 } 317 318 319 /* Whole char is good */ 320 47443 sv_catpvn(dst,(char *)s,skip); 321 47443 s += skip; 322 47443 continue; 323 } 324 325 /* If we get here there is something wrong with alleged UTF-8 */ 326 malformed_byte: 327 666 uv = (UV)*s; 328 666 ulen = 1; 329 330 malformed: 331 680 if (check & ENCODE_DIE_ON_ERR){ 332 15 if (encode) 333 13 Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, "utf8"); 334 else 335 2 Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", uv); 336 } 337 665 if (check & ENCODE_WARN_ON_ERR){ 338 136 if (encode) 339 ###### Perl_warner(aTHX_ packWARN(WARN_UTF8), 340 ERR_ENCODE_NOMAP, uv, "utf8"); 341 else 342 136 Perl_warner(aTHX_ packWARN(WARN_UTF8), 343 ERR_DECODE_NOMAP, "utf8", uv); 344 } 345 665 if (check & ENCODE_RETURN_ON_ERR) { 346 15 break; 347 } 348 650 if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 349 391 SV* subchar = newSVpvf(check & ENCODE_PERLQQ ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}"): 350 check & ENCODE_HTMLCREF ? "&#%" UVuf ";" : 351 391 "&#x%" UVxf ";", uv); 352 391 sv_catsv(dst, subchar); 353 391 SvREFCNT_dec(subchar); 354 } else { 355 259 sv_catpv(dst, FBCHAR_UTF8); 356 } 357 650 s += ulen; 358 } 359 320 *SvEND(dst) = '\0'; 360 361 320 return s; 362 } 363 364 365 MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ 366 367 PROTOTYPES: DISABLE 368 369 void 370 Method_decode_xs(obj,src,check = 0) 371 SV * obj 372 SV * src 373 int check 374 CODE: 375 { 376 297 STRLEN slen; 377 297 U8 *s = (U8 *) SvPV(src, slen); 378 297 U8 *e = (U8 *) SvEND(src); 379 297 SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ 380 381 /* 382 * PerlIO check -- we assume the object is of PerlIO if renewed 383 */ 384 297 int renewed = 0; 385 297 dSP; ENTER; SAVETMPS; 386 297 PUSHMARK(sp); 387 297 XPUSHs(obj); 388 297 PUTBACK; 389 297 if (call_method("renewed",G_SCALAR) == 1) { 390 297 SPAGAIN; 391 297 renewed = POPi; 392 297 PUTBACK; 393 #if 0 394 fprintf(stderr, "renewed == %d\n", renewed); 395 #endif 396 } 397 297 FREETMPS; LEAVE; 398 /* end PerlIO check */ 399 400 297 if (SvUTF8(src)) { 401 21 s = utf8_to_bytes(s,&slen); 402 21 if (s) { 403 21 SvCUR_set(src,slen); 404 21 SvUTF8_off(src); 405 21 e = s+slen; 406 } 407 else { 408 ###### croak("Cannot decode string with wide characters"); 409 } 410 } 411 412 297 s = process_utf8(aTHX_ dst, s, e, check, 0, strict_utf8(aTHX_ obj), renewed); 413 414 /* Clear out translated part of source unless asked not to */ 415 295 if (check && !(check & ENCODE_LEAVE_SRC)){ 416 40 slen = e-s; 417 40 if (slen) { 418 23 sv_setpvn(src, (char*)s, slen); 419 } 420 40 SvCUR_set(src, slen); 421 } 422 295 SvUTF8_on(dst); 423 295 ST(0) = sv_2mortal(dst); 424 295 XSRETURN(1); 425 } 426 427 void 428 Method_encode_xs(obj,src,check = 0) 429 SV * obj 430 SV * src 431 int check 432 CODE: 433 { 434 64 STRLEN slen; 435 64 U8 *s = (U8 *) SvPV(src, slen); 436 64 U8 *e = (U8 *) SvEND(src); 437 64 SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ 438 64 if (SvUTF8(src)) { 439 /* Already encoded */ 440 55 if (strict_utf8(aTHX_ obj)) { 441 38 s = process_utf8(aTHX_ dst, s, e, check, 1, 1, 0); 442 } 443 else { 444 /* trust it and just copy the octets */ 445 17 sv_setpvn(dst,(char *)s,(e-s)); 446 17 s = e; 447 } 448 } 449 else { 450 /* Native bytes - can always encode */ 451 9 U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ 452 33 while (s < e) { 453 24 UV uv = NATIVE_TO_UNI((UV) *s++); 454 24 if (UNI_IS_INVARIANT(uv)) 455 22 *d++ = (U8)UTF_TO_NATIVE(uv); 456 else { 457 2 *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); 458 2 *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); 459 } 460 } 461 9 SvCUR_set(dst, d- (U8 *)SvPVX(dst)); 462 9 *SvEND(dst) = '\0'; 463 } 464 465 /* Clear out translated part of source unless asked not to */ 466 51 if (check && !(check & ENCODE_LEAVE_SRC)){ 467 9 slen = e-s; 468 9 if (slen) { 469 ###### sv_setpvn(src, (char*)s, slen); 470 } 471 9 SvCUR_set(src, slen); 472 } 473 51 SvPOK_only(dst); 474 51 SvUTF8_off(dst); 475 51 ST(0) = sv_2mortal(dst); 476 51 XSRETURN(1); 477 } 478 479 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ 480 481 PROTOTYPES: ENABLE 482 483 void 484 Method_renew(obj) 485 SV * obj 486 CODE: 487 { 488 53 XSRETURN(1); 489 } 490 491 int 492 Method_renewed(obj) 493 SV * obj 494 CODE: 495 ###### RETVAL = 0; 496 OUTPUT: 497 RETVAL 498 499 void 500 Method_name(obj) 501 SV * obj 502 CODE: 503 { 504 984 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 505 984 ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); 506 984 XSRETURN(1); 507 } 508 509 void 510 Method_cat_decode(obj, dst, src, off, term, check = 0) 511 SV * obj 512 SV * dst 513 SV * src 514 SV * off 515 SV * term 516 int check 517 CODE: 518 { 519 223 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 520 223 STRLEN offset = (STRLEN)SvIV(off); 521 223 int code = 0; 522 223 if (SvUTF8(src)) { 523 ###### sv_utf8_downgrade(src, FALSE); 524 } 525 sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, 526 223 &offset, term, &code)); 527 223 SvIV_set(off, (IV)offset); 528 223 if (code == ENCODE_FOUND_TERM) { 529 223 ST(0) = &PL_sv_yes; 530 }else{ 531 ###### ST(0) = &PL_sv_no; 532 } 533 223 XSRETURN(1); 534 } 535 536 void 537 Method_decode(obj,src,check = 0) 538 SV * obj 539 SV * src 540 int check 541 CODE: 542 { 543 4410 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 544 4410 if (SvUTF8(src)) { 545 248 sv_utf8_downgrade(src, FALSE); 546 } 547 4410 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, 548 NULL, Nullsv, NULL); 549 4408 SvUTF8_on(ST(0)); 550 4408 XSRETURN(1); 551 } 552 553 void 554 Method_encode(obj,src,check = 0) 555 SV * obj 556 SV * src 557 int check 558 CODE: 559 { 560 34573 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 561 34573 sv_utf8_upgrade(src); 562 34573 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, 563 NULL, Nullsv, NULL); 564 34572 XSRETURN(1); 565 } 566 567 void 568 Method_needs_lines(obj) 569 SV * obj 570 CODE: 571 { 572 /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ 573 53 ST(0) = &PL_sv_no; 574 53 XSRETURN(1); 575 } 576 577 void 578 Method_perlio_ok(obj) 579 SV * obj 580 CODE: 581 { 582 /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ 583 /* require_pv(PERLIO_FILENAME); */ 584 585 8 eval_pv("require PerlIO::encoding", 0); 586 587 8 if (SvTRUE(get_sv("@", 0))) { 588 ###### ST(0) = &PL_sv_no; 589 }else{ 590 8 ST(0) = &PL_sv_yes; 591 } 592 8 XSRETURN(1); 593 } 594 595 MODULE = Encode PACKAGE = Encode 596 597 PROTOTYPES: ENABLE 598 599 I32 600 _bytes_to_utf8(sv, ...) 601 SV * sv 602 CODE: 603 { 604 ###### SV * encoding = items == 2 ? ST(1) : Nullsv; 605 606 ###### if (encoding) 607 ###### RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); 608 else { 609 ###### STRLEN len; 610 ###### U8* s = (U8*)SvPV(sv, len); 611 ###### U8* converted; 612 613 ###### converted = bytes_to_utf8(s, &len); /* This allocs */ 614 ###### sv_setpvn(sv, (char *)converted, len); 615 ###### SvUTF8_on(sv); /* XXX Should we? */ 616 ###### Safefree(converted); /* ... so free it */ 617 ###### RETVAL = len; 618 } 619 } 620 OUTPUT: 621 RETVAL 622 623 I32 624 _utf8_to_bytes(sv, ...) 625 SV * sv 626 CODE: 627 { 628 ###### SV * to = items > 1 ? ST(1) : Nullsv; 629 ###### SV * check = items > 2 ? ST(2) : Nullsv; 630 631 ###### if (to) { 632 ###### RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); 633 } else { 634 ###### STRLEN len; 635 ###### U8 *s = (U8*)SvPV(sv, len); 636 637 ###### RETVAL = 0; 638 ###### if (SvTRUE(check)) { 639 /* Must do things the slow way */ 640 ###### U8 *dest; 641 /* We need a copy to pass to check() */ 642 ###### U8 *src = (U8*)savepv((char *)s); 643 ###### U8 *send = s + len; 644 645 ###### New(83, dest, len, U8); /* I think */ 646 647 ###### while (s < send) { 648 ###### if (*s < 0x80){ 649 ###### *dest++ = *s++; 650 } else { 651 ###### STRLEN ulen; 652 ###### UV uv = *s++; 653 654 /* Have to do it all ourselves because of error routine, 655 aargh. */ 656 ###### if (!(uv & 0x40)){ goto failure; } 657 ###### if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } 658 ###### else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } 659 ###### else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } 660 ###### else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } 661 ###### else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } 662 ###### else if (!(uv & 0x01)) { ulen = 7; uv = 0; } 663 ###### else { ulen = 13; uv = 0; } 664 665 /* Note change to utf8.c variable naming, for variety */ 666 ###### while (ulen--) { 667 ###### if ((*s & 0xc0) != 0x80){ 668 ###### goto failure; 669 } else { 670 ###### uv = (uv << 6) | (*s++ & 0x3f); 671 } 672 } 673 ###### if (uv > 256) { 674 failure: 675 ###### call_failure(check, s, dest, src); 676 /* Now what happens? */ 677 } 678 ###### *dest++ = (U8)uv; 679 } 680 } 681 } else { 682 ###### RETVAL = (utf8_to_bytes(s, &len) ? len : 0); 683 } 684 } 685 } 686 OUTPUT: 687 RETVAL 688 689 bool 690 is_utf8(sv, check = 0) 691 SV * sv 692 int check 693 CODE: 694 { 695 545 if (SvGMAGICAL(sv)) /* it could be $1, for example */ 696 1 sv = newSVsv(sv); /* GMAGIG will be done */ 697 545 if (SvPOK(sv)) { 698 545 RETVAL = SvUTF8(sv) ? TRUE : FALSE; 699 545 if (RETVAL && 700 check && 701 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) 702 ###### RETVAL = FALSE; 703 } else { 704 ###### RETVAL = FALSE; 705 } 706 545 if (sv != ST(0)) 707 1 SvREFCNT_dec(sv); /* it was a temp copy */ 708 } 709 OUTPUT: 710 RETVAL 711 712 SV * 713 _utf8_on(sv) 714 SV * sv 715 CODE: 716 { 717 1 if (SvPOK(sv)) { 718 1 SV *rsv = newSViv(SvUTF8(sv)); 719 1 RETVAL = rsv; 720 1 SvUTF8_on(sv); 721 } else { 722 ###### RETVAL = &PL_sv_undef; 723 } 724 } 725 OUTPUT: 726 RETVAL 727 728 SV * 729 _utf8_off(sv) 730 SV * sv 731 CODE: 732 { 733 1 if (SvPOK(sv)) { 734 1 SV *rsv = newSViv(SvUTF8(sv)); 735 1 RETVAL = rsv; 736 1 SvUTF8_off(sv); 737 } else { 738 ###### RETVAL = &PL_sv_undef; 739 } 740 } 741 OUTPUT: 742 RETVAL 743 744 int 745 DIE_ON_ERR() 746 CODE: 747 ###### RETVAL = ENCODE_DIE_ON_ERR; 748 OUTPUT: 749 RETVAL 750 751 int 752 WARN_ON_ERR() 753 CODE: 754 19 RETVAL = ENCODE_WARN_ON_ERR; 755 OUTPUT: 756 RETVAL 757 758 int 759 LEAVE_SRC() 760 CODE: 761 1489 RETVAL = ENCODE_LEAVE_SRC; 762 OUTPUT: 763 RETVAL 764 765 int 766 RETURN_ON_ERR() 767 CODE: 768 ###### RETVAL = ENCODE_RETURN_ON_ERR; 769 OUTPUT: 770 RETVAL 771 772 int 773 PERLQQ() 774 CODE: 775 35 RETVAL = ENCODE_PERLQQ; 776 OUTPUT: 777 RETVAL 778 779 int 780 HTMLCREF() 781 CODE: 782 1 RETVAL = ENCODE_HTMLCREF; 783 OUTPUT: 784 RETVAL 785 786 int 787 XMLCREF() 788 CODE: 789 ###### RETVAL = ENCODE_XMLCREF; 790 OUTPUT: 791 RETVAL 792 793 int 794 FB_DEFAULT() 795 CODE: 796 2 RETVAL = ENCODE_FB_DEFAULT; 797 OUTPUT: 798 RETVAL 799 800 int 801 FB_CROAK() 802 CODE: 803 10 RETVAL = ENCODE_FB_CROAK; 804 OUTPUT: 805 RETVAL 806 807 int 808 FB_QUIET() 809 CODE: 810 204 RETVAL = ENCODE_FB_QUIET; 811 OUTPUT: 812 RETVAL 813 814 int 815 FB_WARN() 816 CODE: 817 2 RETVAL = ENCODE_FB_WARN; 818 OUTPUT: 819 RETVAL 820 821 int 822 FB_PERLQQ() 823 CODE: 824 1518 RETVAL = ENCODE_FB_PERLQQ; 825 OUTPUT: 826 RETVAL 827 828 int 829 FB_HTMLCREF() 830 CODE: 831 2 RETVAL = ENCODE_FB_HTMLCREF; 832 OUTPUT: 833 RETVAL 834 835 int 836 FB_XMLCREF() 837 CODE: 838 2 RETVAL = ENCODE_FB_XMLCREF; 839 OUTPUT: 840 RETVAL 841 842 BOOT: 843 { 844 #include "def_t.h" 845 #include "def_t.exh" 846 } 847 }