     1			/*
     2			 * This file was generated automatically by ExtUtils::ParseXS version 2.10 from the
     3			 * contents of Unicode.xs. Do not edit this file, edit Unicode.xs instead.
     4			 *
     5			 *	ANY CHANGES MADE HERE WILL BE LOST! 
     6			 *
     7			 */
     8			
     9			#line 1 "Unicode.xs"
    10			/*
    11			 $Id: Unicode.xs,v 2.1 2004/10/24 13:00:29 dankogai Exp $
    12			 */
    13			
    14			#define PERL_NO_GET_CONTEXT
    15			#include "EXTERN.h"
    16			#include "perl.h"
    17			#include "XSUB.h"
    18			#define U8 U8
    19			#include "../Encode/encode.h"
    20			
    21			#define FBCHAR			0xFFFd
    22			#define BOM_BE			0xFeFF
    23			#define BOM16LE			0xFFFe
    24			#define BOM32LE			0xFFFe0000
    25			#define issurrogate(x)		(0xD800 <= (x)  && (x) <= 0xDFFF )
    26			#define isHiSurrogate(x)	(0xD800 <= (x)  && (x) <  0xDC00 )
    27			#define isLoSurrogate(x)	(0xDC00 <= (x)  && (x) <= 0xDFFF )
    28			#define invalid_ucs2(x)         ( issurrogate(x) || 0xFFFF < (x) )
    29			
    30			static UV
    31			enc_unpack(pTHX_ U8 **sp,U8 *e,STRLEN size,U8 endian)
    32			{
    33			    U8 *s = *sp;
    34			    UV v = 0;
    35			    if (s+size > e) {
    36				croak("Partial character %c",(char) endian);
    37			    }
    38			    switch(endian) {
    39			    case 'N':
    40				v = *s++;
    41				v = (v << 8) | *s++;
    42			    case 'n':
    43				v = (v << 8) | *s++;
    44				v = (v << 8) | *s++;
    45				break;
    46			    case 'V':
    47			    case 'v':
    48				v |= *s++;
    49				v |= (*s++ << 8);
    50				if (endian == 'v')
    51				    break;
    52				v |= (*s++ << 16);
    53				v |= (*s++ << 24);
    54				break;
    55			    default:
    56				croak("Unknown endian %c",(char) endian);
    57				break;
    58			    }
    59			    *sp = s;
    60			    return v;
    61			}
    62			
    63			void
    64			enc_pack(pTHX_ SV *result,STRLEN size,U8 endian,UV value)
    65			{
    66			    U8 *d = (U8 *)SvGROW(result,SvCUR(result)+size+1);
    67			    switch(endian) {
    68			    case 'v':
    69			    case 'V':
    70				d += SvCUR(result);
    71				SvCUR_set(result,SvCUR(result)+size);
    72				while (size--) {
    73				    *d++ = (U8)(value & 0xFF);
    74				    value >>= 8;
    75				}
    76				break;
    77			    case 'n':
    78			    case 'N':
    79				SvCUR_set(result,SvCUR(result)+size);
    80				d += SvCUR(result);
    81				while (size--) {
    82				    *--d = (U8)(value & 0xFF);
    83				    value >>= 8;
    84				}
    85				break;
    86			    default:
    87				croak("Unknown endian %c",(char) endian);
    88				break;
    89			    }
    90			}
    91			
    92			#ifndef PERL_UNUSED_VAR
    93			#  define PERL_UNUSED_VAR(var) if (0) var = var
    94			#endif
    95			
    96			#line 97 "Unicode.c"
    97			#define attr(k, l)  (hv_exists((HV *)SvRV(obj),k,l) ? \
    98			    *hv_fetch((HV *)SvRV(obj),k,l,0) : &PL_sv_undef)
    99			
   100			XS(XS_Encode__Unicode_decode_xs); /* prototype to pass -Wmissing-prototypes */
   101			XS(XS_Encode__Unicode_decode_xs)
   102	        2232    {
   103	        2232        dXSARGS;
   104	        2232        if (items < 2 || items > 3)
   105	      ######    	Perl_croak(aTHX_ "Usage: Encode::Unicode::decode_xs(obj, str, check = 0)");
   106	        2232        PERL_UNUSED_VAR(cv); /* -W */
   107			    {
   108	        2232    	SV *	obj = ST(0);
   109	        2232    	SV *	str = ST(1);
   110	        2232    	IV	check;
   111			
   112	        2232    	if (items < 3)
   113	        2100    	    check = 0;
   114				else {
   115	         132    	    check = (IV)SvIV(ST(2));
   116				}
   117			#line 96 "Unicode.xs"
   118			{
   119			    U8 endian   = *((U8 *)SvPV_nolen(attr("endian", 6)));
   120			    int size    =   SvIV(attr("size",   4));
   121			    int ucs2    = SvTRUE(attr("ucs2",   4));
   122			    int renewed = SvTRUE(attr("renewed",  7));
   123			    SV *result  = newSVpvn("",0);
   124			    STRLEN ulen;
   125			    U8 *s = (U8 *)SvPVbyte(str,ulen);
   126			    U8 *e = (U8 *)SvEND(str);
   127			    ST(0) = sv_2mortal(result);
   128			    SvUTF8_on(result);
   129			
   130			    if (!endian && s+size <= e) {
   131				UV bom;
   132				endian = (size == 4) ? 'N' : 'n';
   133				bom = enc_unpack(aTHX_ &s,e,size,endian);
   134			        if (bom != BOM_BE) {
   135				    if (bom == BOM16LE) {
   136					endian = 'v';
   137				    }
   138				    else if (bom == BOM32LE) {
   139					endian = 'V';
   140				    }
   141				    else {
   142					croak("%"SVf":Unrecognised BOM %"UVxf,
   143			                      *hv_fetch((HV *)SvRV(obj),"Name",4,0),
   144					      bom);
   145				    }
   146				}
   147			#if 1
   148				/* Update endian for next sequence */
   149				if (renewed) {
   150				    hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
   151				}
   152			#endif
   153			    }
   154			    while (s < e && s+size <= e) {
   155				UV ord = enc_unpack(aTHX_ &s,e,size,endian);
   156				U8 *d;
   157				if (size != 4 && invalid_ucs2(ord)) {
   158				    if (ucs2) {
   159					if (check) {
   160					    croak("%"SVf":no surrogates allowed %"UVxf,
   161						  *hv_fetch((HV *)SvRV(obj),"Name",4,0),
   162						  ord);
   163					}
   164					if (s+size <= e) {
   165			                    /* skip the next one as well */
   166					    enc_unpack(aTHX_ &s,e,size,endian);
   167					}
   168					ord = FBCHAR;
   169				    }
   170				    else {
   171					UV lo;
   172					if (!isHiSurrogate(ord)) {
   173					    croak("%"SVf":Malformed HI surrogate %"UVxf,
   174						  *hv_fetch((HV *)SvRV(obj),"Name",4,0),
   175						  ord);
   176					}
   177					if (s+size > e) {
   178					    /* Partial character */
   179					    s -= size;   /* back up to 1st half */
   180					    break;       /* And exit loop */
   181					}
   182					lo = enc_unpack(aTHX_ &s,e,size,endian);
   183					if (!isLoSurrogate(lo)){
   184					    croak("%"SVf":Malformed LO surrogate %"UVxf,
   185						  *hv_fetch((HV *)SvRV(obj),"Name",4,0),
   186						  ord);
   187					}
   188					ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
   189				    }
   190				}
   191				d = (U8 *) SvGROW(result,SvCUR(result)+UTF8_MAXLEN+1);
   192				d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0);
   193				SvCUR_set(result,d - (U8 *)SvPVX(result));
   194			    }
   195			    if (s < e) {
   196				/* unlikely to happen because it's fixed-length -- dankogai */
   197				if (check & ENCODE_WARN_ON_ERR){
   198				    Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character",
   199						*hv_fetch((HV *)SvRV(obj),"Name",4,0));
   200				}
   201			    }
   202			    if (check && !(check & ENCODE_LEAVE_SRC)){
   203				if (s < e) {
   204				    Move(s,SvPVX(str),e-s,U8);
   205				    SvCUR_set(str,(e-s));
   206				}
   207				else {
   208				    SvCUR_set(str,0);
   209				}
   210				*SvEND(str) = '\0';
   211			    }
   212			    XSRETURN(1);
   213			}
   214			#line 215 "Unicode.c"
   215			    }
   216			    XSRETURN(1);
   217			}
   218			
   219			
   220			XS(XS_Encode__Unicode_encode_xs); /* prototype to pass -Wmissing-prototypes */
   221			XS(XS_Encode__Unicode_encode_xs)
   222	        2190    {
   223	        2190        dXSARGS;
   224	        2190        if (items < 2 || items > 3)
   225	      ######    	Perl_croak(aTHX_ "Usage: Encode::Unicode::encode_xs(obj, utf8, check = 0)");
   226	        2190        PERL_UNUSED_VAR(cv); /* -W */
   227			    {
   228	        2190    	SV *	obj = ST(0);
   229	        2190    	SV *	utf8 = ST(1);
   230	        2190    	IV	check;
   231			
   232	        2190    	if (items < 3)
   233	        2100    	    check = 0;
   234				else {
   235	          90    	    check = (IV)SvIV(ST(2));
   236				}
   237			#line 199 "Unicode.xs"
   238			{
   239			    U8 endian   = *((U8 *)SvPV_nolen(attr("endian", 6)));
   240			    int size    =   SvIV(attr("size",   4));
   241			    int ucs2    = SvTRUE(attr("ucs2",   4));
   242			    int renewed = SvTRUE(attr("renewed",  7));
   243			    SV *result  = newSVpvn("",0);
   244			    STRLEN ulen;
   245			    U8 *s = (U8 *)SvPVutf8(utf8,ulen);
   246			    U8 *e = (U8 *)SvEND(utf8);
   247			    ST(0) = sv_2mortal(result);
   248			    if (!endian) {
   249				endian = (size == 4) ? 'N' : 'n';
   250				enc_pack(aTHX_ result,size,endian,BOM_BE);
   251			#if 1
   252				/* Update endian for next sequence */
   253				if (renewed){
   254				    hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
   255				}
   256			#endif
   257			    }
   258			    while (s < e && s+UTF8SKIP(s) <= e) {
   259				STRLEN len;
   260				UV ord = utf8n_to_uvuni(s, e-s, &len, 0);
   261			        s += len;
   262				if (size != 4 && invalid_ucs2(ord)) {
   263				    if (!issurrogate(ord)){
   264					if (ucs2) {
   265					    if (check) {
   266						croak("%"SVf":code point \"\\x{%"UVxf"}\" too high",
   267							  *hv_fetch((HV *)SvRV(obj),"Name",4,0),ord);
   268					    }
   269					    enc_pack(aTHX_ result,size,endian,FBCHAR);
   270					}else{
   271					    UV hi = ((ord - 0x10000) >> 10)   + 0xD800;
   272					    UV lo = ((ord - 0x10000) & 0x3FF) + 0xDC00;
   273					    enc_pack(aTHX_ result,size,endian,hi);
   274					    enc_pack(aTHX_ result,size,endian,lo);
   275					}
   276				    }
   277				    else {
   278					/* not supposed to happen */
   279					enc_pack(aTHX_ result,size,endian,FBCHAR);
   280				    }
   281				}
   282				else {
   283				    enc_pack(aTHX_ result,size,endian,ord);
   284				}
   285			    }
   286			    if (s < e) {
   287				/* UTF-8 partial char happens often on PerlIO.
   288				   Since this is okay and normal, we do not warn.
   289				   But this is critical when you choose to LEAVE_SRC
   290				   in which case we die */
   291				if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)){
   292				    Perl_croak(aTHX_ "%"SVf":partial character is not allowed "
   293					       "when CHECK = 0x%" UVuf,
   294					       *hv_fetch((HV *)SvRV(obj),"Name",4,0), check);
   295				}
   296			
   297			    }
   298			    if (check && !(check & ENCODE_LEAVE_SRC)){
   299				if (s < e) {
   300				    Move(s,SvPVX(utf8),e-s,U8);
   301				    SvCUR_set(utf8,(e-s));
   302				}
   303				else {
   304				    SvCUR_set(utf8,0);
   305				}
   306				*SvEND(utf8) = '\0';
   307			    } 
   308			    XSRETURN(1);
   309			}
   310			#line 311 "Unicode.c"
   311			    }
   312			    XSRETURN(1);
   313			}
   314			
   315			#ifdef __cplusplus
   316			extern "C"
   317			#endif
   318			XS(boot_Encode__Unicode); /* prototype to pass -Wmissing-prototypes */
   319			XS(boot_Encode__Unicode)
   320	           5    {
   321	           5        dXSARGS;
   322	           5        char* file = __FILE__;
   323			
   324	           5        PERL_UNUSED_VAR(cv); /* -W */
   325	           5        PERL_UNUSED_VAR(items); /* -W */
   326	           5        XS_VERSION_BOOTCHECK ;
   327			
   328	           5            newXS("Encode::Unicode::decode_xs", XS_Encode__Unicode_decode_xs, file);
   329	           5            newXS("Encode::Unicode::encode_xs", XS_Encode__Unicode_encode_xs, file);
   330	           5        XSRETURN_YES;
   331			}
   332			
