		/*
		Data structures for encoding transformations.
		
		Perl works internally in either a native 'byte' encoding or
		in UTF-8 encoded Unicode.  We have no immediate need for a "wchar_t"
		representation. When we do we can use utf8_to_uv().
		
		Most character encodings are either simple byte mappings or
		variable length multi-byte encodings. UTF-8 can be viewed as a
		rather extreme case of the latter.
		
		So to solve an important part of perl's encode needs we need to solve the
		"multi-byte -> multi-byte" case. The simple byte forms are then just a
		degenerate case. (One of the multi-byte encodings will usually be UTF-8.)
		
		The other type of encoding is a shift encoding where a prefix sequence
		determines what subsequent bytes mean. Such encodings have state.
		
		We also need to handle the case where a character in one encoding has to be
		represented as multiple characters in the other, e.g. letter+diacritic.
		
		The process can be considered as pseudo perl:
		
		my $dst = '';
		while (length($src))
		 {
		  my $size    = src_count($src);
		  my $in_seq  = substr($src,0,$size,'');
		  my $out_seq = $s2d_hash{$in_seq};
		  if (defined $out_seq)
		   {
		    $dst .= $out_seq;
		   }
		  else
		   {
		    # an error condition
		   }
		 }
		return $dst;
		
		That has the following components:
		 &src_count - a "rule" for how many bytes make up the next character in the
		              source.
		 %s2d_hash  - a mapping from input sequences to output sequences
		
		The problem with that scheme is that it does not allow the output
		character repertoire to affect the characters considered from the
		input.
		
		So we use a "trie" representation which can also be considered
		a state machine:
		
		my $dst   = '';
		my $seq   = \@s2d_seq;
		my $next  = \@s2d_next;
		while (length($src))
		 {
		  my $byte    = substr($src,0,1,'');
		  my $out_seq = $seq->[$byte];
		  if (defined $out_seq)
		   {
		    $dst .= $out_seq;
		   }
		  else
		   {
		    # an error condition
		   }
		  ($next,$seq) = @{$next->[$byte]} if $next;
		 }
		return $dst;
		
		There is now a pair of data structures to represent everything.
		It is valid for the output sequence at a particular point to
		be defined but zero length; that just means "don't know yet".
		For the single byte case there is no 'next' so the new tables will be the same as
		the original tables. For a multi-byte case a prefix byte will flip to the tables
		for the next page (adding nothing to the output), then the tables for the page
		will provide the actual output and set the tables back to the original base page.
		
		This scheme can also handle shift encodings.
		
		A slight enhancement to the scheme also allows for look-ahead - if
		we add a flag to re-add the removed byte to the source we could handle
		  a" -> ä
		  ab -> a (and take b back please)
		
		*/
		
		#include <EXTERN.h>
		#include <perl.h>
		#define U8 U8
		#include "encode.h"
		
		/*
		 * do_encode - translate bytes from src to dst by walking the encpage_t
		 * trie one source byte at a time.
		 *
		 *   enc    - first entry of the starting page.  Entries are scanned
		 *            forward until one with max >= byte is found.
		 *   src    - input bytes.
		 *   slen   - in:  number of bytes available at src.
		 *            out: number of source bytes consumed, counted up to the
		 *                 end of the last completely processed character.
		 *   dst    - output buffer.  If NULL, nothing is stored and the output
		 *            position is only advanced, so *dout reports a size.
		 *   dlen   - capacity of dst in bytes.
		 *   dout   - out: number of output bytes produced (d - dst).
		 *   approx - when non-zero, entries whose slen has bit 0x80 set may be
		 *            used; completing such a character sets ENCODE_FALLBACK.
		 *   term   - optional terminator sequence (may be NULL).
		 *   tlen   - length of term in bytes.
		 *
		 * Returns 0, or the last ENCODE_* code assigned: ENCODE_NOSPACE,
		 * ENCODE_PARTIAL, ENCODE_NOREP and ENCODE_FOUND_TERM stop the loop;
		 * ENCODE_FALLBACK is recorded but translation continues.
		 */
		int
		do_encode(encpage_t * enc, const U8 * src, STRLEN * slen, U8 * dst,
			  STRLEN dlen, STRLEN * dout, int approx, const U8 *term, STRLEN tlen)
       40946    {
       40946        const U8 *s = src;
       40946        const U8 *send = s + *slen;
		    /* last: source position just after the last completed character. */
       40946        const U8 *last = s;
       40946        U8 *d = dst;
		    /* dlast: output position where the current character's output began. */
       40946        U8 *dend = d + dlen, *dlast = d;
       40946        int code = 0;
		    /* Each iteration consumes one source byte and takes one trie step. */
     7214055        while (s < send) {
     7175273    	encpage_t *e = enc;
     7175273    	U8 byte = *s;
			/*
			 * Find the entry of the current page whose range covers this byte.
			 * NOTE(review): there is no end-of-page check here; this presumably
			 * relies on every page ending with an entry whose max is 0xFF --
			 * confirm against the table generator.
			 */
    45155330    	while (byte > e->max)
    37980057    	    e++;
			/*
			 * The entry is usable when the byte is inside [min, max], slen is
			 * non-zero, and the 0x80 flag in slen is either clear or allowed
			 * by approx.
			 */
     7175273    	if (byte >= e->min && e->slen && (approx || !(e->slen & 0x80))) {
			    /* Low 7 bits of slen: distance from s to the end of this source character. */
     7174584    	    const U8 *cend = s + (e->slen & 0x7f);
     7174584    	    if (cend <= send) {
     7174471    		STRLEN n;
				/* dlen == 0 means this step contributes no output bytes. */
     7174471    		if ((n = e->dlen)) {
				    /* seq holds one n-byte output sequence per byte value, indexed from min. */
     5202718    		    const U8 *out = e->seq + n * (byte - e->min);
     5202718    		    U8 *oend = d + n;
     5202718    		    if (dst) {
     5202718    			if (oend <= dend) {
    12265646    			    while (d < oend)
     7064067    				*d++ = *out++;
					}
					else {
					    /* Out of space */
        1139    			    code = ENCODE_NOSPACE;
        1139    			    break;
					}
				    }
				    else
					/* No buffer supplied: only advance the output position. */
      ######    			d = oend;
				}
				/* Follow the trie to the page for the next byte and consume this one. */
     7173332    		enc = e->next;
     7173332    		s++;
				/* s reaching cend means a whole source character has been consumed. */
     7173332    		if (s == cend) {
     5201579    		    if (approx && (e->slen & 0x80))
      ######    			code = ENCODE_FALLBACK;
				    /* Commit: everything before s is now fully translated. */
     5201579    		    last = s;
				    /*
				     * Stop if the output of the character just completed is exactly
				     * the terminator.
				     * NOTE(review): with dst == NULL, dlast is derived from NULL and
				     * memEQ would read through it -- confirm callers never pass a
				     * term together with a NULL dst.
				     */
     5201579    		    if (term && (STRLEN)(d-dlast) == tlen && memEQ(dlast, term, tlen)) {
         223    		      code = ENCODE_FOUND_TERM;
         223    		      break;
				    }
     5201356    		    dlast = d;
				}
			    }
			    else {
				/* partial source character */
         113    		code = ENCODE_PARTIAL;
         113    		break;
			    }
			}
			else {
			    /* Cannot represent */
         689    	    code = ENCODE_NOREP;
			    break;
			}
		    }
		    /* Report consumption up to the last completed character, and bytes produced. */
       40946        *slen = last - src;
       40946        *dout = d - dst;
       40946        return code;
		}
