<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">#! /bin/false

# vim: set autoindent shiftwidth=4 tabstop=4:

# Conversion routines for UTF-8 (perl &lt; 5.8.0).
# Copyright (C) 2002-2017 Guido Flohr &lt;guido.flohr@cantanea.com&gt;,
# all rights reserved.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see &lt;http://www.gnu.org/licenses/&gt;.

package Locale::RecodeData::UTF_8;

use strict;

require Locale::RecodeData;
use base qw(Locale::RecodeData);

sub _recode
{
    if ($_[0]-&gt;{_from} eq 'INTERNAL') {
	return $_[0]-&gt;_fromInternal ($_[1]);
    } else {
	return $_[0]-&gt;_toInternal ($_[1]);
    }
}

# This routine assumes that the internal representation is always sane
# and contains valid codes only.
sub _fromInternal
{
    $_[1] = join '', map {
	if ($_ &lt;= 0x7f) {
	    chr $_;
	} elsif ($_ &lt;= 0x7ff) {
	    pack ("C2", 
		  (0xc0 | (($_ &gt;&gt; 6) &amp; 0x1f)),
		  (0x80 | ($_ &amp; 0x3f)));
	} elsif ($_ &lt;= 0xffff) {
	    pack ("C3", 
		  (0xe0 | (($_ &gt;&gt; 12) &amp; 0xf)),
		  (0x80 | (($_ &gt;&gt; 6) &amp; 0x3f)),
		  (0x80 | ($_ &amp; 0x3f)));
	} elsif ($_ &lt;= 0x1fffff) {
	    pack ("C4", 
		  (0xf0 | (($_ &gt;&gt; 18) &amp; 0x7)),
		  (0x80 | (($_ &gt;&gt; 12) &amp; 0x3f)),
		  (0x80 | (($_ &gt;&gt; 6) &amp; 0x3f)),
		  (0x80 | ($_ &amp; 0x3f)));
	} elsif ($_ &lt;= 0x3ffffff) {
	    pack ("C5", 
		  (0xf0 | (($_ &gt;&gt; 24) &amp; 0x3)),
		  (0x80 | (($_ &gt;&gt; 18) &amp; 0x3f)),
		  (0x80 | (($_ &gt;&gt; 12) &amp; 0x3f)),
		  (0x80 | (($_ &gt;&gt; 6) &amp; 0x3f)),
		  (0x80 | ($_ &amp; 0x3f)));
	} else {
	    pack ("C6", 
		  (0xf0 | (($_ &gt;&gt; 30) &amp; 0x3)),
		  (0x80 | (($_ &gt;&gt; 24) &amp; 0x1)),
		  (0x80 | (($_ &gt;&gt; 18) &amp; 0x3f)),
		  (0x80 | (($_ &gt;&gt; 12) &amp; 0x3f)),
		  (0x80 | (($_ &gt;&gt; 6) &amp; 0x3f)),
		  (0x80 | ($_ &amp; 0x3f)));
	}
    } @{$_[1]};
    return 1;
}

# Decode UTF-8 into integers.  We do not bother to care about possibly
# configured replacement characters here and simply fall back to 0xfffd.
# Rationale: the internal format is never output directly and the other
# encoders will handle the replacement character correctly.
sub _toInternal
{
    if ($] &gt;= 5.006) {
	$_[1] = [ unpack "U*", $_[1] ];
	return 1;
    }

    # Sigh, we have to decode ourselves.  FIXME: Should be optimized.
    # The routine is awfully slow.
    # It also does not necessarily detect illegal multi-byte sequences.

    my @chars = ();
    my @bytes = unpack "C*", $_[1];

    BYTE: while (@bytes) {
	my $byte = shift @bytes;
        if ($byte &lt; 0x80) {
            push @chars, $byte;
        } elsif ($byte &lt; 0xc0 || $byte &gt; 0xfd) {
            push @chars, 0xfffd;
        } else {
            my $num_bytes;
            my $char;
            if ($byte &lt; 0xe0) {
                $char = $byte &amp; 0x1f;
                $num_bytes = 1;
            } elsif ($byte &lt; 0xf0) {
                $char = $byte &amp; 0xf;
                $num_bytes = 2;
            } elsif ($byte &lt; 0xf8) {
                $char = $byte &amp; 0x7;
                $num_bytes = 3;
            } elsif ($byte &lt; 0xfc) {
                $char = $byte &amp; 0x3;
                $num_bytes = 4;
            } else {
                $char = $byte &amp; 0x1;
                $num_bytes = 5;
            }
            for (my $i = 0; $i &lt; $num_bytes; ++$i) {
                my $next = shift @bytes;
                if (!defined $next || $next &lt; 0x80 || $next &gt; 0xbf) {
                    push @chars, 0xfffd;
                    next BYTE;
                } else {
                    $char &lt;&lt;= 6;
                    $char |= $next &amp; 0x3f;
                }
            }
            push @chars, $char;
        }
    }
    
    $_[1] = \@chars;
    
    return 1;
}

1;

__END__

=head1 NAME

Locale::RecodeData::UTF_8 - Conversion routines for UTF-8

=head1 SYNOPSIS

This module is internal to libintl.  Do not use directly!

=head1 DESCRIPTION

This modules contains the conversion tables for UTF-8.  It is capable of
converting from UTF-8 to the internal format of libintl-perl and vice
versa.   It is only suitable for Perl versions E&lt;lt&gt;= 5.8.0.  However,
you do not have to bother about version checking, Locale::Recode(3)
will do that for you.


=head1 CHARACTER TABLE

See http://www.unicode.org/.

=head1 AUTHOR

Copyright (C) 2002-2017 L&lt;Guido Flohr|http://www.guido-flohr.net/&gt;
(L&lt;mailto:guido.flohr@cantanea.com&gt;), all rights reserved.  See the source
code for details!code for details!

=head1 SEE ALSO

Locale::RecodeData(3), Locale::Recode(3), perl(1)

=cut
Local Variables:
mode: perl
perl-indent-level: 4
perl-continued-statement-offset: 4
perl-continued-brace-offset: 0
perl-brace-offset: -4
perl-brace-imaginary-offset: 0
perl-label-offset: -4
cperl-indent-level: 4
cperl-continued-statement-offset: 2
tab-width: 4
End:
</pre></body></html>