@@ -102,10 +102,10 @@ sub post_initialize
$self->{'clean'}{'FILES'} .= join(' ',@files);
open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
print XS <<'END';
+#define PERL_NO_GET_CONTEXT
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
-#define U8 U8
#include "encode.h"
END
foreach my $table (sort keys %tables) {
@@ -118,8 +118,14 @@ Encode_XSEncoding(pTHX_ encode_t *enc)
{
dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
+ SV *iv = newSViv(PTR2IV(enc));
+ SV *sv = sv_bless(newRV_noinc(iv),stash);
int i = 0;
+ /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
+ constness, in the hope that perl won't mess with it. */
+ assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
+ SvFLAGS(iv) |= SVp_POK;
+ SvPVX(iv) = (char*) enc->name[0];
PUSHMARK(sp);
XPUSHs(sv);
while (enc->name[i])
@@ -78,10 +78,10 @@ sub post_initialize
$self->{'clean'}{'FILES'} .= join(' ',@files);
open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
print XS <<'END';
+#define PERL_NO_GET_CONTEXT
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
-#define U8 U8
#include "encode.h"
END
foreach my $table (sort keys %tables) {
@@ -94,8 +94,14 @@ Encode_XSEncoding(pTHX_ encode_t *enc)
{
dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
+ SV *iv = newSViv(PTR2IV(enc));
+ SV *sv = sv_bless(newRV_noinc(iv),stash);
int i = 0;
+ /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
+ constness, in the hope that perl won't mess with it. */
+ assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
+ SvFLAGS(iv) |= SVp_POK;
+ SvPVX(iv) = (char*) enc->name[0];
PUSHMARK(sp);
XPUSHs(sv);
while (enc->name[i])
@@ -1,8 +1,133 @@
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 2.55 2013/09/14 07:51:59 dankogai Exp dankogai $
+# $Id: Changes,v 2.64 2014/10/29 15:37:54 dankogai Exp dankogai $
#
-$Revision: 2.55 $ $Date: 2013/09/14 07:51:59 $
+$Revision: 2.64 $ $Date: 2014/10/29 15:37:54 $
+! t/utf8warnings.t MANIFEST
+ Retouch pull #26 so it works with perl < 5.14
+! Encode.pm
++ t/utf8warnings.t
+ Pulled: Catch and re-issue utf8 warnings at a higher level
+ https://github.com/dankogai/p5-encode/pull/26
++ Encode.xs
+ Pulled: Validate continuations in the incremental UTF-X decoder
+ https://github.com/dankogai/p5-encode/pull/25
+
+2.63 2014/10/19 07:01:15
+! Encode.xs
+ Applied: RT #99264: call_pv() can reallocate the stack
+ https://rt.cpan.org/Ticket/Display.html?id=99264
+! Byte/Makefile.PL CN/Makefile.PL EBCDIC/Makefile.PL Encode.xs
+ JP/Makefile.PL KR/Makefile.PL Symbol/Makefile.PL TW/Makefile.PL
+ bin/enc2xs encengine.c
+ Pulled: add PERL_NO_GET_CONTEXT to all dynamic libs
+ https://github.com/dankogai/p5-encode/pull/24
+
+2.62 2014/05/31 12:12:39
+! Encode.pm
+ s/2013/2014/ on COPYRIGHT section
+! Byte/Makefile.PL
+ CN/Makefile.PL
+ EBCDIC/Makefile.PL
+ Encode/Makefile_PL.e2x
+ Encode.xs
+ JP/Makefile.PL
+ KR/Makefile.PL
+ Symbol/Makefile.PL
+ TW/Makefile.PL
+ bin/enc2xs
+ Merged from perl.git: "Fix Encode 2.60 with g++"
+ http://perl5.git.perl.org/perl.git/commit/89c2544cd3
+
+2.61 2014/05/31 09:48:48
+! bin/piconv
+ Applied: piconv nit
+ + Better error handling when the encoding name is nonexistent
+ Message-Id: <537139A0.1000503@iki.fi>
+! Encode.xs
+ Applied: RT #95466:
+ fallback definition of SvIsCOW() is wrong
+ (and hence breaks on 5.8.2 and earlier)
+ https://rt.cpan.org/Ticket/Display.html?id=95466
+
+2.60 2014/04/29 16:25:06
+! Byte/Makefile.PL
+ CN/Makefile.PL
+ EBCDIC/Makefile.PL
+ Encode/Makefile_PL.e2x
+ Encode/encode.h
+ JP/Makefile.PL
+ KR/Makefile.PL
+ Symbol/Makefile.PL
+ TW/Makefile.PL
+ bin/enc2xs
+ encengine.c
+ Applied: more Fix Windows build (of Encode) with VC++ 6.0
+ http://perl5.git.perl.org/perl.git/commit/9e9002efd1609c7d154f98af43a026320df7582c
+! Unicode/Unicode.xs
+ Addressed: sign extension issue found by Coverity #21
+ https://github.com/dankogai/p5-encode/issues/21
+! Encode/encode.h Encode.xs Unicode/Unicode.xs
+ removed #define U8 U8
+ https://rt.perl.org/Ticket/Display.html?id=121554
+ http://perl5.git.perl.org/perl.git/commit/2f2b4ff2c154a8e461857f2e82cb815c238d0d94
+
+2.59 2014/04/06 17:23:55
+! Byte/Makefile.PL
+ CN/Makefile.PL
+ EBCDIC/Makefile.PL
+ Encode.pm
+ Encode.xs
+ Encode/Makefile_PL.e2x
+ JP/Makefile.PL
+ KR/Makefile.PL
+ Symbol/Makefile.PL
+ TW/Makefile.PL
+ bin/enc2xs
+ Restored the signature of Encode_XSEncoding() to address RT#94478
+ * While https://github.com/dankogai/p5-encode/pull/20
+ pulls the symnames via argument thus breaks the compatibility
+ with Encode::XX modules with *.ucm, the restored version
+ pulls the symanmes via enc->name[0] so the added 2nd argument
+ is no longer needed.
+ https://rt.cpan.org/Public/Bug/Display.html?id=94478
+
+2.58 2014/03/28 02:37:42
+! bin/piconv
+ Addressed: piconv bug of decoding UTF-16 (with fix)
+ https://github.com/dankogai/p5-encode/issues/19
+! Byte/Makefile.PL
+ CN/Makefile.PL
+ EBCDIC/Makefile.PL
+ Encode.pm
+ Encode.xs
+ Encode/Makefile_PL.e2x
+ JP/Makefile.PL
+ KR/Makefile.PL
+ Symbol/Makefile.PL
+ TW/Makefile.PL
+ bin/enc2xs
+ Pulled: Remap symname [RT #94221]
+ https://github.com/dankogai/p5-encode/pull/20
+ https://rt.cpan.org/Public/Bug/Display.html?id=94221
+! Encode.pm
+ Pulled: [doc] clarify that CHECK coderefs return octets #18
+ https://github.com/dankogai/p5-encode/pull/18
+
+2.57 2014/01/03 04:52:36
+! encengine.c
+ Pulled: sun compiler (maybe others) doesn't like UTF-8 in the source
+ https://github.com/dankogai/p5-encode/pull/17
+! bin/enc2xs
+ Merged RT#91763: POD errors
+ https://rt.cpan.org/Ticket/Display.html?id=91763
+
+2.56 2013/12/22 13:40:00
+! Encode.pm t/Encode.t
+ Merged RT#91569: [PATCH] decode_utf8 and non-PVs
+ https://rt.cpan.org/Ticket/Display.html?id=91569
+
+2.55 2013/09/14 07:51:59
! Encode.pm
Makefile.PL
Unicode/Unicode.pm
@@ -59,10 +59,10 @@ sub post_initialize
$self->{'clean'}{'FILES'} .= join(' ',@files);
open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
print XS <<'END';
+#define PERL_NO_GET_CONTEXT
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
-#define U8 U8
#include "encode.h"
END
foreach my $table (sort keys %tables) {
@@ -75,8 +75,14 @@ Encode_XSEncoding(pTHX_ encode_t *enc)
{
dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
+ SV *iv = newSViv(PTR2IV(enc));
+ SV *sv = sv_bless(newRV_noinc(iv),stash);
int i = 0;
+ /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
+ constness, in the hope that perl won't mess with it. */
+ assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
+ SvFLAGS(iv) |= SVp_POK;
+ SvPVX(iv) = (char*) enc->name[0];
PUSHMARK(sp);
XPUSHs(sv);
while (enc->name[i])
@@ -98,7 +98,6 @@ sub post_initialize
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
-#define U8 U8
#include "encode.h"
END
foreach my $table (sort keys %tables) {
@@ -111,8 +110,14 @@ Encode_XSEncoding(pTHX_ encode_t *enc)
{
dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
+ SV *iv = newSViv(PTR2IV(enc));
+ SV *sv = sv_bless(newRV_noinc(iv),stash);
int i = 0;
+ /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
+ constness, in the hope that perl won't mess with it. */
+ assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
+ SvFLAGS(iv) |= SVp_POK;
+ SvPVX(iv) = (char*) enc->name[0];
PUSHMARK(sp);
XPUSHs(sv);
while (enc->name[i])
@@ -1,11 +1,8 @@
#ifndef ENCODE_H
#define ENCODE_H
-#ifndef U8
-/*
- A tad devious this:
- perl normally has a #define for U8 - if that isn't present then we
- typedef it - leaving it #ifndef so we can do data parts without
+#ifndef H_PERL
+/* check whether we're "in perl" so that we can do data parts without
getting extern references to the code parts
*/
typedef unsigned char U8;
@@ -74,7 +71,7 @@ struct encode_s
const char *const name[2]; /* name(s) of this encoding */
};
-#ifdef U8
+#ifdef H_PERL
/* See comment at top of file for deviousness */
extern int do_encode(const encpage_t *enc, const U8 *src, STRLEN *slen,
@@ -83,7 +80,7 @@ extern int do_encode(const encpage_t *enc, const U8 *src, STRLEN *slen,
extern void Encode_DefineEncoding(encode_t *enc);
-#endif /* U8 */
+#endif /* H_PERL */
#define ENCODE_NOSPACE 1
#define ENCODE_PARTIAL 2
@@ -1,10 +1,10 @@
#
-# $Id: Encode.pm,v 2.55 2013/09/14 07:51:59 dankogai Exp dankogai $
+# $Id: Encode.pm,v 2.64 2014/10/29 15:37:54 dankogai Exp dankogai $
#
package Encode;
use strict;
use warnings;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.55 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.64 $ =~ /(\d+)/g;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
use XSLoader ();
XSLoader::load( __PACKAGE__, $VERSION );
@@ -156,7 +156,20 @@ sub encode($$;$) {
require Carp;
Carp::croak("Unknown encoding '$name'");
}
- my $octets = $enc->encode( $string, $check );
+ # For Unicode, warnings need to be caught and re-issued at this level
+ # so that callers can disable utf8 warnings lexically.
+ my $octets;
+ if ( ref($enc) eq 'Encode::Unicode' ) {
+ my $warn = '';
+ {
+ local $SIG{__WARN__} = sub { $warn = shift };
+ $octets = $enc->encode( $string, $check );
+ }
+ warnings::warnif('utf8', $warn) if length $warn;
+ }
+ else {
+ $octets = $enc->encode( $string, $check );
+ }
$_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC() );
return $octets;
}
@@ -172,7 +185,20 @@ sub decode($$;$) {
require Carp;
Carp::croak("Unknown encoding '$name'");
}
- my $string = $enc->decode( $octets, $check );
+ # For Unicode, warnings need to be caught and re-issued at this level
+ # so that callers can disable utf8 warnings lexically.
+ my $string;
+ if ( ref($enc) eq 'Encode::Unicode' ) {
+ my $warn = '';
+ {
+ local $SIG{__WARN__} = sub { $warn = shift };
+ $string = $enc->decode( $octets, $check );
+ }
+ warnings::warnif('utf8', $warn) if length $warn;
+ }
+ else {
+ $string = $enc->decode( $octets, $check );
+ }
$_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC() );
return $string;
}
@@ -209,7 +235,7 @@ my $utf8enc;
sub decode_utf8($;$) {
my ( $octets, $check ) = @_;
return undef unless defined $octets;
- $octets .= '' if ref $octets;
+ $octets .= '';
$check ||= 0;
$utf8enc ||= find_encoding('utf8');
my $string = $utf8enc->decode( $octets, $check );
@@ -801,13 +827,24 @@ If you're not interested in this, then bitwise-OR it with the bitmask.
=head2 coderef for CHECK
As of C<Encode> 2.12, C<CHECK> can also be a code reference which takes the
-ordinal value of the unmapped character as an argument and returns a string
-that represents the fallback character. For instance:
+ordinal value of the unmapped character as an argument and returns
+octets that represent the fallback character. For instance:
$ascii = encode("ascii", $utf8, sub{ sprintf "<U+%04X>", shift });
Acts like C<FB_PERLQQ> but U+I<XXXX> is used instead of C<\x{I<XXXX>}>.
+Even the fallback for C<decode> must return octets, which are
+then decoded with the character encoding that C<decode> accepts. So for
+example if you wish to decode octests as UTF-8, and use ISO-8859-15 as
+a fallback for bytes that are not valid UTF-8, you could write
+
+ $str = decode 'UTF-8', $octets, sub {
+ my $tmp = chr shift;
+ from_to $tmp, 'ISO-8859-15', 'UTF-8';
+ return $tmp;
+ };
+
=head1 Defining Encodings
To define a new encoding, use:
@@ -1018,7 +1055,7 @@ who submitted code to the project.
=head1 COPYRIGHT
-Copyright 2002-2013 Dan Kogai I<< <dankogai@cpan.org> >>.
+Copyright 2002-2014 Dan Kogai I<< <dankogai@cpan.org> >>.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
@@ -1,12 +1,11 @@
/*
- $Id: Encode.xs,v 2.24 2013/08/29 16:47:39 dankogai Exp $
+ $Id: Encode.xs,v 2.31 2014/10/29 15:37:54 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
-#define U8 U8
#include "encode.h"
# define PERLIO_MODNAME "PerlIO::encoding"
@@ -20,8 +19,8 @@
encode_method(). 1 is recommended. 2 restores NI-S original */
#define ENCODE_XS_USEFP 1
-#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
- Perl_croak(aTHX_ "panic_unimplemented"); \
+#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \
+ Perl_croak_nocontext("panic_unimplemented"); \
return (y)0; /* fool picky compilers */ \
}
/**/
@@ -45,8 +44,14 @@ Encode_XSEncoding(pTHX_ encode_t * enc)
{
dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash);
+ SV *iv = newSViv(PTR2IV(enc));
+ SV *sv = sv_bless(newRV_noinc(iv),stash);
int i = 0;
+ /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
+ constness, in the hope that perl won't mess with it. */
+ assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
+ SvFLAGS(iv) |= SVp_POK;
+ SvPVX(iv) = (char*) enc->name[0];
PUSHMARK(sp);
XPUSHs(sv);
while (enc->name[i]) {
@@ -338,10 +343,14 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
if (UTF8_IS_START(*s)) {
U8 skip = UTF8SKIP(s);
if ((s + skip) > e) {
- /* Partial character */
- /* XXX could check that rest of bytes are UTF8_IS_CONTINUATION(ch) */
- if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL))
+ if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) {
+ const U8 *p = s + 1;
+ for (; p < e; p++) {
+ if (!UTF8_IS_CONTINUATION(*p))
+ goto malformed_byte;
+ }
break;
+ }
goto malformed_byte;
}
@@ -681,6 +690,7 @@ CODE:
/* require_pv(PERLIO_FILENAME); */
eval_pv("require PerlIO::encoding", 0);
+ SPAGAIN;
if (SvTRUE(get_sv("@", 0))) {
ST(0) = &PL_sv_no;
@@ -698,6 +708,7 @@ CODE:
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
SV *retval;
eval_pv("require Encode::MIME::Name", 0);
+ SPAGAIN;
if (SvTRUE(get_sv("@", 0))) {
ST(0) = &PL_sv_undef;
@@ -838,7 +849,7 @@ OUTPUT:
RETVAL
#ifndef SvIsCOW
-# define SvIsCOW (SvREADONLY(sv) && SvFAKE(sv))
+# define SvIsCOW(sv) (SvREADONLY(sv) && SvFAKE(sv))
#endif
SV *
@@ -78,10 +78,10 @@ sub post_initialize
$self->{'clean'}{'FILES'} .= join(' ',@files);
open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
print XS <<'END';
+#define PERL_NO_GET_CONTEXT
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
-#define U8 U8
#include "encode.h"
END
foreach my $table (sort keys %tables) {
@@ -94,8 +94,14 @@ Encode_XSEncoding(pTHX_ encode_t *enc)
{
dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
+ SV *iv = newSViv(PTR2IV(enc));
+ SV *sv = sv_bless(newRV_noinc(iv),stash);
int i = 0;
+ /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
+ constness, in the hope that perl won't mess with it. */
+ assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
+ SvFLAGS(iv) |= SVp_POK;
+ SvPVX(iv) = (char*) enc->name[0];
PUSHMARK(sp);
XPUSHs(sv);
while (enc->name[i])
@@ -76,10 +76,10 @@ sub post_initialize
$self->{'clean'}{'FILES'} .= join(' ',@files);
open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
print XS <<'END';
+#define PERL_NO_GET_CONTEXT
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
-#define U8 U8
#include "encode.h"
END
foreach my $table (sort keys %tables) {
@@ -92,8 +92,14 @@ Encode_XSEncoding(pTHX_ encode_t *enc)
{
dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
+ SV *iv = newSViv(PTR2IV(enc));
+ SV *sv = sv_bless(newRV_noinc(iv),stash);
int i = 0;
+ /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
+ constness, in the hope that perl won't mess with it. */
+ assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
+ SvFLAGS(iv) |= SVp_POK;
+ SvPVX(iv) = (char*) enc->name[0];
PUSHMARK(sp);
XPUSHs(sv);
while (enc->name[i])
@@ -102,6 +102,7 @@ t/taint.t test script
t/unibench.pl benchmark script
t/utf8ref.t test script
t/utf8strict.t test script
+t/utf8warnings.t test script
ucm/8859-1.ucm Unicode Character Map
ucm/8859-10.ucm Unicode Character Map
ucm/8859-11.ucm Unicode Character Map
@@ -4,7 +4,7 @@
"unknown"
],
"dynamic_config" : 1,
- "generated_by" : "ExtUtils::MakeMaker version 6.72, CPAN::Meta::Converter version 2.132140",
+ "generated_by" : "ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.142690",
"license" : [
"perl_5"
],
@@ -43,5 +43,5 @@
"url" : "https://github.com/dankogai/p5-encode"
}
},
- "version" : "2.55"
+ "version" : "2.64"
}
@@ -3,23 +3,23 @@ abstract: unknown
author:
- unknown
build_requires:
- ExtUtils::MakeMaker: 0
+ ExtUtils::MakeMaker: '0'
configure_requires:
- ExtUtils::MakeMaker: 0
+ ExtUtils::MakeMaker: '0'
dynamic_config: 1
-generated_by: 'ExtUtils::MakeMaker version 6.72, CPAN::Meta::Converter version 2.132140'
+generated_by: 'ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.142690'
license: perl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
- version: 1.4
+ version: '1.4'
name: Encode
no_index:
directory:
- t
- inc
requires:
- Exporter: 5.57
- parent: 0.221
+ Exporter: '5.57'
+ parent: '0.221'
resources:
repository: https://github.com/dankogai/p5-encode
-version: 2.55
+version: '2.64'
@@ -1,5 +1,5 @@
#
-# $Id: Makefile.PL,v 2.12 2013/09/14 07:51:59 dankogai Exp dankogai $
+# $Id: Makefile.PL,v 2.12 2013/09/14 07:51:59 dankogai Exp $
#
use 5.007003;
use strict;
@@ -64,10 +64,10 @@ sub post_initialize
$self->{'clean'}{'FILES'} .= join(' ',@files);
open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
print XS <<'END';
+#define PERL_NO_GET_CONTEXT
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
-#define U8 U8
#include "encode.h"
END
foreach my $table (sort keys %tables) {
@@ -80,8 +80,14 @@ Encode_XSEncoding(pTHX_ encode_t *enc)
{
dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
+ SV *iv = newSViv(PTR2IV(enc));
+ SV *sv = sv_bless(newRV_noinc(iv),stash);
int i = 0;
+ /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
+ constness, in the hope that perl won't mess with it. */
+ assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
+ SvFLAGS(iv) |= SVp_POK;
+ SvPVX(iv) = (char*) enc->name[0];
PUSHMARK(sp);
XPUSHs(sv);
while (enc->name[i])
@@ -74,10 +74,10 @@ sub post_initialize
$self->{'clean'}{'FILES'} .= join(' ',@files);
open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
print XS <<'END';
+#define PERL_NO_GET_CONTEXT
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
-#define U8 U8
#include "encode.h"
END
foreach my $table (sort keys %tables) {
@@ -90,8 +90,14 @@ Encode_XSEncoding(pTHX_ encode_t *enc)
{
dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
+ SV *iv = newSViv(PTR2IV(enc));
+ SV *sv = sv_bless(newRV_noinc(iv),stash);
int i = 0;
+ /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
+ constness, in the hope that perl won't mess with it. */
+ assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
+ SvFLAGS(iv) |= SVp_POK;
+ SvPVX(iv) = (char*) enc->name[0];
PUSHMARK(sp);
XPUSHs(sv);
while (enc->name[i])
@@ -1,12 +1,11 @@
/*
- $Id: Unicode.xs,v 2.10 2013/04/26 18:30:46 dankogai Exp $
+ $Id: Unicode.xs,v 2.11 2014/04/29 16:25:06 dankogai Exp $
*/
#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
-#define U8 U8
#include "../Encode/encode.h"
#define FBCHAR 0xFFFd
@@ -80,7 +79,7 @@ enc_unpack(pTHX_ U8 **sp, U8 *e, STRLEN size, U8 endian)
if (endian == 'v')
break;
v |= (*s++ << 16);
- v |= (*s++ << 24);
+ v |= ((UV)*s++ << 24);
break;
default:
croak("Unknown endian %c",(char) endian);
@@ -10,7 +10,7 @@ use warnings;
use Getopt::Std;
use Config;
my @orig_ARGV = @ARGV;
-our $VERSION = do { my @r = (q$Revision: 2.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.14 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
# These may get re-ordered.
# RAW is a do_now as inserted by &enter
@@ -184,10 +184,10 @@ END
if ($cname =~ /(\w+)\.xs$/)
{
+ print C "#define PERL_NO_GET_CONTEXT\n";
print C "#include <EXTERN.h>\n";
print C "#include <perl.h>\n";
print C "#include <XSUB.h>\n";
- print C "#define U8 U8\n";
}
print C "#include \"encode.h\"\n\n";
@@ -314,8 +314,14 @@ Encode_XSEncoding(pTHX_ encode_t *enc)
{
dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
+ SV *iv = newSViv(PTR2IV(enc));
+ SV *sv = sv_bless(newRV_noinc(iv),stash);
int i = 0;
+ /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
+ constness, in the hope that perl won't mess with it. */
+ assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
+ SvFLAGS(iv) |= SVp_POK;
+ SvPVX(iv) = (char*) enc->name[0];
PUSHMARK(sp);
XPUSHs(sv);
while (enc->name[i])
@@ -1080,7 +1086,7 @@ add a new encoding, just read this chapter and forget the rest.
=over 4
-=item 0.
+=item 0.Z<>
Have a .ucm file ready. You can get it from somewhere or you can write
your own from scratch or you can grab one from the Encode distribution
@@ -1091,7 +1097,7 @@ in I<my.ucm>. C<$> is a shell prompt.
$ ls -F
my.ucm
-=item 1.
+=item 1.Z<>
Issue a command as follows;
@@ -1114,7 +1120,7 @@ The following files were created.
=over 4
-=item 1.1.
+=item 1.1.Z<>
If you want *.ucm installed together with the modules, do as follows;
@@ -1124,20 +1130,20 @@ If you want *.ucm installed together with the modules, do as follows;
=back
-=item 2.
+=item 2.Z<>
Edit the files generated. You don't have to if you have no time AND no
intention to give it to someone else. But it is a good idea to edit
the pod and to add more tests.
-=item 3.
+=item 3.Z<>
Now issue a command all Perl Mongers love:
$ perl Makefile.PL
Writing Makefile for Encode::My
-=item 4.
+=item 4.Z<>
Now all you have to do is make.
@@ -1158,7 +1164,7 @@ The time it takes varies depending on how fast your machine is and
how large your encoding is. Unless you are working on something big
like euc-tw, it won't take too long.
-=item 5.
+=item 5.Z<>
You can "make install" already but you should test first.
@@ -1171,11 +1177,11 @@ You can "make install" already but you should test first.
Files=1, Tests=2, 0 wallclock secs
( 0.09 cusr + 0.01 csys = 0.09 CPU)
-=item 6.
+=item 6.Z<>
If you are content with the test result, just "make install"
-=item 7.
+=item 7.Z<>
If you want to add your encoding to Encode's demand-loading list
(so you don't have to "use Encode::YourEncoding"), run
@@ -1,5 +1,5 @@
#!./perl
-# $Id: piconv,v 2.5 2013/04/26 18:30:46 dankogai Exp $
+# $Id: piconv,v 2.7 2014/05/31 09:48:48 dankogai Exp $
#
use 5.8.0;
use strict;
@@ -58,9 +58,13 @@ $Opt{perlqq} and $Opt{check} = Encode::PERLQQ;
$Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF;
$Opt{xmlcref} and $Opt{check} = Encode::XMLCREF;
+my $efrom = Encode->getEncoding($from) || die "Unknown encoding '$from'";
+my $eto = Encode->getEncoding($to) || die "Unknown encoding '$to'";
+
+my $cfrom = $efrom->name;
+my $cto = $eto->name;
+
if ($Opt{debug}){
- my $cfrom = Encode->getEncoding($from)->name;
- my $cto = Encode->getEncoding($to)->name;
print <<"EOT";
Scheme: $scheme
From: $from => $cfrom
@@ -68,14 +72,15 @@ To: $to => $cto
EOT
}
-my %use_bom = map { $_ => 1 } qw/UTF-16 UTF-32/;
+my %use_bom =
+ map { $_ => 1 } qw/UTF-16 UTF-16BE UTF-16LE UTF-32 UTF-32BE UTF-32LE/;
# we do not use <> (or ARGV) for the sake of binmode()
@ARGV or push @ARGV, \*STDIN;
unless ( $scheme eq 'perlio' ) {
binmode STDOUT;
- my $need2slurp = $use_bom{ find_encoding($to)->name };
+ my $need2slurp = $use_bom{ $eto } || $use_bom{ $efrom };
for my $argv (@ARGV) {
my $ifh = ref $argv ? $argv : undef;
$ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next;
@@ -169,7 +174,7 @@ The following are mainly of interest to Encode hackers:
-D,--debug show debug information
-S,--scheme scheme use the scheme for conversion
Those are handy when you can only see ASCII characters:
- -p,--perlqq transliterate characters missing in encoding to \x{HHHH}
+ -p,--perlqq transliterate characters missing in encoding to \\x{HHHH}
where HHHH is the hexadecimal Unicode code point
--htmlcref transliterate characters missing in encoding to &#NNN;
where NNN is the decimal Unicode code point
@@ -81,14 +81,14 @@ This scheme can also handle shift encodings.
A slight enhancement to the scheme also allows for look-ahead - if
we add a flag to re-add the removed byte to the source we could handle
- a" -> รค
+ a" -> U+00E4 (LATIN SMALL LETTER A WITH DIAERESIS)
ab -> a (and take b back please)
*/
+#define PERL_NO_GET_CONTEXT
#include <EXTERN.h>
#include <perl.h>
-#define U8 U8
#include "encode.h"
int
@@ -1,5 +1,5 @@
#
-# $Id: Encoder.pm,v 2.3 2013/09/14 07:51:59 dankogai Exp dankogai $
+# $Id: Encoder.pm,v 2.3 2013/09/14 07:51:59 dankogai Exp $
#
package Encode::Encoder;
use strict;
@@ -1,5 +1,5 @@
#
-# $Id: GSM0338.pm,v 2.5 2013/09/14 07:51:59 dankogai Exp dankogai $
+# $Id: GSM0338.pm,v 2.5 2013/09/14 07:51:59 dankogai Exp $
#
package Encode::GSM0338;
@@ -1,5 +1,5 @@
#
-# $Id: UTF7.pm,v 2.8 2013/09/14 07:51:59 dankogai Exp dankogai $
+# $Id: UTF7.pm,v 2.8 2013/09/14 07:51:59 dankogai Exp $
#
package Encode::Unicode::UTF7;
use strict;
@@ -25,7 +25,7 @@ my @character_set = ('0'..'9', 'A'..'Z', 'a'..'z');
my @source = qw(ascii iso8859-1 cp1250);
my @destiny = qw(cp1047 cp37 posix-bc);
my @ebcdic_sets = qw(cp1047 cp37 posix-bc);
-plan test => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256 + 6 + 4;
+plan test => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256 + 6 + 5;
my $str = join('',map(chr($_),0x20..0x7E));
my $cpy = $str;
ok(length($str),from_to($cpy,'iso8859-1','Unicode'),"Length Wrong");
@@ -150,6 +150,10 @@ package main;
ok(decode(latin1 => Encode::Dummy->new("foobar")), "foobar");
ok(encode(utf8 => Encode::Dummy->new("foobar")), "foobar");
+# RT#91569
+# decode_utf8 with non-string arguments
+ok(decode_utf8(*1), "*main::1");
+
# hash keys
my $key = (keys %{{ "whatever\x{100}" => '' }})[0];
my $kopy = $key;
@@ -1,5 +1,5 @@
#
-# $Id: Encoder.t,v 2.1 2013/09/14 07:51:59 dankogai Exp dankogai $
+# $Id: Encoder.t,v 2.1 2013/09/14 07:51:59 dankogai Exp $
#
BEGIN {
@@ -0,0 +1,66 @@
+use strict;
+use warnings;
+BEGIN {
+ if ($] < 5.014){
+ print "1..0 # Skip: Perl 5.14.0 or later required\n";
+ exit 0;
+ }
+}
+
+use Encode;
+use Test::More tests => 7;
+
+my $valid = "\x61\x00\x00\x00";
+my $invalid = "\x78\x56\x34\x12";
+
+my @warnings;
+$SIG{__WARN__} = sub {push @warnings, "@_"};
+
+my $enc = find_encoding("UTF32-LE");
+
+{
+ @warnings = ();
+ my $ret = Encode::Unicode::decode( $enc, $valid );
+ is("@warnings", "", "Calling decode in Encode::Unicode on valid string produces no warnings");
+}
+
+{
+ @warnings = ();
+ my $ret = Encode::Unicode::decode( $enc, $invalid );
+ like("@warnings", qr/is not Unicode/, "Calling decode in Encode::Unicode on invalid string warns");
+}
+
+{
+ no warnings 'utf8';
+ @warnings = ();
+ my $ret = Encode::Unicode::decode( $enc, $invalid );
+ is("@warnings", "", "Warning from decode in Encode::Unicode can be silenced via no warnings 'utf8'");
+}
+
+{
+ no warnings;
+ @warnings = ();
+ my $ret = Encode::Unicode::decode( $enc, $invalid );
+ is("@warnings", "", "Warning from decode in Encode::Unicode can be silenced via no warnings");
+}
+
+{
+ @warnings = ();
+ my $ret = Encode::decode( $enc, $invalid );
+ like("@warnings", qr/is not Unicode/, "Calling decode in Encode on invalid string warns");
+}
+
+{
+ no warnings 'utf8';
+ @warnings = ();
+ my $ret = Encode::decode( $enc, $invalid );
+ is("@warnings", "", "Warning from decode in Encode can be silenced via no warnings 'utf8'");
+};
+
+{
+ no warnings;
+ @warnings = ();
+ my $ret = Encode::decode( $enc, $invalid );
+ is("@warnings", "", "Warning from decode in Encode can be silenced via no warnings 'utf8'");
+};
+