@@ -1,5 +1,8 @@
Revision history for Text-Ngram
+0.15 Thu Jul 17 16:50:58 WEST 2014
+ - replace lowercasing (lc) with Unicode case folding (fc).
+
0.14 Wed Jan 25 15:40:30 WET 2012
- releasing as stable
@@ -4,7 +4,7 @@
"Alberto Simoes <ambs@cpan.org>"
],
"dynamic_config" : 1,
- "generated_by" : "ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.113640",
+ "generated_by" : "ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.141520",
"license" : [
"unknown"
],
@@ -22,18 +22,20 @@
"prereqs" : {
"build" : {
"requires" : {
- "ExtUtils::MakeMaker" : 0
+ "ExtUtils::MakeMaker" : "0"
}
},
"configure" : {
"requires" : {
- "ExtUtils::MakeMaker" : 0
+ "ExtUtils::MakeMaker" : "0"
}
},
"runtime" : {
- "requires" : {}
+ "requires" : {
+ "Unicode::CaseFold" : "1.00"
+ }
}
},
"release_status" : "stable",
- "version" : "0.14"
+ "version" : "0.15"
}
@@ -3,19 +3,20 @@ abstract: 'Ngram analysis of text'
author:
- 'Alberto Simoes <ambs@cpan.org>'
build_requires:
- ExtUtils::MakeMaker: 0
+ ExtUtils::MakeMaker: '0'
configure_requires:
- ExtUtils::MakeMaker: 0
+ ExtUtils::MakeMaker: '0'
dynamic_config: 1
-generated_by: 'ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.113640'
+generated_by: 'ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.141520'
license: unknown
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
- version: 1.4
+ version: '1.4'
name: Text-Ngram
no_index:
directory:
- t
- inc
-requires: {}
-version: 0.14
+requires:
+ Unicode::CaseFold: '1.00'
+version: '0.15'
@@ -4,7 +4,9 @@ use ExtUtils::MakeMaker;
WriteMakefile(
'NAME' => 'Text::Ngram',
'VERSION_FROM' => 'lib/Text/Ngram.pm',
- 'PREREQ_PM' => {},
+ 'PREREQ_PM' => {
+ 'Unicode::CaseFold' => '1.00',
+ },
($] >= 5.005 ?
(ABSTRACT_FROM => 'lib/Text/Ngram.pm',
AUTHOR => 'Alberto Simoes <ambs@cpan.org>') : ()),
@@ -21,6 +21,7 @@ void _process_buffer(pTHX_ SV* sv, unsigned int window, HV** counts_hv) {
unsigned int c;
len = sv_len_utf8(sv);
unsigned int windows = (len < window) ? 0 : len - window + 1;
+
while (windows--) {
cur = next = buffer + UTF8SKIP(buffer);
for (c = window - 1; c--; cur += UTF8SKIP(cur)) ;
@@ -4,6 +4,8 @@ use 5.008008;
use strict;
use warnings;
+use Unicode::CaseFold;
+
require Exporter;
our @ISA = qw(Exporter);
@@ -11,7 +13,7 @@ our %EXPORT_TAGS = ( 'all' => [ qw( ngram_counts add_to_counts) ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw();
-our $VERSION = '0.14';
+our $VERSION = '0.15';
=head1 NAME
@@ -56,15 +58,15 @@ XSLoader::load('Text::Ngram', $VERSION);
sub _clean_buffer {
my %config = %{+shift};
my $buffer = shift;
- $buffer = lc $buffer if $config{lowercase};
+ $buffer = fc $buffer if $config{lowercase};
$buffer =~ s/\s+/ /g;
unless ($config{punctuation}) {
- if ($config{flankbreaks}) {
- $buffer =~ s/[^[:alpha:] ]+/ \xff /g;
- }
- else {
- $buffer =~ s/[^[:alpha:] ]+/\xff/g;
- }
+ if ($config{flankbreaks}) {
+ $buffer =~ s/[^[:alpha:] ]+/ \xff /g;
+ }
+ else {
+ $buffer =~ s/[^[:alpha:] ]+/\xff/g;
+ }
}
$buffer =~ y/ / /s;
return $buffer;
@@ -154,11 +156,11 @@ avoid calculating the same thing twice is probably this:
sub ngram_counts {
my %config = (
- spaces => 1,
- punctuation => 0,
- lowercase => 1,
- flankbreaks => 1
- );
+ spaces => 1,
+ punctuation => 0,
+ lowercase => 1,
+ flankbreaks => 1
+ );
if (ref($_[0]) eq 'HASH') {
%config = (%config, %{+shift});
}