The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
Changes -0 +3
META.json -5 +7
META.yml -6 +7
Makefile.PL -1 +3
Ngram.xs -0 +1
lib/Text/Ngram.pm -13 +15
6 files changed (This is a version diff) -25 +36
@@ -1,5 +1,8 @@
 Revision history for Text-Ngram
 
+0.15   Thu Jul 17 16:50:58 WEST 2014
+        - substitute lowercase by case folding.
+
 0.14   Wed Jan 25 15:40:30 WET 2012
         - releasing as stable
 
@@ -4,7 +4,7 @@
       "Alberto Simoes <ambs@cpan.org>"
    ],
    "dynamic_config" : 1,
-   "generated_by" : "ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.113640",
+   "generated_by" : "ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.141520",
    "license" : [
       "unknown"
    ],
@@ -22,18 +22,20 @@
    "prereqs" : {
       "build" : {
          "requires" : {
-            "ExtUtils::MakeMaker" : 0
+            "ExtUtils::MakeMaker" : "0"
          }
       },
       "configure" : {
          "requires" : {
-            "ExtUtils::MakeMaker" : 0
+            "ExtUtils::MakeMaker" : "0"
          }
       },
       "runtime" : {
-         "requires" : {}
+         "requires" : {
+            "Unicode::CaseFold" : "1.00"
+         }
       }
    },
    "release_status" : "stable",
-   "version" : "0.14"
+   "version" : "0.15"
 }
@@ -3,19 +3,20 @@ abstract: 'Ngram analysis of text'
 author:
   - 'Alberto Simoes <ambs@cpan.org>'
 build_requires:
-  ExtUtils::MakeMaker: 0
+  ExtUtils::MakeMaker: '0'
 configure_requires:
-  ExtUtils::MakeMaker: 0
+  ExtUtils::MakeMaker: '0'
 dynamic_config: 1
-generated_by: 'ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.113640'
+generated_by: 'ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.141520'
 license: unknown
 meta-spec:
   url: http://module-build.sourceforge.net/META-spec-v1.4.html
-  version: 1.4
+  version: '1.4'
 name: Text-Ngram
 no_index:
   directory:
     - t
     - inc
-requires: {}
-version: 0.14
+requires:
+  Unicode::CaseFold: '1.00'
+version: '0.15'
@@ -4,7 +4,9 @@ use ExtUtils::MakeMaker;
 WriteMakefile(
     'NAME'		=> 'Text::Ngram',
     'VERSION_FROM'	=> 'lib/Text/Ngram.pm',
-    'PREREQ_PM'		=> {},
+    'PREREQ_PM'		=> {
+    	'Unicode::CaseFold' => '1.00',
+    	},
     ($] >= 5.005 ? 
       (ABSTRACT_FROM => 'lib/Text/Ngram.pm', 
        AUTHOR     => 'Alberto Simoes <ambs@cpan.org>') : ()),
@@ -21,6 +21,7 @@ void _process_buffer(pTHX_ SV* sv, unsigned int window, HV** counts_hv) {
         unsigned int c;
         len = sv_len_utf8(sv);
         unsigned int windows = (len < window) ? 0 : len - window + 1;
+
         while (windows--) {
             cur = next = buffer + UTF8SKIP(buffer);
             for (c = window - 1;  c--; cur += UTF8SKIP(cur)) ;
@@ -4,6 +4,8 @@ use 5.008008;
 use strict;
 use warnings;
 
+use Unicode::CaseFold;
+
 require Exporter;
 
 our @ISA = qw(Exporter);
@@ -11,7 +13,7 @@ our %EXPORT_TAGS = ( 'all' => [ qw( ngram_counts add_to_counts) ] );
 our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
 our @EXPORT = qw();
 
-our $VERSION = '0.14';
+our $VERSION = '0.15';
 
 =head1 NAME
 
@@ -56,15 +58,15 @@ XSLoader::load('Text::Ngram', $VERSION);
 sub _clean_buffer {
     my %config = %{+shift};
     my $buffer = shift;
-    $buffer = lc $buffer if $config{lowercase};
+    $buffer = fc $buffer if $config{lowercase};
     $buffer =~ s/\s+/ /g;
     unless ($config{punctuation}) {
-      if ($config{flankbreaks}) {
-        $buffer =~ s/[^[:alpha:] ]+/ \xff /g;
-      }
-      else {
-        $buffer =~ s/[^[:alpha:] ]+/\xff/g;
-      }
+        if ($config{flankbreaks}) {
+            $buffer =~ s/[^[:alpha:] ]+/ \xff /g;
+        }
+        else {
+            $buffer =~ s/[^[:alpha:] ]+/\xff/g;
+        }
     }
     $buffer =~ y/ / /s;
     return $buffer;
@@ -154,11 +156,11 @@ avoid calculating the same thing twice is probably this:
 
 sub ngram_counts {
     my %config = (
-        spaces => 1,
-        punctuation => 0,
-        lowercase => 1,
-        flankbreaks => 1
-        );
+                  spaces => 1,
+                  punctuation => 0,
+                  lowercase => 1,
+                  flankbreaks => 1
+                 );
     if (ref($_[0]) eq 'HASH') {
         %config = (%config, %{+shift});
     }