The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!perl -w
use strict;
use Test::More tests => 50;

use Unicode::ICU::Collator qw(:constants);

# based on
# http://www.perl.com/pub/2011/08/whats-wrong-with-sort-and-how-to-fix-it.html
my @sorted_names = split /\n/, <<EOS;
Bobrowski
Bodmer
B\xf6hme
B\xf6ll
B\xf6ttcher
Borchert
Born
Brandis
Brant
EOS

my $L_bar = "\x{023d}"; # L with bar
my $m_mid_tilde = "\x{1d6f}"; # small m with middle tilde

# 325 is combining ring below
# 30A is combining ring above
my $a_under_over = "a\x{0325}\x{030A}";
my $a_over_under = "a\x{030A}\x{0325}";

{
  my $col = Unicode::ICU::Collator->new('de@collation=phonebook');
  ok($col, "make de phonebook collator");
  print "# actual: ", $col->getLocale(ULOC_ACTUAL_LOCALE()), "\n";
  print "# valid: ", $col->getLocale(ULOC_VALID_LOCALE()), "\n";
  print "# req: ", $col->getLocale(ULOC_REQUESTED_LOCALE()), "\n";
  my @names = reverse @sorted_names;
  my @sorted = sort { $col->cmp($a, $b) } @names;
  is_deeply(\@sorted, \@sorted_names, "check sorted names (cmp)");

  is($col->cmp($L_bar, $m_mid_tilde), -1, "compare sure-to-be-utf8 text");
}

{
  my $col = Unicode::ICU::Collator->new('en');
  ok($col, "make en collator");
  my $key_A1 = $col->getSortKey("A");
  ok($key_A1, "make sort key for A");
  print "# ", unpack("H*", $key_A1), "\n";

  # this was broken until I got fuzzy on the buffer lengths
  my $key_A2 = $col->getSortKey("A");
  ok($key_A2, "make sort key for A again");
  print "# ", unpack("H*", $key_A2), "\n";
  
  is($key_A1, $key_A2, "make sure A repeatably returns the same sort key");
  cmp_ok($key_A1, '!~', qr/\0/, "sort key shouldn't contain \\0");

  my $key_a = $col->getSortKey("a");
  ok($key_a, "got key for a");
  print "# ", unpack("H*", $key_a), "\n";
  isnt($key_a, $key_A1, "doesn't equal A key");

  cmp_ok($key_a, 'lt', $key_A1, "a sorts before A");

  my $L_bar_key = $col->getSortKey($L_bar);
  ok($L_bar_key, "made L_bar key");
  print "# ", unpack("H*", $L_bar_key), "\n";
  my $m_tilde_key = $col->getSortKey($m_mid_tilde);
  ok($m_tilde_key, "made m_mid_tilde_key");
  print "# ", unpack("H*", $m_tilde_key), "\n";
  cmp_ok($L_bar_key, 'lt', $m_tilde_key, "make sure they compare ok");

  is($col->getAttribute(UCOL_NORMALIZATION_MODE()), UCOL_OFF(),
     "Normalization is off by default");
  $col->setAttribute(UCOL_NORMALIZATION_MODE(), UCOL_ON());
  is($col->getAttribute(UCOL_NORMALIZATION_MODE()), UCOL_ON(),
     "now it's on");

  my $over_under_key = $col->getSortKey($a_over_under);
  ok($over_under_key, "got key for over under");
  print "# ", unpack("H*", $over_under_key), "\n";
  my $under_over_key = $col->getSortKey($a_under_over);
  ok($under_over_key, "got key for under over");
  print "# ", unpack("H*", $under_over_key), "\n";
  is($over_under_key, $under_over_key, "they should be the same");

  is($col->cmp($a_under_over, $a_over_under), 0,
     "changed mark order should be equal");

  my $m_tilde_many = $m_mid_tilde x 1000;
  my $m_tilde_many_key = $col->getSortKey($m_tilde_many);
  ok($m_tilde_many_key, "try a long sort key");
  print "# long key length: ", length $m_tilde_many_key, "\n";
}

{ # the same with getSortKey
  my $col = Unicode::ICU::Collator->new('de@collation=phonebook');
  ok($col, "make de phonebook collator");
  my @names = reverse @sorted_names;
  my @sorted = map $_->[1],
    sort { $a->[0] cmp $b->[0] }
      map [ $col->getSortKey($_), $_ ], @names;
  is_deeply(\@sorted, \@sorted_names, "check sorted names (getSortKey)");
}

{ # get rules
  {
    my $col = Unicode::ICU::Collator->new("root");
    ok($col, "make root collator");
    my $rules = $col->getRules;
    isnt($rules, "", "all collators have rules");
    ok(utf8::is_utf8($rules), "default rules should be utf8");
    is($col->getRules(UCOL_TAILORING_ONLY), "",
       "root locale has no tailoring");
  }
  {
    my $col = Unicode::ICU::Collator->new("de__phonebook");
    ok($col, "make german phonebook collator");
    my $tailor = $col->getRules(UCOL_TAILORING_ONLY);
    isnt($tailor, "", "de phonebook locale has tailoring");
    ok(utf8::is_utf8($tailor), "de tailoring should have utf8 on");
  }
}

{ # versions
  my $col = Unicode::ICU::Collator->new("en");
  my $ver_qr = qr/\A[0-9]+(\.[0-9]+)+\z/;

  my $ver = $col->getVersion;
  ok($ver, "have a version");
  like($ver, $ver_qr, "right format");

  my $uca_ver = $col->getUCAVersion;
  ok($uca_ver, "have a UCA version");
  like($uca_ver, $ver_qr, "right format");
}

{ # our various operators
  my $col = Unicode::ICU::Collator->new("en");
  print "# ", $col->getAttribute(UCOL_CASE_FIRST), "\n";
  # ignore case differences
  $col->setAttribute(UCOL_STRENGTH(), UCOL_SECONDARY());
  ok($col->eq("Test", "test"), "eq");
  ok(!$col->eq("Test", "Tast"), "!eq");
  ok($col->ne("Test", "Tast"), "ne");
  ok(!$col->ne("Test", "test"), "!ne");
  ok($col->le("Test", "test"), "le (equal");
  ok($col->le("tast", "Test"), "le (less)");
  ok(!$col->le("Test", "tast"), "!le (greater)");
  ok($col->ge("Test", "test"), "ge (equal)");
  ok($col->ge("Test", "tast"), "ge (greater)");
  ok(!$col->ge("tast", "Test"), "!ge (less)");
  ok($col->lt("tast", "Test"), "lt (less)");
  ok(!$col->lt("Test", "test"), "!lt (equal)");
  ok(!$col->lt("Test", "tast"), "!lt (greater)");
  ok($col->gt("Test", "tast"), "gt (greater)");
  ok(!$col->gt("Test", "test"), "!gt (equal)");
  ok(!$col->gt("tast", "Test"), "!gt (less)");
}