The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
# Copyright 2007, 2008, 2009, 2010, 2011 Kevin Ryde

# This file is part of Chart.
#
# Chart is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 3, or (at your option) any later version.
#
# Chart is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Chart.  If not, see <http://www.gnu.org/licenses/>.


package App::Chart::Texinfo::Util;
use 5.004;
use strict;
use warnings;
use Exporter;

use vars qw(@ISA @EXPORT_OK %EXPORT_TAGS $VERSION);
@ISA = qw(Exporter);
@EXPORT_OK = qw(node_to_html_anchor);
%EXPORT_TAGS = (all => \@EXPORT_OK);


# Nodes "HTML Xref Node Name Expansion"
# and   "HTML Xref 8-bit Character Expansion" in the Texinfo manual
# Given $node is wide-char.
#
sub node_to_html_anchor {
  my ($node) = @_;

  # precomposed characters where possible
  if ($node =~ /[^[:ascii:]]/) {
    require Unicode::Normalize;
    $node = Unicode::Normalize::NFC ($node);
  }

  # rule 3 multiple space,tab,newline become one space
  $node =~ s/[ \t\n]+/ /g;

  # rule 4 lose leading and trailing space
  $node =~ s/^ +//;
  $node =~ s/ +$//;

  # rule 6 chars except ascii alnum and the "-" (just inserted) become hex
  $node =~ s/([^ A-Za-z0-9])/_escape_char($1)/ge;

  # rule 5 remaining spaces become dashes
  $node =~ tr/ /-/;

  # rule 7 prepend "g_t" if doesn't begin with alpha
  if ($node =~ /^[^A-Za-z]/) {
    $node = 'g_t' . $node;
  }
  return $node;
}
# ENHANCE-ME: For EBCDIC presumably a UTF-EBCDIC -> unicode conversion is
# needed here, instead of just ord().
sub _escape_char {
  my ($c) = @_; # single-char string
  $c = ord($c);
  if ($c <= 0xFFFF) {
    return sprintf ('_%04x', $c);
  } elsif ($c <= 0xFF_FFFF) {
    return sprintf ('__%06x', $c);
  }
}

1;
__END__

=for stopwords texinfo Texinfo utf unicode

=head1 NAME

App::Chart::Texinfo::Util -- some texinfo utilities

=for test_synopsis my ($anchor, $node)

=head1 SYNOPSIS

 use App::Chart::Texinfo::Util;
 $anchor = App::Chart::Texinfo::Util::node_to_html_anchor ($node);

 # or imported
 use App::Chart::Texinfo::Util ':all';
 $anchor = node_to_html_anchor ($node);

=head1 DESCRIPTION

A function which hasn't found a better place to live yet.

=head1 FUNCTIONS

=over 4

=item C<$string = App::Chart::Texinfo::Util::node_to_html_anchor ($node)>

Return a HTML anchor for a Texinfo node name, as per anchor generation
specified in the Texinfo manual "HTML Xref Node Name Expansion" and "HTML
Xref 8-bit Character Expansion".  It encodes various spaces and
non-alphanumeric characters as hexadecimal "_HHHH" sequences.  For example,

    App::Chart::Texinfo::Util::node_to_html_anchor ('My Node-Name')
    # returns 'My-Node_002dName'

Perl utf8 wide-char strings can be passed here.  Characters beyond 255 are
taken to be unicode and encoded as 4 or 6 hex digits per the Texinfo spec.

=back

=head1 SEE ALSO

L<Texinfo::Menus>

=cut