The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl

use 5.014;

use utf8;
use strict;
use autodie;
use warnings;
use warnings    qw< FATAL  utf8     >;
use open        qw< :std  :utf8     >;
use charnames   qw< :full >;
use feature     qw< unicode_strings >;

use re          "/msux";

#############################################

use File::Basename      qw< basename >;
use Carp                qw< carp croak confess cluck >;
use Encode              qw< encode decode >;
use Unicode::Normalize  qw< NFD NFC NFKD NFKC >;

#############################################

# Forward declarations, with prototypes, for every sub this program uses.
# Declaring them before any code is compiled means the prototypes are already
# known wherever the subs are called, so calls may appear ahead of the
# definitions and may omit parentheses (a sub with a ($) prototype then parses
# as a unary operator).  convert_to_superscripts has no definition in the
# file's static text; compile() creates it at run time with a string eval.
sub compile		    (    ); 
sub convert_to_superscripts   ( _  );
sub deQ			    ( $  );
sub deQQ		    ( $  );
sub dequeue		    ( $$ );
sub filter		    (    ); 
sub fix_encodings	    (    ); 
sub inits		    (    ); 
sub last_rites		    (    ); 
sub main		    (    ); 

#############################################

# Program entry point.  main() does all the work and the explicit exit ends
# the run, so control must never reach the statement after the block; the
# die is there to make that assumption loud if it is ever violated.
MAIN: {
    main();
    exit 0;
}
die "NOT REACHED";

#############################################


# fix_encodings()
#
# Turns the command-line arguments in @ARGV from UTF-8 encoded bytes into
# character strings, in place.
#
# Takes no arguments and returns nothing useful.  Only arguments that
# contain a non-ASCII character are decoded (decoding pure ASCII would not
# change it), and an argument that is already a character string is left
# alone: decoding such a string a second time either mangles it or makes
# decode() croak with "Cannot decode string with wide characters".
sub fix_encodings() {
    for my $arg (@ARGV) {                  # $arg aliases the @ARGV element
        next if utf8::is_utf8($arg);       # already decoded (e.g. perl -CA)
        next unless $arg =~ /\P{ASCII}/;   # plain ASCII: nothing to decode
        $arg = decode("UTF-8", $arg);
    }
    return;
}

# inits()
#
# One-time start-up work, done before anything else runs:
#   * install the die/warn handlers (last_rites),
#   * decode the command-line arguments (fix_encodings),
#   * shorten $0 to the script's base name so messages stay short,
#   * turn on autoflush for the currently selected output handle.
sub inits() {
    last_rites();
    fix_encodings();

    my $short_name = basename($0);
    $0 = $short_name;

    $| = 1;
}

# last_rites()
#
# Installs the global __DIE__ and __WARN__ handlers.
#
#   __DIE__   An exception raised while an eval is active ($^S true) is left
#             alone for that eval to catch; any other exception is re-thrown
#             through confess so it carries a full stack trace.
#   __WARN__  A warning raised inside an eval is reported with a stack trace
#             (cluck) and execution continues; a warning raised anywhere else
#             is promoted to a fatal error with a stack trace (confess).
sub last_rites() {

    $SIG{__DIE__} = sub {
        return if $^S;
        confess "Uncaught exception: @_";
    };

    $SIG{__WARN__} = sub {
        confess "Deadly warning: @_" unless $^S;
        cluck "Trapped warning: @_";
    };

}

# main()
#
# Runs the program's three phases in order: set up the process, build the
# converter sub, then filter the input through it.
sub main() {
    inits();      # handlers, argument decoding, $0, autoflush
    compile();    # defines convert_to_superscripts
    filter();     # reads input, writes converted lines
}

# compile()
#
# Defines convert_to_superscripts() at run time from the character table
# read through the DATA handle.
#
# Each table line has the form
#     <whitespace><char><whitespace><4-6 hex digits><TAB><CHARACTER NAME>
# Blank lines and lines whose first non-blank character is "#" are skipped;
# any other line that does not fit the pattern is fatal.  A table character
# is used only when its NFKD form is a single character different from
# itself: that NFKD character is the "original" and the table character is
# its superscript.  The code point and name columns are checked for shape
# only; their values are not used.
#
# The collected pairs become the two lists of a tr/// in generated source
# text, which is then string-eval'ed.  Takes no arguments; dies on a bad
# data line or if the generated code fails to compile.
sub compile() {

    my @originals;       # code points of the plain characters
    my @superscripts;    # code points of their superscripts, in the same order

    local $_;

    binmode(DATA, ":utf8");
    while (<DATA>) {
        next if / \A [\h\v] + \z /;
        next if /  ^ \h* \N{NUMBER SIGN} /;
        chomp;
        die "bad data line: $_" unless m{
            \A \h+

            (?<CHAR>
                \H
            )

            \h+

            (?<CPNUM>
                \p{ahex}{4,6}
            )

            \t

            (?<NAME>
                (?=  \w )
                [A-Z0-9\N{SPACE}\N{HYPHEN-MINUS}] +
                (?<= \w )
            )

            \z
        };

        my $char = $+{CHAR};
        my $nfkd = NFKD($char);
        next unless $char ne $nfkd && length($nfkd) == 1;

        push @superscripts, ord($char);
        push @originals,    ord($nfkd);
    }

    # Spell every character as a \x{...} escape in the generated tr///.
    # Written literally, a "-" would start a range, a "\" would start an
    # escape, and a "]" would end the list early; an escape cannot be
    # mistaken for any of those.
    my $as_escapes = sub { join q(), map { sprintf '\x{%X}', $_ } @_ };
    my $from = $as_escapes->(@originals);
    my $to   = $as_escapes->(@superscripts);

    # The first here-document is taken literally; the second interpolates
    # the two tr/// lists, which is why its own sigils are backslashed.
    my $code = deQ(<<'LITERAL') . deQQ(<<"INTERPOLATED");
                |Q|
                |Q|   use utf8;
                |Q|
                |Q|   sub convert_to_superscripts (_) {
                |Q|       confess "argcount" unless @_ == 1;
                |Q|       my $string = $_[0];
                |Q|       confess "want string" if ref $_[0];
LITERAL
               |QQ|
               |QQ|      \$string =~ tr[${from}][${to}];
               |QQ|       return \$string;
               |QQ|   }
               |QQ|
               |QQ|   'ig00'
               |QQ|
INTERPOLATED

    # The generated text ends in a true value, so a false result from eval
    # can only mean that it failed to compile or run.
    eval($code) or die "can't compile convert_to_superscripts: $@";

}

# filter()
#
# The main loop: reads lines from the files named in @ARGV (or standard
# input), decomposes each line with NFD, passes it through
# convert_to_superscripts() and prints the result followed by a newline.
#
# When there are no file arguments and both STDIN and STDERR are terminals,
# a one-line notice is printed so an interactive user knows the program is
# waiting for input.  An END block that closes STDOUT is registered through
# a string eval, so it is installed only once this sub actually runs
# (presumably so that a failed final write is noticed at exit -- confirm).
sub filter() {

    my $waiting_on_terminal = @ARGV == 0 && -t STDIN;
    if ($waiting_on_terminal && -t STDERR) {
        print STDERR "$0: reading from standard input\n";
    }

    eval q{ 
	END { close STDOUT } 
	1;
    } || die;

    while (my $input = <>) {
        chomp $input;
        my $decomposed = NFD($input);
        my $converted  = convert_to_superscripts($decomposed);
        say $converted;
    }

}

# dequeue($leader, $body)
#
# Returns a copy of $body in which, on every line, any leading whitespace,
# the literal $leader and at most one space after it have been removed.
# Lines that do not begin (after whitespace) with $leader are returned
# unchanged.
#
# The optional space is written as [ ] on purpose: this file compiles every
# pattern with /x, and under /x a bare space in a pattern is ignored, which
# would leave the "?" quantifying the last character of the leader instead.
sub dequeue($$) {
    my($leader, $body) = @_;
    $body =~ s/^\s*\Q$leader\E[ ]?//gm;
    return $body;
}

# deQ($text)
#
# Strips the "|Q|" line leader from $text (see dequeue) and returns the
# result.  The ($) prototype lets callers use it as a unary operator.
sub deQ($) {
    my ($quoted) = @_;
    return dequeue(q<|Q|>, $quoted);
}

# deQQ($text)
#
# Strips the "|QQ|" line leader from $text (see dequeue) and returns the
# result.  The ($) prototype lets callers use it as a unary operator.
sub deQQ($) {
    my ($quoted) = @_;
    return dequeue(qq<|QQ|>, $quoted);
}


__END__
 ⁺  207A	SUPERSCRIPT PLUS SIGN
 ⁻  207B	SUPERSCRIPT MINUS
 ⁼  207C	SUPERSCRIPT EQUALS SIGN
 ⁽  207D	SUPERSCRIPT LEFT PARENTHESIS
 ⁾  207E	SUPERSCRIPT RIGHT PARENTHESIS

 ⁰  2070	SUPERSCRIPT ZERO
 ¹  00B9	SUPERSCRIPT ONE
 ²  00B2	SUPERSCRIPT TWO
 ³  00B3	SUPERSCRIPT THREE
 ⁴  2074	SUPERSCRIPT FOUR
 ⁵  2075	SUPERSCRIPT FIVE
 ⁶  2076	SUPERSCRIPT SIX
 ⁷  2077	SUPERSCRIPT SEVEN
 ⁸  2078	SUPERSCRIPT EIGHT
 ⁹  2079	SUPERSCRIPT NINE

 ᴬ  1D2C	MODIFIER LETTER CAPITAL A
 ᵃ  1D43	MODIFIER LETTER SMALL A
 ᴭ  1D2D	MODIFIER LETTER CAPITAL AE
 ᵆ  1D46	MODIFIER LETTER SMALL TURNED AE
 ᵄ  1D44	MODIFIER LETTER SMALL TURNED A
 ᵅ  1D45	MODIFIER LETTER SMALL ALPHA
 ᶛ  1D9B	MODIFIER LETTER SMALL TURNED ALPHA
 ᴮ  1D2E	MODIFIER LETTER CAPITAL B
 ᵇ  1D47	MODIFIER LETTER SMALL B
 ᴯ  1D2F	MODIFIER LETTER CAPITAL BARRED B
 ᶜ  1D9C	MODIFIER LETTER SMALL C
 ᶝ  1D9D	MODIFIER LETTER SMALL C WITH CURL
 ᴰ  1D30	MODIFIER LETTER CAPITAL D
 ᵈ  1D48	MODIFIER LETTER SMALL D
 ᶞ  1D9E	MODIFIER LETTER SMALL ETH
 ᴱ  1D31	MODIFIER LETTER CAPITAL E
 ᵉ  1D49	MODIFIER LETTER SMALL E
 ᴲ  1D32	MODIFIER LETTER CAPITAL REVERSED E
 ᵊ  1D4A	MODIFIER LETTER SMALL SCHWA
 ᵋ  1D4B	MODIFIER LETTER SMALL OPEN E
 ᶟ  1D9F	MODIFIER LETTER SMALL REVERSED OPEN E
 ᵌ  1D4C	MODIFIER LETTER SMALL TURNED OPEN E
 ᶠ  1DA0	MODIFIER LETTER SMALL F
 ᴳ  1D33	MODIFIER LETTER CAPITAL G
 ᵍ  1D4D	MODIFIER LETTER SMALL G
 ᶢ  1DA2	MODIFIER LETTER SMALL SCRIPT G
 ˠ  02E0	MODIFIER LETTER SMALL GAMMA
 ʰ  02B0	MODIFIER LETTER SMALL H
 ᴴ  1D34	MODIFIER LETTER CAPITAL H
 ʱ  02B1	MODIFIER LETTER SMALL H WITH HOOK
 ʻ  02BB	MODIFIER LETTER TURNED COMMA
 ʽ  02BD	MODIFIER LETTER REVERSED COMMA
 ᴵ  1D35	MODIFIER LETTER CAPITAL I
 ⁱ  2071	SUPERSCRIPT LATIN SMALL LETTER I
 ᶦ  1DA6	MODIFIER LETTER SMALL CAPITAL I
 ᵎ  1D4E	MODIFIER LETTER SMALL TURNED I
 ᶤ  1DA4	MODIFIER LETTER SMALL I WITH STROKE
 ᶧ  1DA7	MODIFIER LETTER SMALL CAPITAL I WITH STROKE
 ᶥ  1DA5	MODIFIER LETTER SMALL IOTA
 ʲ  02B2	MODIFIER LETTER SMALL J
 ᴶ  1D36	MODIFIER LETTER CAPITAL J
 ᶨ  1DA8	MODIFIER LETTER SMALL J WITH CROSSED-TAIL
 ᶡ  1DA1	MODIFIER LETTER SMALL DOTLESS J WITH STROKE
 ᴷ  1D37	MODIFIER LETTER CAPITAL K
 ᵏ  1D4F	MODIFIER LETTER SMALL K
 ˡ  02E1	MODIFIER LETTER SMALL L
 ᴸ  1D38	MODIFIER LETTER CAPITAL L
 ᶫ  1DAB	MODIFIER LETTER SMALL CAPITAL L
 ᶪ  1DAA	MODIFIER LETTER SMALL L WITH PALATAL HOOK
 ᶩ  1DA9	MODIFIER LETTER SMALL L WITH RETROFLEX HOOK
 ᴹ  1D39	MODIFIER LETTER CAPITAL M
 ᵐ  1D50	MODIFIER LETTER SMALL M
 ᶬ  1DAC	MODIFIER LETTER SMALL M WITH HOOK
 ᴺ  1D3A	MODIFIER LETTER CAPITAL N
 ⁿ  207F	SUPERSCRIPT LATIN SMALL LETTER N
 ᶰ  1DB0	MODIFIER LETTER SMALL CAPITAL N
 ᴻ  1D3B	MODIFIER LETTER CAPITAL REVERSED N
 ᶮ  1DAE	MODIFIER LETTER SMALL N WITH LEFT HOOK
 ᶯ  1DAF	MODIFIER LETTER SMALL N WITH RETROFLEX HOOK
 ᵑ  1D51	MODIFIER LETTER SMALL ENG
 ᴼ  1D3C	MODIFIER LETTER CAPITAL O
 ᵒ  1D52	MODIFIER LETTER SMALL O
 ᵓ  1D53	MODIFIER LETTER SMALL OPEN O
 ᵔ  1D54	MODIFIER LETTER SMALL TOP HALF O
 ᵕ  1D55	MODIFIER LETTER SMALL BOTTOM HALF O
 ᶱ  1DB1	MODIFIER LETTER SMALL BARRED O
 ᴽ  1D3D	MODIFIER LETTER CAPITAL OU
 ᴾ  1D3E	MODIFIER LETTER CAPITAL P
 ᵖ  1D56	MODIFIER LETTER SMALL P
 ᶲ  1DB2	MODIFIER LETTER SMALL PHI
 ʳ  02B3	MODIFIER LETTER SMALL R
 ᴿ  1D3F	MODIFIER LETTER CAPITAL R
 ʴ  02B4	MODIFIER LETTER SMALL TURNED R
 ʵ  02B5	MODIFIER LETTER SMALL TURNED R WITH HOOK
 ʶ  02B6	MODIFIER LETTER SMALL CAPITAL INVERTED R
 ˢ  02E2	MODIFIER LETTER SMALL S
 ᶳ  1DB3	MODIFIER LETTER SMALL S WITH HOOK
 ᶴ  1DB4	MODIFIER LETTER SMALL ESH
 ᵀ  1D40	MODIFIER LETTER CAPITAL T
 ᵗ  1D57	MODIFIER LETTER SMALL T
 ᶵ  1DB5	MODIFIER LETTER SMALL T WITH PALATAL HOOK
 ᵁ  1D41	MODIFIER LETTER CAPITAL U
 ᵘ  1D58	MODIFIER LETTER SMALL U
 ᶸ  1DB8	MODIFIER LETTER SMALL CAPITAL U
 ᵙ  1D59	MODIFIER LETTER SMALL SIDEWAYS U
 ᶶ  1DB6	MODIFIER LETTER SMALL U BAR
 ᶣ  1DA3	MODIFIER LETTER SMALL TURNED H
 ᵚ  1D5A	MODIFIER LETTER SMALL TURNED M
 ᶭ  1DAD	MODIFIER LETTER SMALL TURNED M WITH LONG LEG
 ᶷ  1DB7	MODIFIER LETTER SMALL UPSILON
 ᵛ  1D5B	MODIFIER LETTER SMALL V
 ⱽ  2C7D	MODIFIER LETTER CAPITAL V
 ᶹ  1DB9	MODIFIER LETTER SMALL V WITH HOOK
 ᶺ  1DBA	MODIFIER LETTER SMALL TURNED V
 ʷ  02B7	MODIFIER LETTER SMALL W
 ᵂ  1D42	MODIFIER LETTER CAPITAL W
 ˣ  02E3	MODIFIER LETTER SMALL X
 ʸ  02B8	MODIFIER LETTER SMALL Y
 ᶻ  1DBB	MODIFIER LETTER SMALL Z
 ᶼ  1DBC	MODIFIER LETTER SMALL Z WITH RETROFLEX HOOK
 ᶽ  1DBD	MODIFIER LETTER SMALL Z WITH CURL
 ᶾ  1DBE	MODIFIER LETTER SMALL EZH
 ꝰ  A770	MODIFIER LETTER US

 ᵜ  1D5C	MODIFIER LETTER SMALL AIN
 ᵝ  1D5D	MODIFIER LETTER SMALL BETA
 ᵞ  1D5E	MODIFIER LETTER SMALL GREEK GAMMA
 ᵟ  1D5F	MODIFIER LETTER SMALL DELTA
 ᶿ  1DBF	MODIFIER LETTER SMALL THETA
 ᵠ  1D60	MODIFIER LETTER SMALL GREEK PHI
 ᵡ  1D61	MODIFIER LETTER SMALL CHI
 ᵸ  1D78	MODIFIER LETTER CYRILLIC EN
 ჼ  10FC	MODIFIER LETTER GEORGIAN NAR
  ٰ  0670	ARABIC LETTER SUPERSCRIPT ALEF
  ܑ  0711	SYRIAC LETTER SUPERSCRIPT ALAPH

 ˀ  02C0	MODIFIER LETTER GLOTTAL STOP
 ʼ  02BC	MODIFIER LETTER APOSTROPHE
 ˮ  02EE	MODIFIER LETTER DOUBLE APOSTROPHE
 ʾ  02BE	MODIFIER LETTER RIGHT HALF RING
 ˤ  02E4	MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
 ʿ  02BF	MODIFIER LETTER LEFT HALF RING
 ˁ  02C1	MODIFIER LETTER REVERSED GLOTTAL STOP

 ՙ  0559	ARMENIAN MODIFIER LETTER LEFT HALF RING
 ⵯ  2D6F	TIFINAGH MODIFIER LETTER LABIALIZATION MARK
 ꜀  A700	MODIFIER LETTER CHINESE TONE YIN PING
 ꜁  A701	MODIFIER LETTER CHINESE TONE YANG PING
 ꜂  A702	MODIFIER LETTER CHINESE TONE YIN SHANG
 ꜃  A703	MODIFIER LETTER CHINESE TONE YANG SHANG
 ꜄  A704	MODIFIER LETTER CHINESE TONE YIN QU
 ꜅  A705	MODIFIER LETTER CHINESE TONE YANG QU
 ꜆  A706	MODIFIER LETTER CHINESE TONE YIN RU
 ꜇  A707	MODIFIER LETTER CHINESE TONE YANG RU
 ꜈  A708	MODIFIER LETTER EXTRA-HIGH DOTTED TONE BAR
 ꜉  A709	MODIFIER LETTER HIGH DOTTED TONE BAR
 ꜊  A70A	MODIFIER LETTER MID DOTTED TONE BAR
 ꜋  A70B	MODIFIER LETTER LOW DOTTED TONE BAR
 ꜌  A70C	MODIFIER LETTER EXTRA-LOW DOTTED TONE BAR
 ꜍  A70D	MODIFIER LETTER EXTRA-HIGH DOTTED LEFT-STEM TONE BAR
 ꜎  A70E	MODIFIER LETTER HIGH DOTTED LEFT-STEM TONE BAR
 ꜏  A70F	MODIFIER LETTER MID DOTTED LEFT-STEM TONE BAR
 ꜐  A710	MODIFIER LETTER LOW DOTTED LEFT-STEM TONE BAR
 ꜑  A711	MODIFIER LETTER EXTRA-LOW DOTTED LEFT-STEM TONE BAR
 ꜒  A712	MODIFIER LETTER EXTRA-HIGH LEFT-STEM TONE BAR
 ꜓  A713	MODIFIER LETTER HIGH LEFT-STEM TONE BAR
 ꜔  A714	MODIFIER LETTER MID LEFT-STEM TONE BAR
 ꜕  A715	MODIFIER LETTER LOW LEFT-STEM TONE BAR
 ꜖  A716	MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
 ꜗ  A717	MODIFIER LETTER DOT VERTICAL BAR
 ꜘ  A718	MODIFIER LETTER DOT SLASH
 ꜙ  A719	MODIFIER LETTER DOT HORIZONTAL BAR
 ꜚ  A71A	MODIFIER LETTER LOWER RIGHT CORNER ANGLE
 ꜛ  A71B	MODIFIER LETTER RAISED UP ARROW
 ꜜ  A71C	MODIFIER LETTER RAISED DOWN ARROW
 ꜝ  A71D	MODIFIER LETTER RAISED EXCLAMATION MARK
 ꜞ  A71E	MODIFIER LETTER RAISED INVERTED EXCLAMATION MARK
 ꜟ  A71F	MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
 ꜠  A720	MODIFIER LETTER STRESS AND HIGH TONE
 ꜡  A721	MODIFIER LETTER STRESS AND LOW TONE
 ꞈ  A788	MODIFIER LETTER LOW CIRCUMFLEX ACCENT
 ꞉  A789	MODIFIER LETTER COLON
 ꞊  A78A	MODIFIER LETTER SHORT EQUALS SIGN
 ː  02D0	MODIFIER LETTER TRIANGULAR COLON
 ˑ  02D1	MODIFIER LETTER HALF TRIANGULAR COLON
 ꩰ  AA70	MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
 ʹ  02B9	MODIFIER LETTER PRIME
 ʺ  02BA	MODIFIER LETTER DOUBLE PRIME
 ˂  02C2	MODIFIER LETTER LEFT ARROWHEAD
 ˃  02C3	MODIFIER LETTER RIGHT ARROWHEAD
 ˄  02C4	MODIFIER LETTER UP ARROWHEAD
 ˅  02C5	MODIFIER LETTER DOWN ARROWHEAD
 ˆ  02C6	MODIFIER LETTER CIRCUMFLEX ACCENT
 ˈ  02C8	MODIFIER LETTER VERTICAL LINE
 ˉ  02C9	MODIFIER LETTER MACRON
 ˊ  02CA	MODIFIER LETTER ACUTE ACCENT
 ˋ  02CB	MODIFIER LETTER GRAVE ACCENT
 ˌ  02CC	MODIFIER LETTER LOW VERTICAL LINE
 ˍ  02CD	MODIFIER LETTER LOW MACRON
 ˎ  02CE	MODIFIER LETTER LOW GRAVE ACCENT
 ˏ  02CF	MODIFIER LETTER LOW ACUTE ACCENT
 ˒  02D2	MODIFIER LETTER CENTRED RIGHT HALF RING
 ˓  02D3	MODIFIER LETTER CENTRED LEFT HALF RING
 ˔  02D4	MODIFIER LETTER UP TACK
 ˕  02D5	MODIFIER LETTER DOWN TACK
 ˖  02D6	MODIFIER LETTER PLUS SIGN
 ˗  02D7	MODIFIER LETTER MINUS SIGN
 ˞  02DE	MODIFIER LETTER RHOTIC HOOK
 ˟  02DF	MODIFIER LETTER CROSS ACCENT
 ˥  02E5	MODIFIER LETTER EXTRA-HIGH TONE BAR
 ˦  02E6	MODIFIER LETTER HIGH TONE BAR
 ˧  02E7	MODIFIER LETTER MID TONE BAR
 ˨  02E8	MODIFIER LETTER LOW TONE BAR
 ˩  02E9	MODIFIER LETTER EXTRA-LOW TONE BAR
 ˪  02EA	MODIFIER LETTER YIN DEPARTING TONE MARK
 ˫  02EB	MODIFIER LETTER YANG DEPARTING TONE MARK
 ˬ  02EC	MODIFIER LETTER VOICING
 ˭  02ED	MODIFIER LETTER UNASPIRATED
 ˯  02EF	MODIFIER LETTER LOW DOWN ARROWHEAD
 ˰  02F0	MODIFIER LETTER LOW UP ARROWHEAD
 ˱  02F1	MODIFIER LETTER LOW LEFT ARROWHEAD
 ˲  02F2	MODIFIER LETTER LOW RIGHT ARROWHEAD
 ˳  02F3	MODIFIER LETTER LOW RING
 ˴  02F4	MODIFIER LETTER MIDDLE GRAVE ACCENT
 ˵  02F5	MODIFIER LETTER MIDDLE DOUBLE GRAVE ACCENT
 ˶  02F6	MODIFIER LETTER MIDDLE DOUBLE ACUTE ACCENT
 ˷  02F7	MODIFIER LETTER LOW TILDE
 ˸  02F8	MODIFIER LETTER RAISED COLON
 ˹  02F9	MODIFIER LETTER BEGIN HIGH TONE
 ˺  02FA	MODIFIER LETTER END HIGH TONE
 ˻  02FB	MODIFIER LETTER BEGIN LOW TONE
 ˼  02FC	MODIFIER LETTER END LOW TONE
 ˽  02FD	MODIFIER LETTER SHELF
 ˾  02FE	MODIFIER LETTER OPEN SHELF
 ˿  02FF	MODIFIER LETTER LOW LEFT ARROW
 ∇  2207	NABLA