#!/usr/bin/env perl
use 5.014;
use utf8;
use strict;
use autodie;
use warnings;
use warnings qw< FATAL utf8 >;
use open qw< :std :utf8 >;
use charnames qw< :full >;
use feature qw< unicode_strings >;
use re "/msux";
#############################################
use File::Basename qw< basename >;
use Carp qw< carp croak confess cluck >;
use Encode qw< encode decode >;
use Unicode::Normalize qw< NFD NFC NFKD NFKC >;
#############################################
# Forward declarations, with prototypes, for every sub used in this file.
# Declaring them up front lets the code below call them before their
# definitions and without parentheses (e.g. "dequeue q<|Q|>, $text"), and
# the ($) prototypes make deQ/deQQ parse as unary operators so that
# "deQ<<'LITERAL' . deQQ<<\"INTERPOLATED\"" groups as intended.
# convert_to_superscripts is not defined statically anywhere in this file:
# compile() generates its body as a string and installs it with eval.
sub compile ( );
sub convert_to_superscripts ( _ );
sub deQ ( $ );
sub deQQ ( $ );
sub dequeue ( $$ );
sub filter ( );
sub fix_encodings ( );
sub inits ( );
sub last_rites ( );
sub main ( );
#############################################
# Program entry point: run main() and leave with a success status.
# The trailing die is a tripwire that can only fire if control somehow
# falls out of the MAIN block without exiting.
MAIN: {
    main();
    exit 0;
}
die "NOT REACHED";
#############################################
# fix_encodings()
#
# Decode the command-line arguments in @ARGV from UTF-8 bytes into Perl
# character strings, in place.  Takes no arguments; the return value is
# not meaningful.
#
# Only arguments that contain a non-ASCII code point are touched, because
# pure-ASCII arguments are identical before and after decoding.  Arguments
# that are already character strings (utf8::is_utf8 is true, as happens
# when perl is started with -CA or PERL_UNICODE includes "A") are left
# alone: running them through decode() a second time would either croak
# on wide characters or corrupt characters in the Latin-1 range.
sub fix_encodings() {
    for my $arg (@ARGV) {                 # $arg aliases the @ARGV element
        next unless $arg =~ /\P{ASCII}/;
        next if utf8::is_utf8($arg);      # already decoded; do not double-decode
        $arg = decode("UTF-8", $arg);
    }
}
# inits()
#
# One-time program setup: install the die/warn handlers, decode the
# command-line arguments, trim $0 to the script's base name, and turn on
# autoflush for the currently selected output handle.
sub inits() {
    last_rites();
    fix_encodings();

    # Use only the file name part of $0 so diagnostics stay short.
    $0 = basename $0;

    # Unbuffered output on the selected handle.
    $| = 1;
}
# last_rites()
#
# Install global __DIE__ and __WARN__ handlers.
#
#   * An exception raised outside any eval ($^S false) is reported with a
#     full stack trace via confess; inside an eval the handler does
#     nothing and the exception propagates normally.
#   * A warning raised outside any eval is turned into a fatal error with
#     a stack trace; inside an eval it is only reported (with a stack
#     trace) via cluck.
sub last_rites() {
    $SIG{__DIE__} = sub {
        return if $^S;
        confess "Uncaught exception: @_";
    };
    $SIG{__WARN__} = sub {
        confess "Deadly warning: @_" unless $^S;
        cluck "Trapped warning: @_";
    };
}
# main()
#
# Run the program's three phases in order: set up the environment,
# generate the converter sub, then filter the input.
sub main() {
    for my $phase (\&inits, \&compile, \&filter) {
        $phase->();
    }
}
# compile()
#
# Read the character table that follows __END__ (via the DATA handle) and
# generate, then eval, the definition of convert_to_superscripts().
#
# Each non-blank, non-comment data line must consist of: leading
# horizontal whitespace, one character, whitespace, a 4-6 digit hex code
# point, a tab, and an upper-case character name; any other line is
# fatal.  A character is used only when its NFKD form differs from it and
# is exactly one character long; in that case the NFKD form is mapped to
# the table character.
#
# The two sides of the generated tr[][] are written as \x{...} escapes
# rather than raw characters, so that a table entry whose decomposition
# happens to be "\", "-", "[" or "]" cannot break or change the meaning
# of the generated code.
#
# Takes no arguments.  Dies if a data line is malformed or if the
# generated code fails to compile.
sub compile() {
    my $superscripts = q();
    my $originals    = q();

    binmode(DATA, ":utf8");
    while (my $line = <DATA>) {
        next if $line =~ / \A [\h\v] + \z /x;
        next if $line =~ / ^ \h* \N{NUMBER SIGN} /x;
        chomp $line;
        die "bad data line: $line" unless $line =~ m{
            \A \h+
            (?<CHAR>
                \H
            )
            \h+
            (?<CPNUM>
                \p{ahex}{4,6}
            )
            \t
            (?<NAME>
                (?= \w )
                [A-Z0-9\N{SPACE}\N{HYPHEN-MINUS}] +
                (?<= \w )
            )
            \z
        }x;

        my $char = $+{CHAR};
        my $nfkd = NFKD($char);
        next unless $char ne $nfkd && length($nfkd) == 1;

        # Single-quoted format: yields a literal backslash-x escape.
        $superscripts .= sprintf('\\x{%X}', ord($char));
        $originals    .= sprintf('\\x{%X}', ord($nfkd));
    }

    # The first heredoc is literal; the second interpolates the two
    # escape lists into the tr[][] operator.  deQ/deQQ strip the |Q| and
    # |QQ| line leaders.  The generated code ends in a true value so that
    # a successful eval returns true.
    my $code = deQ(<<'LITERAL') . deQQ(<<"INTERPOLATED");
    |Q|
    |Q| use utf8;
    |Q|
    |Q| sub convert_to_superscripts (_) {
    |Q|     confess "argcount" unless @_ == 1;
    |Q|     my $string = $_[0];
    |Q|     confess "want string" if ref $_[0];
LITERAL
    |QQ|
    |QQ|     \$string =~ tr[$originals][$superscripts];
    |QQ|     return \$string;
    |QQ| }
    |QQ|
    |QQ| 'ig00'
    |QQ|
INTERPOLATED

    # A bare die re-raises whatever the failed eval left in $@.
    eval($code) || die;
}
# filter()
#
# Main loop: read lines from the files named in @ARGV (or from standard
# input), decompose each line with NFD, pass it through
# convert_to_superscripts(), and print the result followed by a newline.
#
# When no files were given and both STDIN and STDERR are terminals, a
# short notice is printed first so an interactive user knows the program
# is waiting for input.  An END block that closes STDOUT is registered at
# run time through a string eval.
sub filter() {
    print STDERR "$0: reading from standard input\n"
        if @ARGV == 0 && -t STDIN && -t STDERR;

    eval q{
        END { close STDOUT }
        1;
    } || die;

    while (defined(my $raw = <>)) {
        chomp $raw;
        say convert_to_superscripts(NFD($raw));
    }
}
# dequeue($leader, $body)
#
# Strip a quoting leader from the start of every line of $body and return
# the result.  On each line, any leading whitespace, the literal $leader,
# and at most one following space are removed.  Lines that do not start
# with the leader are returned unchanged.  Because the leading-whitespace
# match uses \s, blank lines directly before a leader line are consumed
# along with it.
#
# The optional space is written as [ ] and the pattern carries an explicit
# /x: this file turns /x on for every pattern (use re "/msux"), under
# which a bare space before "?" is ignored and the "?" would instead make
# the last character of the leader optional.
sub dequeue($$) {
    my ($leader, $body) = @_;
    $body =~ s{ ^ \s* \Q$leader\E [ ]? }{}gmx;
    return $body;
}
# deQ($text)
#
# Return $text with the "|Q|" line leader stripped from every line, as
# done by dequeue().
sub deQ($) {
    my ($text) = @_;
    return dequeue(q<|Q|>, $text);
}
# deQQ($text)
#
# Return $text with the "|QQ|" line leader stripped from every line, as
# done by dequeue().
sub deQQ($) {
    my ($text) = @_;
    return dequeue(qq<|QQ|>, $text);
}
__END__
⁺ 207A SUPERSCRIPT PLUS SIGN
⁻ 207B SUPERSCRIPT MINUS
⁼ 207C SUPERSCRIPT EQUALS SIGN
⁽ 207D SUPERSCRIPT LEFT PARENTHESIS
⁾ 207E SUPERSCRIPT RIGHT PARENTHESIS
⁰ 2070 SUPERSCRIPT ZERO
¹ 00B9 SUPERSCRIPT ONE
² 00B2 SUPERSCRIPT TWO
³ 00B3 SUPERSCRIPT THREE
⁴ 2074 SUPERSCRIPT FOUR
⁵ 2075 SUPERSCRIPT FIVE
⁶ 2076 SUPERSCRIPT SIX
⁷ 2077 SUPERSCRIPT SEVEN
⁸ 2078 SUPERSCRIPT EIGHT
⁹ 2079 SUPERSCRIPT NINE
ᴬ 1D2C MODIFIER LETTER CAPITAL A
ᵃ 1D43 MODIFIER LETTER SMALL A
ᴭ 1D2D MODIFIER LETTER CAPITAL AE
ᵆ 1D46 MODIFIER LETTER SMALL TURNED AE
ᵄ 1D44 MODIFIER LETTER SMALL TURNED A
ᵅ 1D45 MODIFIER LETTER SMALL ALPHA
ᶛ 1D9B MODIFIER LETTER SMALL TURNED ALPHA
ᴮ 1D2E MODIFIER LETTER CAPITAL B
ᵇ 1D47 MODIFIER LETTER SMALL B
ᴯ 1D2F MODIFIER LETTER CAPITAL BARRED B
ᶜ 1D9C MODIFIER LETTER SMALL C
ᶝ 1D9D MODIFIER LETTER SMALL C WITH CURL
ᴰ 1D30 MODIFIER LETTER CAPITAL D
ᵈ 1D48 MODIFIER LETTER SMALL D
ᶞ 1D9E MODIFIER LETTER SMALL ETH
ᴱ 1D31 MODIFIER LETTER CAPITAL E
ᵉ 1D49 MODIFIER LETTER SMALL E
ᴲ 1D32 MODIFIER LETTER CAPITAL REVERSED E
ᵊ 1D4A MODIFIER LETTER SMALL SCHWA
ᵋ 1D4B MODIFIER LETTER SMALL OPEN E
ᶟ 1D9F MODIFIER LETTER SMALL REVERSED OPEN E
ᵌ 1D4C MODIFIER LETTER SMALL TURNED OPEN E
ᶠ 1DA0 MODIFIER LETTER SMALL F
ᴳ 1D33 MODIFIER LETTER CAPITAL G
ᵍ 1D4D MODIFIER LETTER SMALL G
ᶢ 1DA2 MODIFIER LETTER SMALL SCRIPT G
ˠ 02E0 MODIFIER LETTER SMALL GAMMA
ʰ 02B0 MODIFIER LETTER SMALL H
ᴴ 1D34 MODIFIER LETTER CAPITAL H
ʱ 02B1 MODIFIER LETTER SMALL H WITH HOOK
ʻ 02BB MODIFIER LETTER TURNED COMMA
ʽ 02BD MODIFIER LETTER REVERSED COMMA
ᴵ 1D35 MODIFIER LETTER CAPITAL I
ⁱ 2071 SUPERSCRIPT LATIN SMALL LETTER I
ᶦ 1DA6 MODIFIER LETTER SMALL CAPITAL I
ᵎ 1D4E MODIFIER LETTER SMALL TURNED I
ᶤ 1DA4 MODIFIER LETTER SMALL I WITH STROKE
ᶧ 1DA7 MODIFIER LETTER SMALL CAPITAL I WITH STROKE
ᶥ 1DA5 MODIFIER LETTER SMALL IOTA
ʲ 02B2 MODIFIER LETTER SMALL J
ᴶ 1D36 MODIFIER LETTER CAPITAL J
ᶨ 1DA8 MODIFIER LETTER SMALL J WITH CROSSED-TAIL
ᶡ 1DA1 MODIFIER LETTER SMALL DOTLESS J WITH STROKE
ᴷ 1D37 MODIFIER LETTER CAPITAL K
ᵏ 1D4F MODIFIER LETTER SMALL K
ˡ 02E1 MODIFIER LETTER SMALL L
ᴸ 1D38 MODIFIER LETTER CAPITAL L
ᶫ 1DAB MODIFIER LETTER SMALL CAPITAL L
ᶪ 1DAA MODIFIER LETTER SMALL L WITH PALATAL HOOK
ᶩ 1DA9 MODIFIER LETTER SMALL L WITH RETROFLEX HOOK
ᴹ 1D39 MODIFIER LETTER CAPITAL M
ᵐ 1D50 MODIFIER LETTER SMALL M
ᶬ 1DAC MODIFIER LETTER SMALL M WITH HOOK
ᴺ 1D3A MODIFIER LETTER CAPITAL N
ⁿ 207F SUPERSCRIPT LATIN SMALL LETTER N
ᶰ 1DB0 MODIFIER LETTER SMALL CAPITAL N
ᴻ 1D3B MODIFIER LETTER CAPITAL REVERSED N
ᶮ 1DAE MODIFIER LETTER SMALL N WITH LEFT HOOK
ᶯ 1DAF MODIFIER LETTER SMALL N WITH RETROFLEX HOOK
ᵑ 1D51 MODIFIER LETTER SMALL ENG
ᴼ 1D3C MODIFIER LETTER CAPITAL O
ᵒ 1D52 MODIFIER LETTER SMALL O
ᵓ 1D53 MODIFIER LETTER SMALL OPEN O
ᵔ 1D54 MODIFIER LETTER SMALL TOP HALF O
ᵕ 1D55 MODIFIER LETTER SMALL BOTTOM HALF O
ᶱ 1DB1 MODIFIER LETTER SMALL BARRED O
ᴽ 1D3D MODIFIER LETTER CAPITAL OU
ᴾ 1D3E MODIFIER LETTER CAPITAL P
ᵖ 1D56 MODIFIER LETTER SMALL P
ᶲ 1DB2 MODIFIER LETTER SMALL PHI
ʳ 02B3 MODIFIER LETTER SMALL R
ᴿ 1D3F MODIFIER LETTER CAPITAL R
ʴ 02B4 MODIFIER LETTER SMALL TURNED R
ʵ 02B5 MODIFIER LETTER SMALL TURNED R WITH HOOK
ʶ 02B6 MODIFIER LETTER SMALL CAPITAL INVERTED R
ˢ 02E2 MODIFIER LETTER SMALL S
ᶳ 1DB3 MODIFIER LETTER SMALL S WITH HOOK
ᶴ 1DB4 MODIFIER LETTER SMALL ESH
ᵀ 1D40 MODIFIER LETTER CAPITAL T
ᵗ 1D57 MODIFIER LETTER SMALL T
ᶵ 1DB5 MODIFIER LETTER SMALL T WITH PALATAL HOOK
ᵁ 1D41 MODIFIER LETTER CAPITAL U
ᵘ 1D58 MODIFIER LETTER SMALL U
ᶸ 1DB8 MODIFIER LETTER SMALL CAPITAL U
ᵙ 1D59 MODIFIER LETTER SMALL SIDEWAYS U
ᶶ 1DB6 MODIFIER LETTER SMALL U BAR
ᶣ 1DA3 MODIFIER LETTER SMALL TURNED H
ᵚ 1D5A MODIFIER LETTER SMALL TURNED M
ᶭ 1DAD MODIFIER LETTER SMALL TURNED M WITH LONG LEG
ᶷ 1DB7 MODIFIER LETTER SMALL UPSILON
ᵛ 1D5B MODIFIER LETTER SMALL V
ⱽ 2C7D MODIFIER LETTER CAPITAL V
ᶹ 1DB9 MODIFIER LETTER SMALL V WITH HOOK
ᶺ 1DBA MODIFIER LETTER SMALL TURNED V
ʷ 02B7 MODIFIER LETTER SMALL W
ᵂ 1D42 MODIFIER LETTER CAPITAL W
ˣ 02E3 MODIFIER LETTER SMALL X
ʸ 02B8 MODIFIER LETTER SMALL Y
ᶻ 1DBB MODIFIER LETTER SMALL Z
ᶼ 1DBC MODIFIER LETTER SMALL Z WITH RETROFLEX HOOK
ᶽ 1DBD MODIFIER LETTER SMALL Z WITH CURL
ᶾ 1DBE MODIFIER LETTER SMALL EZH
ꝰ A770 MODIFIER LETTER US
ᵜ 1D5C MODIFIER LETTER SMALL AIN
ᵝ 1D5D MODIFIER LETTER SMALL BETA
ᵞ 1D5E MODIFIER LETTER SMALL GREEK GAMMA
ᵟ 1D5F MODIFIER LETTER SMALL DELTA
ᶿ 1DBF MODIFIER LETTER SMALL THETA
ᵠ 1D60 MODIFIER LETTER SMALL GREEK PHI
ᵡ 1D61 MODIFIER LETTER SMALL CHI
ᵸ 1D78 MODIFIER LETTER CYRILLIC EN
ჼ 10FC MODIFIER LETTER GEORGIAN NAR
ٰ 0670 ARABIC LETTER SUPERSCRIPT ALEF
ܑ 0711 SYRIAC LETTER SUPERSCRIPT ALAPH
ˀ 02C0 MODIFIER LETTER GLOTTAL STOP
ʼ 02BC MODIFIER LETTER APOSTROPHE
ˮ 02EE MODIFIER LETTER DOUBLE APOSTROPHE
ʾ 02BE MODIFIER LETTER RIGHT HALF RING
ˤ 02E4 MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
ʿ 02BF MODIFIER LETTER LEFT HALF RING
ˁ 02C1 MODIFIER LETTER REVERSED GLOTTAL STOP
ՙ 0559 ARMENIAN MODIFIER LETTER LEFT HALF RING
ⵯ 2D6F TIFINAGH MODIFIER LETTER LABIALIZATION MARK
꜀ A700 MODIFIER LETTER CHINESE TONE YIN PING
꜁ A701 MODIFIER LETTER CHINESE TONE YANG PING
꜂ A702 MODIFIER LETTER CHINESE TONE YIN SHANG
꜃ A703 MODIFIER LETTER CHINESE TONE YANG SHANG
꜄ A704 MODIFIER LETTER CHINESE TONE YIN QU
꜅ A705 MODIFIER LETTER CHINESE TONE YANG QU
꜆ A706 MODIFIER LETTER CHINESE TONE YIN RU
꜇ A707 MODIFIER LETTER CHINESE TONE YANG RU
꜈ A708 MODIFIER LETTER EXTRA-HIGH DOTTED TONE BAR
꜉ A709 MODIFIER LETTER HIGH DOTTED TONE BAR
꜊ A70A MODIFIER LETTER MID DOTTED TONE BAR
꜋ A70B MODIFIER LETTER LOW DOTTED TONE BAR
꜌ A70C MODIFIER LETTER EXTRA-LOW DOTTED TONE BAR
꜍ A70D MODIFIER LETTER EXTRA-HIGH DOTTED LEFT-STEM TONE BAR
꜎ A70E MODIFIER LETTER HIGH DOTTED LEFT-STEM TONE BAR
꜏ A70F MODIFIER LETTER MID DOTTED LEFT-STEM TONE BAR
꜐ A710 MODIFIER LETTER LOW DOTTED LEFT-STEM TONE BAR
꜑ A711 MODIFIER LETTER EXTRA-LOW DOTTED LEFT-STEM TONE BAR
꜒ A712 MODIFIER LETTER EXTRA-HIGH LEFT-STEM TONE BAR
꜓ A713 MODIFIER LETTER HIGH LEFT-STEM TONE BAR
꜔ A714 MODIFIER LETTER MID LEFT-STEM TONE BAR
꜕ A715 MODIFIER LETTER LOW LEFT-STEM TONE BAR
꜖ A716 MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
ꜗ A717 MODIFIER LETTER DOT VERTICAL BAR
ꜘ A718 MODIFIER LETTER DOT SLASH
ꜙ A719 MODIFIER LETTER DOT HORIZONTAL BAR
ꜚ A71A MODIFIER LETTER LOWER RIGHT CORNER ANGLE
ꜛ A71B MODIFIER LETTER RAISED UP ARROW
ꜜ A71C MODIFIER LETTER RAISED DOWN ARROW
ꜝ A71D MODIFIER LETTER RAISED EXCLAMATION MARK
ꜞ A71E MODIFIER LETTER RAISED INVERTED EXCLAMATION MARK
ꜟ A71F MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
꜠ A720 MODIFIER LETTER STRESS AND HIGH TONE
꜡ A721 MODIFIER LETTER STRESS AND LOW TONE
ꞈ A788 MODIFIER LETTER LOW CIRCUMFLEX ACCENT
꞉ A789 MODIFIER LETTER COLON
꞊ A78A MODIFIER LETTER SHORT EQUALS SIGN
ː 02D0 MODIFIER LETTER TRIANGULAR COLON
ˑ 02D1 MODIFIER LETTER HALF TRIANGULAR COLON
ꩰ AA70 MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
ʹ 02B9 MODIFIER LETTER PRIME
ʺ 02BA MODIFIER LETTER DOUBLE PRIME
˂ 02C2 MODIFIER LETTER LEFT ARROWHEAD
˃ 02C3 MODIFIER LETTER RIGHT ARROWHEAD
˄ 02C4 MODIFIER LETTER UP ARROWHEAD
˅ 02C5 MODIFIER LETTER DOWN ARROWHEAD
ˆ 02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
ˈ 02C8 MODIFIER LETTER VERTICAL LINE
ˉ 02C9 MODIFIER LETTER MACRON
ˊ 02CA MODIFIER LETTER ACUTE ACCENT
ˋ 02CB MODIFIER LETTER GRAVE ACCENT
ˌ 02CC MODIFIER LETTER LOW VERTICAL LINE
ˍ 02CD MODIFIER LETTER LOW MACRON
ˎ 02CE MODIFIER LETTER LOW GRAVE ACCENT
ˏ 02CF MODIFIER LETTER LOW ACUTE ACCENT
˒ 02D2 MODIFIER LETTER CENTRED RIGHT HALF RING
˓ 02D3 MODIFIER LETTER CENTRED LEFT HALF RING
˔ 02D4 MODIFIER LETTER UP TACK
˕ 02D5 MODIFIER LETTER DOWN TACK
˖ 02D6 MODIFIER LETTER PLUS SIGN
˗ 02D7 MODIFIER LETTER MINUS SIGN
˞ 02DE MODIFIER LETTER RHOTIC HOOK
˟ 02DF MODIFIER LETTER CROSS ACCENT
˥ 02E5 MODIFIER LETTER EXTRA-HIGH TONE BAR
˦ 02E6 MODIFIER LETTER HIGH TONE BAR
˧ 02E7 MODIFIER LETTER MID TONE BAR
˨ 02E8 MODIFIER LETTER LOW TONE BAR
˩ 02E9 MODIFIER LETTER EXTRA-LOW TONE BAR
˪ 02EA MODIFIER LETTER YIN DEPARTING TONE MARK
˫ 02EB MODIFIER LETTER YANG DEPARTING TONE MARK
ˬ 02EC MODIFIER LETTER VOICING
˭ 02ED MODIFIER LETTER UNASPIRATED
˯ 02EF MODIFIER LETTER LOW DOWN ARROWHEAD
˰ 02F0 MODIFIER LETTER LOW UP ARROWHEAD
˱ 02F1 MODIFIER LETTER LOW LEFT ARROWHEAD
˲ 02F2 MODIFIER LETTER LOW RIGHT ARROWHEAD
˳ 02F3 MODIFIER LETTER LOW RING
˴ 02F4 MODIFIER LETTER MIDDLE GRAVE ACCENT
˵ 02F5 MODIFIER LETTER MIDDLE DOUBLE GRAVE ACCENT
˶ 02F6 MODIFIER LETTER MIDDLE DOUBLE ACUTE ACCENT
˷ 02F7 MODIFIER LETTER LOW TILDE
˸ 02F8 MODIFIER LETTER RAISED COLON
˹ 02F9 MODIFIER LETTER BEGIN HIGH TONE
˺ 02FA MODIFIER LETTER END HIGH TONE
˻ 02FB MODIFIER LETTER BEGIN LOW TONE
˼ 02FC MODIFIER LETTER END LOW TONE
˽ 02FD MODIFIER LETTER SHELF
˾ 02FE MODIFIER LETTER OPEN SHELF
˿ 02FF MODIFIER LETTER LOW LEFT ARROW
∇ 2207 NABLA