package Lingua::SA;
use 5.008;
use strict;
use warnings;
use English qw{-no_match_vars};
use Carp;
require Exporter;
our @ISA = qw(Exporter);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
# This allows declaration use Lingua::SA ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
our %EXPORT_TAGS = ( 'all' => [ qw(
transliterate
vibhakti
sandhi
) ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw(
);
our $VERSION = '0.08';
# Preloaded methods go here.
###########################################################
sub sandhi{
my ($in)=@_;
$in=~s/ \+ ([^\[])/$1/g; # replace + and surround spaces with nothing
$in=~s/aa/A/g; # replace aa with A
$in=~s/ii/I/g; # replace ii with I
$in=~s/uu/U/g; # replace uu with U
$in=~s/Ru/R/g; # replace Ru with R
return $in;
}
###########################################################
sub vibhakti {
####### This is currently only for svaraanta (halant will be 8000+)
# USAGE: my $response=vibhakti({naam=>$noun, vibhakti=>$vibhakti,
# linga=>$linga, vachana=>$vachana});
# 2008-06-05 Fixed 3351 and 3361 from inaaH to inaH (v 0.06)
my ($arg_ref) = @_;
confess "Argument naam not passed to vibhakti()" if !defined $arg_ref->{naam};
confess "Argument linga not passed to vibhakti()" if !defined $arg_ref->{linga};
confess "Argument vibhakti not passed to vibhakti()" if !defined $arg_ref->{vibhakti};
confess "Argument vachana not passed to vibhakti()" if !defined $arg_ref->{vachana};
my ( $noun, $vibhakti, $linga, $vachana ) =
( $arg_ref -> {naam}, $arg_ref -> {vibhakti}, $arg_ref -> {linga},
$arg_ref -> {vachana});
# The last character of noun is chopped to be aakaar
# (what happens when halant nouns are included?)
$noun = sandhi($noun);
my $aakaar = chop($noun);
$vibhakti = sandhi($vibhakti);
$linga = sandhi($linga);
my %aakaar = qw(0 0 a 1 A 2 i 3 I 4 u 5 U 6 R 7);
my %linga = qw(puM 1 strI 2 napuMsaka 3 1 1 2 2 3 3);
my %vachana = qw(ekavachana 1 dvivachana 2 bahuvachana 3 1 1 2 2 3 3);
my %vibhakti = qw#prathamA 1 dvitIyA 2 tRtIyA 3 chaturthI 4 paJchamI 5
ShaShThI 6 saptamI 7 sambodhana 8
1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8#;
confess "Unsupported noun supplied to vibhakti(): $noun$aakaar ending in $aakaar"
if !defined $aakaar{$aakaar};
confess "Invalid linga $linga supplied to vibhakti()"
if !defined $linga{$linga};
confess "Invalid vibhakti $vibhakti supplied to vibhakti()"
if !defined $vibhakti{$vibhakti};
confess "Invalid vachana $vachana supplied to vibhakti()"
if !defined $vachana{$vachana};
# coef for swarAnt nouns range from 1111 to 7373
# with 7 sets of 72 coefs posible (not all taken)
my $coef =
$aakaar{$aakaar} * 1000 +
$linga{$linga} * 100 +
$vibhakti{$vibhakti} * 10 +
$vachana{$vachana};
#### 1000 through 7000 aakaar. 1=a 2=A 3=i 4=I 5=u 6=U 7=Ru
##### 100 puM 200 strI 300 na
###### 10 through 80 8 vibhakti (8th being sambodhan)
####### 1 through 3 eka, dwi, bahuvachan
## possibilities for nouns are in the following series:
# Masculine:1100, 3100, 5100, 7100 # examples of 2100?
# Feminine: 2200, 3200, 4200, 5200, 6200, 7200
# Neutar: 1300, 3300, 5300, 7300
# 2100, 4100 exist, but I do not know the examples.
# not sure of 6100, 6300
# 1200, 2300, 4300 do not exist
# These are the noun suffixes
my %ending = qw(
1111 aH 1112 au 1113 AH
1121 am 1122 au 1123 An
1131 ena 1132 Abhyaam 1133 aiH
1141 Aya 1142 Abhyaam 1143 ebhyaH
1151 At 1152 Abhyaam 1153 ebhyaH
1161 asya 1162 ayoH 1163 Anaam
1171 e 1172 ayoH 1173 eSu
1181 a 1182 au 1183 AH
2111 AH 2112 au 2113 AH
2121 Am 2122 au 2123 aH
2131 A 2132 Abhyaam 2133 AbhiH
2141 e 2142 Abhyaam 2143 AbhyaH
2151 aH 2152 Abhyaam 2153 AbhyaH
2161 aH 2162 oH 2163 Am
2171 i 2172 oH 2173 Asu
2181 Am 2182 au 2183 AH
3111 iH 3112 I 3113 ayaH
3121 im 3122 I 3123 In
3131 inaa 3132 ibhyaam 3133 ibhiH
3141 aye 3142 ibhyaam 3143 ibhyaH
3151 eH 3152 ibhyaam 3153 ibhyaH
3161 eH 3162 yoH 3163 Inaam
3171 au 3172 yoH 3173 iSu
3181 e 3182 I 3183 ayaH
5111 uH 5112 U 5113 avaH
5121 um 5122 U 5123 Un
5131 unaa 5132 ubhyaam 5133 ubhiH
5141 ave 5142 ubhyaam 5143 ubhyaH
5151 oH 5152 ubhyaam 5153 ubhyaH
5161 oH 5162 voH 5163 Unaam
5171 au 5172 voH 5173 uSu
5181 o 5182 U 5183 avaH
7111 A 7112 Arau 7113 AraH
7121 Aram 7122 Arau 7123 RRn
7131 raa 7132 Rbhyaam 7133 RbhiH
7141 re 7142 Rbhyaam 7143 RbhyaH
7151 uH 7152 Rbhyaam 7153 RbhyaH
7161 uH 7162 roH 7163 RRNaam
7171 ari 7172 roH 7173 RSu
7181 aH|ar 7182 Arau 7183 AraH
2211 A 2212 e 2213 AH
2221 Am 2222 e 2223 AH
2231 ayaa 2232 Abhyaam 2233 AbhiH
2241 Ayai 2242 Abhyaam 2243 AbhyaaH
2251 AyaaH 2252 Abhyaam 2253 AbhyaaH
2261 AyaaH 2262 ayoH 2263 Anaam
2271 Ayaam 2272 ayoH 2273 Asu
2281 e 2282 e 2283 AH
3211 iH 3212 I 3213 ayaH
3221 im 3222 I 3223 IH
3231 yaa 3232 ibhyaam 3233 ibhiH
3241 yai|aye 3242 ibhyaam 3243 ibhyaH
3251 yaaH|eH 3252 ibhyaam 3253 ibhyaH
3261 yaaH|eH 3262 yoH 3263 Inaam
3271 yaam|au 3272 yoH 3273 iSu
3281 e 3282 I 3283 ayaH
4211 I 4212 yau 4213 yaH
4221 Im 4222 yau 4223 IH
4231 yaa 4232 Ibhyaam 4233 IbhiH
4241 yai 4242 Ibhyaam 4243 IbhyaH
4251 yaaH 4252 Ibhyaam 4253 IbhyaH
4261 yaaH 4262 yoH 4263 Inaam
4271 yaam 4272 yoH 4273 ISu
4281 i 4282 yau 4283 yaH
5211 uH 5212 U 5213 avaH
5221 um 5222 U 5223 UH
5231 vaa 5232 ubhyaam 5233 ubhiH
5241 ave|vai 5242 ubhyaam 5243 ubhyaH
5251 oH|vaaH 5252 ubhyaam 5253 ubhyaH
5261 oH|vaaH 5262 voH 5263 Unaam
5271 au|vaam 5272 voH 5273 uSu
5281 o 5282 U 5283 avaH
6211 UH 6212 vau 6213 vaH
6221 Um 6222 vau 6223 UH
6231 vaa 6232 Ubhyaam 6233 UbhiH
6241 vai 6242 Ubhyaam 6243 UbhyaH
6251 vaaH 6252 Ubhyaam 6253 UbhyaH
6261 vaaH 6262 voH 6263 Unaam
6271 vaam 6272 voH 6273 USu
6281 u 6282 vau 6283 vaH
7211 A 7212 arau 7213 araH
7221 aram 7222 arau 7223 RRH
7231 raa 7232 Rbhyaam 7233 RbhiH
7241 re 7242 Rbhyaam 7243 RbhyaH
7251 uH 7252 Rbhyaam 7253 RbhyaH
7261 uH 7262 roH 7263 RRNaam
7271 ari 7272 roH 7273 RSu
7281 aH|ar 7282 arau 7283 araH
1311 am 1312 e 1313 Ani
1321 am 1322 e 1323 Ani
1331 ena 1332 Abhyaam 1333 aiH
1341 Aya 1342 Abhyaam 1343 ebhyaH
1351 At 1352 Abhyaam 1353 ebhyaH
1361 asya 1362 ayoH 1363 Anaam
1371 e 1372 ayoH 1373 eSu
1381 a 1382 e 1383 Ani
3311 i 3312 inI 3313 Ini
3321 i 3322 inI 3323 Ini
3331 inaa 3332 ibhyaam 3333 ibhiH
3341 ine 3342 ibhyaam 3343 ibhyaH
3351 inaH 3352 ibhyaam 3353 ibhyaH
3361 inaH 3362 inoH 3363 Inaam
3371 ini 3372 inoH 3373 iSu
3381 i|e 3382 inI 3383 Ini
5311 u 5312 unI 5313 Uni
5321 u 5322 unI 5323 Uni
5331 unaa 5332 ubhyaam 5333 ubhiH
5341 une 5342 ubhyaam 5343 ubhyaH
5351 unaH 5352 ubhyaam 5353 ubhyaH
5361 unaH 5362 unoH 5363 Unaam
5371 uni 5372 unoH 5373 uSu
5381 o|u 5382 unI 5383 Uni
7311 R 7312 RNI 7313 RRNi
7321 R 7322 RNI 7323 RRNi
7331 raa|RNA 7332 Rbhyaam 7333 RbhiH
7341 re|RNe 7342 Rbhyaam 7343 RbhyaH
7351 uH|RNaH 7352 Rbhyaam 7353 RbhyaH
7361 uH|RNaH 7362 roH|RNoH 7363 RRNaam
7371 ari|RNi 7372 roH|RNoH 7373 RSu
7381 aH|R 7382 RNI 7383 RRNi
);
# Is 3263 above dirgha as stated? ## Yes, it is
confess "$linga nouns ending in $aakaar not supported"
if !defined $ending{$coef};
### This part can cater to irregular nouns
# ambA, akkA, allA have a-kaaraant sambodhana
if($noun eq "amb" or $noun eq "akk" or $noun eq "all"){
$ending{2281} = 'a';
}
my $endcoef = $ending{$coef};
# Natva results in converting n to N when an r, R, RR, or S are encountered in
# the noun, and the only letters between there and end are what are in Natva
# here (h y v k kh g gh ~N p ph b bh m and a pratyay (aa~N - not implemented)
# Additionally, n can not be halant
my $Natva = "h|y|v|k(h)?|g(h)?|G|p(h)?|b(h)?|m";
# vowel is as defined in split_word
my $vowel = "(A|H|I|M|R(R|u)?|U|a(a|i|u)?|i(i)?|e|lR|o(M)?|u(u)?|\\:|\\|(\\|)?)";
my $inflected;
### This part can be expanded to include exceptions/options
if ($noun =~ m/[rRS][$Natva|$vowel]*$/ ) {
$endcoef =~ s/n([a-zA-Z])/N$1/;
}
if ( $endcoef =~ m/\|/ ) {
my @foo = split( /\|/, $endcoef );
$inflected = "$noun + $foo[0]";
for my $counter ( 1 .. $#foo ) {
$inflected.= " | $noun + $foo[$counter]";
}
}
else {
$inflected = "$noun + $endcoef";
}
## if sambodhan, prepend he
if ( $coef % 100 > 80 ) {
if ( $endcoef =~ m/\|/ ) {
$inflected = "he \[ $inflected ]";
}
else {
$inflected = "he $inflected";
}
}
return $inflected;
} ## end sub vibhakti
###############################
sub transliterate {
# Takes a string as input. Separate it into words.
# Splits each word into syllables, and for each syllable appends its
# unicode to an array that is finally flattened and returned
my ($english) = @_;
my @transliterated;
my @x = split( /\s+/, $english ); # splt input string in to words
for my $x (@x) { # get unicoded syllables for each word
push( @transliterated, map( match_code($_), split_word($x) ), " " );
}
return join( "", @transliterated ); # flatten the array before returning
}
###############################
sub match_code {
my ($syllable_mcc) = @_;
my %letter_codes = (
"~a", "अ", "~aa", "आ", "~A", "आ",
"~i", "इ", "~ii", "ई", "~uu", "ऊ",
"ii", "ी", "~I", "ई", "~u", "उ",
"~U", "ऊ", "~R", "ऋ", "~Ru", "ऋ",
"~lR", "ऌ", "~RR", "ॠ", "~e", "ए",
"~ai", "ऐ", "~o", "ओ", "~au", "औ",
"a", "", "aa", "ा", "A", "ा",
"i", "ि", "I", "ी", "u", "ु",
"uu", "ू", "R", "ृ", "lR", "ॢ",
"e", "े", "ai", "ै",
"U", "ू", "R", "ृ", "Ru", "ृ",
"RR", "ॄ", "o", "ो", "au", "ौ",
"k", "क", "kh", "ख", "g", "ग",
"gh", "घ", "G", "ङ", "c", "च",
"ch", "च", "C", "छ", "Ch", "छ",
"j", "ज", "jh", "झ", "J", "ञ",
"T", "ट", "Th", "ठ", "D", "ड",
"Dh", "ढ", "N", "ण", "t", "त",
"th", "थ", "d", "द", "dh", "ध",
"n", "न", "p", "प", "ph", "फ",
"b", "ब", "bh", "भ", "m", "म",
"y", "य", "r", "र", "l", "ल",
"L", "ळ",
"v", "व", "z", "श", "sh", "श",
"S", "ष", "Sh", "ष", "s", "स",
"h", "ह", "H", "ः", ":", "ः",
"M", "ं", "|", "।", "||", "॥",
"oM", "ॐ", "~H", "ः", "~:", "ः",
"~M", "ं", "~|", "।", "~||", "॥",
"\$", "ऽ", "^", "॑", "_", "॒",
"`", "॓", "'", "॔", "\@", "॰",
"~oM", "ॐ", "*", "्", "CB", "ँ",
);
# RR 2400 lRR 2401 _lR 2402 _lRR 2403 chandra-bindu 2305
if ( defined $letter_codes{$syllable_mcc} ) {
return $letter_codes{$syllable_mcc};
}
else {
return $syllable_mcc;
}
} ## end sub match_code
########################################
sub split_word {
my ($word) = @_;
# vowels is copied as is in vibhakti
my $vowels = "(A|H|I|M|R(R|u)?|U|a(a|i|u)?|i(i)?|e|lR|o(M)?|u(u)?|\\:|\\|(\\|)?)";
my $consonants =
"(C(h|B)?|D(h)?|G|J|N|S(h)?|T(h)?|b(h)?|c(h)?|d(h)?|g(h)?|h|j(h)?|k(h)?|l|m|n|p(h)?|r|s(h)?|t(h)?|v|y|z|L)";
my @syllables;
my $vowel_start_p = 1;
my $matched;
my $index;
while ($word) { # begin out
unless ( $word =~ m/$vowels/ ) { $index = length($word); }
else { $index = length($`); }
if ( $index == 0 ) { # begin 3A
$matched = $1;
if ($vowel_start_p) { # begin 0A
push( @syllables, "~$matched" );
} # end 0A
else { # begin 0B
push( @syllables, $matched );
} # end 0B
$vowel_start_p = 1;
$word = substr( $word, length($matched) );
} # end 3A
else { # begin 3B
unless ( $word =~ m/$consonants/ ) { $index = length($word); }
else { $index = length($`); }
if ( $index == 0 ) { # begin 2A
$matched = $1;
push( @syllables, $matched );
$vowel_start_p = 0;
$word = substr( $word, length($matched) );
unless ( $word =~ m/$vowels/ ) { $index = length($word); }
else { $index = length($`); }
if ( $index or length($word) == 0 ) { # begin 1A
push( @syllables, "*" );
} # end 1A
else { # begin 1B
;
} # end 1B
} # end 2A
else { # begin 2B
push( @syllables, substr( $word, 0, 1 ) );
$word = substr( $word, 1 );
} # end 2B
} # end 3B
} # end out
return @syllables;
} ## end sub split_word
###########################
1;
__END__
# Below is stub documentation for Lingua::SA
=head1 NAME
Lingua::SA - Perl extension for the language Sanskrit
=head1 SYNOPSIS
use Lingua::SA qw(transliterate vibhakti sandhi);
print transliterate("raamaH");
# outputs: रामः
=head1 DESCRIPTION
The module exports functions to
(1) Obtain declinations of Sanskrit nouns
(2) Convert Roman into Unicode Sanskrit
print vibhakti({naam=>"raama", vibhakti=>"prathamA",
linga=>"puM", vachana=>"ekavachana"}),"\n";
# outputs: rAm + aH
print sandhi(vibhakti({naam=>"raama", vibhakti=>"prathamA",
linga=>"puM", vachana=>"ekavachana"})),"\n";
# outputs: rAmaH
print transliterate(sandhi(vibhakti({naam=>"raama",
vibhakti=>"prathamA", linga=>"puM",
vachana=>"ekavachana"}))),"\n";
or,
print transliterate(sandhi(vibhakti({naam=>"raama",
vibhakti=>1, linga=>1, vachana=>1}))),"\n";
# outputs: रामः
=head2 EXPORT
None by default.
The module optionally exports three functions:
(1) transliterate: This takes roman text as input and depicts it in UNICODE
devanAgarI. The character set used is that of Sanskrit. Ideal for use in cgi
scripts.
(2) vibhakti: This returns the declinations of nouns. Takes as input a noun
(naam), a case (vibhakti), a gender (linga) and number/plurality (vachana) and
returns the corresponding declination if the inputs are valid.
The vibhakti has to be one of:
prathamA dvitIyA tRutIyA chaturthI paJchamI ShaShThI saptamI sambodhana
Alternately, the numbers 1 through 8 are also allowed.
The vachana has to be one of: ekavachana dvivachana bahuvachana or 1 2 3
The linga has to be one of: puM strI napuMsaka or 1 2 3
The naam has to end in one of: a A i I u U R (aa, ii, uu are internally
converted to A, I, U respectively). Other endings not supported yet.
(3) sandhi: Merely "straightens" the output by doing a few minor operations to
make it look more like a proper "word". In particular, it removes ' + ' from
the "word + inflection" combinations. It also replaces aa with A, ii with I and
uu with U.
=head1 SEE ALSO
Currently write-only mailing list for feedback:
http://groups.google.com/group/sanskrit-module-feedback
You can see the module in operation through various cgi-bin links at:
http://www.astro.caltech.edu/~aam/sanskrit
You will also find there links to other Sanskrit resources including some from
which I have borrowed. You will also find there the transliteration mapping
used.
=head1 AUTHOR
Ashish Mahabal, E<lt>mahabal.ashish@gmail.comE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2008 by Ashish Mahabal
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.6 or,
at your option, any later version of Perl 5 you may have available.
=cut