<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2017 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision$"/>
<transforms>
<transform source="fa" target="fa_FONIPA" direction="forward" alias="fa-fonipa-t-fa">
<tRule><![CDATA[
# Persian (fa) to IPA (fa_FONIPA), forward direction only.
#
# Passes, in order:
#   1. Remove ZWJ and ZWNJ.
#   2. NFD, so that letters and their marks can be matched separately.
#   3. Replace Arabic look-alike letters with their Persian forms.
#   4. Main pass: vowels, hamza forms, then consonants are rewritten to IPA.
#   5. Cleanup pass: resolve shadda and drop marks that are still left over.
#   6. NFC.
# Rules inside a pass are order-sensitive: more specific rules are listed
# before the general ones that would otherwise match the same text.
[\u200c \u200d] → ; # Strip off ZWJ and ZWNJ.
::NFD;
# Rewrite similarly-looking Arabic letters to Persian.
ي → ی;
ى → ی;
ك → ک;
ە → ه;
::NULL;
# Arabic-script vowel marks and letters, used as context in the rules below.
$VOWEL = [ َ ِ ُ ٓ ا و ی];
# Any character that is not a letter, a mark or a number.
$BOUNDARY = [^[:L:][:M:][:N:]];
# IPA consonants, used as context in the rules below.
$IPA_CONSONANT = [ m n p b t d k ɡ ʔ f v s z ʃ ʒ ʁ ɢ h χ {t͡ʃ} {d͡ʒ} l ɾ ];
# Vowels
# Yeh or waw carrying a shadda is a doubled glide.
یّ → jj;
وّ → vv;
# A short-vowel mark followed by shadda: emit the shadda first and put the
# cursor before the vowel mark, so the vowel is processed next.
([ َ ِ ُ])ّ → ّ | $1;
َیْ → æj;
ِی → ej;
یوْ → iːv;
{یو} ه $BOUNDARY → iːv;
{یو} هٔ $BOUNDARY → iːv;
یو → juː;
َوْ → av;
# Hamza forms
ء → ʔ;
ا َ ٔ → ʔæ; # Needed because NFD reorders fatha before hamza
ا ٔ → ʔ;
و ٔ → ʔ;
ی ْ ٔ → ʔ; # Needed because NFD reorders sukun before hamza
ی ِ ٔ → ʔe; # Needed because NFD reorders kasra before hamza
ی ٔ → ʔ;
# Heh (with or without hamza above) directly before a boundary. The output
# depends on what precedes it.
{ َ ه} $BOUNDARY → æ;
[^ːeoæ] {هٔ} $BOUNDARY → eje;
[e] {هٔ} $BOUNDARY → je;
[^ːeoæ] {ه} $BOUNDARY → e;
[e] {ه} $BOUNDARY → ;
اَ → æ;
اً $BOUNDARY → æn;
َ → æ;
یه → je;
یٰ → ɒː;
$IPA_CONSONANT {وی} $VOWEL → uːj;
# If yeh is preceded by a consonant and followed by a vowel,
# it's pronounced /iːj/, but a sukun breaks that and makes it
# be pronounced just as /j/.
$IPA_CONSONANT {\u0652 یو} → juː;
$IPA_CONSONANT {\u0652 ی} $VOWEL → j;
$IPA_CONSONANT {ی} $VOWEL → iːj;
{ی} $VOWEL → j;
ی ْ → j;
ی → iː;
# Alef: sequences at a boundary and alef with marks come before bare alef.
$BOUNDARY {ای} → iː;
ا\u0653 → ɒː;
آ → ɒː;
اِ → e;
$BOUNDARY {اُو} → o;
اُ → o;
$BOUNDARY {او} → uː;
او → ɒːv;
ا → ɒː; # Probably [^$BOUNDARY]
ِ → e;
ه ِ ّ → hhe; # Needed because NFD moves kasra before shadda
هِ → he;
خوا → χɒː;
خوی → χiː;
# Waw: /v/ before a vowel or before final heh, /uː/ after a consonant,
# /va/ when it stands alone between boundaries.
{و} $VOWEL → v;
{و} ه $BOUNDARY → v;
{و} هٔ $BOUNDARY → v;
$IPA_CONSONANT {و} → uː;
$IPA_CONSONANT \u0651 {و} → uː; # shadda after a consonant
ُ{و} $IPA_CONSONANT → uː;
$BOUNDARY {و} $BOUNDARY → va;
{ ُو} $VOWEL → ov;
ُ و ٔ → oʔ;
ُو → o;
ُ → o;
# Consonants
پ → p;
ب → b;
[ت ط] → t;
د → d;
ک → k;
گ → ɡ;
ع → ʔ;
چ → t͡ʃ;
ج → d͡ʒ;
ف → f;
[س ص ث] → s;
[ز ذ ض ظ] → z;
ش → ʃ;
ژ → ʒ;
خ → χ;
غ → ʁ;
ق → ɢ;
ح → h;
م → m;
ن → n;
ه → h;
ل → l;
ر → ɾ;
ْ → ; # Drop any remaining sukun.
::NULL;
# TODO: How to handle these?
# A shadda that follows an IPA consonant or a vowel from $VOWEL is replaced by
# a copy of that preceding element. The two sets are combined by juxtaposition,
# which is how a union is written inside a set. A "|" inside the brackets
# would be a literal member of the set.
([$IPA_CONSONANT $VOWEL]){ّ} → $1;
# Drop the marks that are still left over.
[ ّ ٔ ً ٰ ] → ;
::NFC;
]]></tRule>
</transform>
</transforms>
</supplementalData>