| <?xml version="1.0" encoding="UTF-8" ?> |
| <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd"> |
| <!-- |
| Copyright © 1991-2016 Unicode, Inc. |
| CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) |
| For terms of use, see http://www.unicode.org/copyright.html |
| --> |
| <supplementalData> |
| <version number="$Revision$"/> |
| <transforms> |
| <transform source="und_FONIPA" target="ar" direction="forward" alias="ar-t-und-fonipa"> |
| <tRule><![CDATA[ |
| # Vowels |
| # ------ |
| # In these rules, we produce ي و ا both for short and for long vowels. |
| # This would be wrong for writing Arabic, but when transliterating |
| # foreign words and names, it is strongly preferred to vowel marks. |
| # However, we omit short schwa [ə] and a few other, schwa-like vowels |
| # unless they start a word (see the $SchwaVowel rules further down). |
| |
| # Vowels whose general-case output is ي. |
| $IVowel = [i ɪ e {e̞}]; |
| # Vowels whose general-case output is و; the set also lists w, w̥, ʍ and ʷ. |
| $UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ̈} {ʊ̈} {ɯ̽} {ʊ} ø ɤ o {ø̞} {ɤ̞} {o̞} ɞ ɔ w {w̥} ʍ ʷ]; |
| # Vowels whose general-case output is ا. |
| $AVowel = [ɛ œ ɜ ʌ æ ɐ a ɶ {ä} {ɒ̈} ɑ ɒ]; |
| # Schwa-like vowels: deleted when short and not word-initial. |
| $SchwaVowel = [ɘ ɵ ə {ɵ̞}]; |
| # Union of the four vowel classes above. |
| $Vowel = [$IVowel $UVowel $AVowel $SchwaVowel]; |
| # Symbols handled by the "Clicks" rules. |
| $Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ]; |
| # Anything that is not a letter, mark or number; used as context to |
| # recognise the edges of a word. |
| $Boundary = [^[:L:][:M:][:N:]]; |
| |
| # Decompose first so that the combining marks listed below can be matched |
| # (and deleted) on their own. |
| ::NFD; |
| # Delete modifier letters, combining marks, tone letters, tone arrows and |
| # tie bars; none of them is represented in the output. |
| [ʰ ʱ ʼ ̃ ̰ ̋ ́ ̄ ̀ ̏ ̌ ̂ ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ ͡ ͜ ̯] → ; |
| # Rewrite superscript modifier letters as the corresponding full letters |
| # so that the ordinary rules for j, m, n and ŋ handle them later. |
| ʲ → j; |
| ᵐ → m; |
| ⁿ → n; |
| ᵑ → ŋ; |
| # Recompose: later rules match sequences such as {ä} in composed form. |
| ::NFC; |
| |
| # TODO: Diphthongs probably need more work. |
| # The rules in this pass only rewrite IPA to other IPA; the Arabic letters |
| # are produced by the pass that follows. |
| |
| # Romanian [sekujesk] → [sekujask], for emitting سيكوياسك not سيكويسك |
| # Any $UVowel, then j or an $IVowel, then e, e̞ or a schwa-like vowel, is |
| # replaced by the literal sequence "uia". |
| $UVowel [j $IVowel] [e {e̞} $SchwaVowel] → uia; |
| |
| # Kazakh Аягөз [ɑjɑɡy̯ʉz] → [ɑjɑɡiuz] |
| # TODO: document the Arabic output expected for this example. |
| # The non-syllabic mark after y has already been deleted by the earlier |
| # stripping pass, so the plain sequence yʉ is what is matched here. |
| yʉ → iu; |
| |
| # End of this pass; the rules below run as a separate pass over the result. |
| ::NULL; |
| |
| # Vowels |
| # Each vowel class follows the same layout: word-initial forms (after |
| # $Boundary, with an optional leading glottal stop) come first, then |
| # vowel + glottal stop combinations, then the general fallback. |
| # Text inside { } is replaced; text outside the braces is context only. |
| # Order matters: the first matching rule wins. |
| # $IVowel: ي in the general case, long or short. |
| $Boundary {ʔ? $IVowel ː} → إِي; |
| $Boundary {ʔ? $IVowel} → إِ; |
| {$IVowel ʔ} $Boundary → ئ; |
| {$IVowel ː ʔ} $Boundary → يء; |
| {$IVowel ː ʔ} [$Vowel] → ئ; |
| $IVowel ː? → ي; |
| |
| # $UVowel: و in the general case, long or short. |
| $Boundary {ʔ? $UVowel ː} → أو; |
| $Boundary {ʔ? $UVowel} → أ; |
| {$UVowel ʔ} $Boundary → ؤ; |
| {$UVowel ː ʔ} $Boundary → وء; |
| $UVowel ː? → و; |
| |
| # $AVowel: ا in the general case, long or short. A glottal stop between |
| # two $AVowel vowels is written as a single اءا sequence. |
| $Boundary {ʔ? $AVowel ː} → آ; |
| $Boundary {ʔ? $AVowel} → أ; |
| {$AVowel ʔ} $Boundary → أ; |
| {$AVowel ː ʔ} $Boundary → اء; |
| $AVowel ː? ʔ $AVowel ː? → اءا; |
| $AVowel ː? → ا; |
| |
| # $SchwaVowel: written only when word-initial or long; a short schwa |
| # anywhere else is deleted by the last rule of this group. |
| $Boundary {ʔ? $SchwaVowel ː} → إِي; |
| $Boundary {ʔ? $SchwaVowel} → أ; |
| $SchwaVowel ː → ي; |
| $SchwaVowel → ; |
| |
| # [ʔ̞] (ʔ followed by a combining diacritic) has to be matched before the |
| # bare ʔ rule below. Rules are tried in order, so deleting ʔ first would |
| # leave the combining diacritic stranded in the output and the {ʔ̞} |
| # alternative of the later [h ɦ {ʔ̞}] rule would never get a chance to |
| # match. The output letter is the one that later rule assigns to it. |
| {ʔ̞} → ه; |
| |
| # TODO: Handle glottal stop. |
| # Any glottal stop not consumed by the vowel rules above is dropped. |
| ʔ → ; |
| |
| # Shadda for long (geminated) consonants |
| # By this point every ː that followed a vowel has been consumed by the |
| # vowel rules, so a remaining ː marks a long consonant. |
| ː → ّ; |
| |
| # Affricates |
| # NOTE(review): the tie bar ͡ is in the deletion set of the first pass, so |
| # the {t͡ʃ} alternative looks unreachable; a plain t ʃ sequence is instead |
| # handled by the separate t and ʃ rules further down. Confirm the intent. |
| [{t͡ʃ} ʧ] → تْش; |
| |
| # Clicks |
| # A click preceded by a velar stop letter is written together with it; |
| # this rule must stay above the bare $Click rule, which would otherwise |
| # never see the pair as a unit. |
| [ɡ g ɠ k] $Click → كْش; |
| $Click → تْش; |
| |
| # Nasal stops |
| # All m-like symbols collapse to م and all n-like symbols to ن. |
| [{m̥} m ɱ] → م; |
| [{n̼̊} {n̼} {n̥} n {ɳ̊} ɳ {ɲ̊} {ɲ̥} ɲ] → ن; |
| # ŋ/ɴ followed by k is written نك. Otherwise it is written نْغ, and a |
| # directly following g-like letter is consumed by the same rule so that |
| # it does not produce a second غ. |
| [{ŋ̊} ŋ {ɴ̥} ɴ] k → نك; |
| [{ŋ̊} ŋ {ɴ̥} ɴ] [ɡ g ɠ]? → نْغ; |
| |
| # Non-nasal stops |
| # p and b (and their variants) share the single output letter ب. |
| [p b {p̪} {b̪} ɓ] → ب; |
| [{d̼} d ɗ ᶑ] → د; |
| [{t̼} t] → ت; |
| # The retroflex symbols ʈ and ɖ get their own output letters ط and ض. |
| [ʈ] → ط; |
| [ɖ] → ض; |
| # c and ɟ are each written with a two-letter sequence. |
| c → تْش; |
| ɟ → دج; |
| k → ك; |
| # Both g code points (ɡ and g) are accepted, as everywhere in this file. |
| [ɡ g ɠ] → غ; |
| [q ɢ ʡ ʛ] → ق; |
| |
| # Sibilant fricatives |
| s → س; |
| z → ز; |
| # NOTE(review): ʄ is also a member of $Click, and the $Click rules come |
| # earlier in this pass, so ʄ appears never to reach this rule. Confirm |
| # which of the two mappings is intended for it. |
| [ʃ ʂ ɕ ʄ] → ش; |
| [ʒ ʐ ʑ] → ج; |
| |
| # Non-sibilant fricatives |
| # v has no letter of its own here and shares ف with f and ɸ. |
| [ɸ f v] → ف; |
| β → ب; |
| [{θ̼} θ {θ̱}] → ث; |
| [{ð̼} ð {ð̠}] → ذ; |
| ç → ش; |
| # ʝ swallows a directly following $IVowel (and its length mark) so that |
| # the pair is written with a single ي. |
| ʝ $IVowel? ː? → ي; |
| [x χ] → خ; |
| [ɣ ʁ] → غ; |
| ħ → ح; |
| ʕ → ع; |
| # NOTE(review): an earlier rule in this pass already consumes ʔ, so the |
| # {ʔ̞} alternative appears never to match at this position. Confirm. |
| [h ɦ {ʔ̞}] → ه; |
| |
| # Approximants, trills, flaps |
| ʋ → و; |
| # ʙ and r̝ are each written with a two-letter sequence. |
| ʙ → بر; |
| # Must precede the general r rule below, which also lists plain r. |
| {r̝} → رش; |
| [{ɹ̥} {ɹ} {ɻ̊} {ɻ} {ɾ̥} ɾ {ɽ̊} ɽ {r̼} {r̥} r] → ر; |
| [{ʀ̥} ʀ] → غ; |
| ʜ → ح; |
| ʢ → ع; |
| # j swallows a directly following $IVowel (and its length mark) so that |
| # the pair is written with a single ي. |
| j $IVowel? ː? → ي; |
| |
| # Laterals |
| ɬ → شْل; |
| ɮ → جْل; |
| # ʎ is written لي unless the next character is an $IVowel, j or ʝ. In |
| # that case it falls through to the general rule below and becomes plain |
| # ل, with the ي coming from the following sound. |
| {[{ʎ̥} ʎ]} [^ $IVowel j ʝ] → لي; |
| [{l̼} {l̥} l {ɭ̊} ɭ {ʎ̥} ʎ] → ل; |
| [ʟ {ʟ̠}] → غ; |
| |
| # Independent pass for misc cleanup. |
| # These rules see the Arabic text produced by the pass above. |
| ::NULL; |
| |
| # Strip off syllable markers |
| \. → ; |
| |
| # Sequences of three or more ووو look very confusing; we shorten them. |
| # Polish Darłowo [darwɔvɔ] → داروو → داروووو |
| # NOTE(review): in this file v is mapped to ف, not و, so the example above |
| # does not seem to produce three consecutive و here. Confirm or replace it. |
| # The pattern is وو followed by one or more و, i.e. a run of three or more. |
| ووو+ → وو; |
| ]]></tRule> |
| </transform> |
| </transforms> |
| </supplementalData> |