| <?xml version="1.0" encoding="UTF-8" ?> |
| <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd"> |
| <!-- |
| Copyright © 1991-2013 Unicode, Inc. |
| CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) |
| For terms of use, see http://www.unicode.org/copyright.html |
| --> |
| <supplementalData> |
| <version number="$Revision$"/> |
| <transforms> |
| <transform source="Arab" target="Latn" direction="both" alias="Arabic-Latin und-Latn-t-und-arab" backwardAlias="Latin-Arabic und-Arab-t-und-latn"> |
| <tRule><![CDATA[ |
| # Generally follows UNGEGN |
| # http://www.eki.ee/wgrs/rom1_ar.pdf |
| # Occasionally deviates in the direction of ISO 233 |
| # http://homepage.mac.com/sirbinks/pdf/Arabic.pdf |
| # a) where required for disambiguation. |
| # b) with underdot instead of cedilla for letters like SAD, |
| # since those are explicitly in Unicode for transliteration. |
| # c) with extra non-Arabic-language letters, like PEH |
| # |
| # Does *not* do assimilation of "al", nor hyphenation. |
| # While it could be done, we need to determine whether a prefix "al" could |
| # occur other than as the definite article (since no space is used). |
| # Forward (Arabic → Latin) filter: only characters in this set are processed by the forward rules. |
| :: [[:Arabic:][:block=ARABIC:][ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ]] ; |
| # Normalization step: NFKD is applied in the forward direction, NFC (in parentheses) in the reverse direction. |
| :: NFKD (NFC); |
| # Marks appended to Latin output by the rules below. $disambig and $disambig2 keep letters that share |
| # a base romanization distinct; $under is the underdot mentioned in the header; $descender is used for one letter. |
| $disambig = ̱ ; |
| $disambig2 = ̰ ; |
| $under = ̣ ; |
| $descender = ˌ; |
| # Characters whose canonical combining class is neither 0 nor 230; used by the reverse-only TEH MARBUTA rule. |
| $notAbove = [[:^ccc=0:] & [:^ccc=230:]]; |
| |
| # non-letters |
| # Between two decimal digits ([:Nd:] on both sides) the Arabic separators map to a plain ',' and '.'. |
| # Outside that context they carry $disambig, which keeps them distinct from ARABIC COMMA (',') |
| # and ARABIC FULL STOP ('.') on the Latin side. |
| [:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR |
| [:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR |
| ٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR |
| ٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR |
| # ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate |
| ، ↔ ',' ; # ARABIC COMMA |
| ؛ ↔ ';' ; # ARABIC SEMICOLON |
| ؟ ↔ '?' ; # ARABIC QUESTION MARK |
| ٪ ↔ '%' ; # ARABIC PERCENT SIGN |
| # Extended Arabic-Indic digits carry $disambig so they stay distinct from the Arabic-Indic digits that follow. |
| ۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO |
| ۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE |
| ۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO |
| ۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE |
| ۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR |
| ۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE |
| ۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX |
| ۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN |
| ۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT |
| ۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE |
| # Arabic-Indic digits map to the bare ASCII digits in both directions. |
| ٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO |
| ١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE |
| ٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO |
| ٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE |
| ٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR |
| ٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE |
| ٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX |
| ٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN |
| ٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT |
| ٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE |
| |
| # Per-mille / per-ten-thousand signs and the Arabic full stop, declared as two-way rules. |
| # NOTE(review): '‰' and '‱' do not appear in the reverse filter set at the end of these rules — |
| # confirm whether the Latin → Arabic direction can actually reach the first two rules. |
| ؉ ↔ ‰ ; # U+0609 ARABIC-INDIC PER MILLE SIGN |
| ؊ ↔ ‱ ; # U+060A ARABIC-INDIC PER TEN THOUSAND SIGN |
| ۔ ↔ '.' ; # U+06D4 ARABIC FULL STOP |
| |
| # letters |
| # long vowels |
| # A short-vowel mark followed by its carrier letter becomes a single macron vowel. These two-character |
| # rules are listed ahead of the single-character FATHA/DAMMA/KASRA and ALEF/WAW/YEH rules further down. |
| َا↔ ā ; # ARABIC FATHA, ARABIC LETTER ALEF |
| ُو ↔ ū ; # ARABIC DAMMA, ARABIC LETTER WAW |
| ِي ↔ ī ; # ARABIC KASRA, ARABIC LETTER YEH |
| # longer items moved here to prevent masking |
| # (i.e. rules whose Latin side is a base letter plus 'h' and/or a mark are listed before the rules for |
| # the bare base letters t, d, s, z, g — presumably so the longer Latin sequence is tried first on the reverse pass) |
| ث ↔ t h $disambig ; # ARABIC LETTER THEH |
| ذ ↔ d h $disambig ; # ARABIC LETTER THAL |
| ش ↔ s h $disambig ; # ARABIC LETTER SHEEN |
| ص ↔ s $under ; # ARABIC LETTER SAD |
| ض ↔ d $under ; # ARABIC LETTER DAD |
| ط ↔ t $under ; # ARABIC LETTER TAH |
| ظ ↔ z $under ; # ARABIC LETTER ZAH |
| غ ↔ g h $disambig ; # ARABIC LETTER GHAIN |
| |
| # WARNING: special case |
| # ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→ |
| # so on the return, we have to skip over (but preserve) the half-ring below (or others like it) |
| # ةٕ ← ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS |
| # First rule: the plain two-way mapping between TEH MARBUTA and 't' + combining diaeresis. |
| ة ↔ t ̈ ; # ARABIC LETTER TEH MARBUTA |
| # Second rule (reverse only): matches 't', one or more $notAbove marks, then the diaeresis. |
| # It emits TEH MARBUTA and puts the captured marks ($1) back after the cursor ('|') so that |
| # later rules still get to convert them. |
| ة | $1 ← t ($notAbove+) ̈ ; # ARABIC LETTER TEH MARBUTA |
| |
| # non-Arabic language |
| # Two-way rules for letters used outside the Arabic language; each Latin side carries a mark |
| # ($disambig, $disambig2 or $descender) in addition to the base letter(s). |
| ژ ↔ z h $disambig ; # ARABIC LETTER JEH |
| ڭ ↔ n $disambig g ; # ARABIC LETTER NG |
| ۋ ↔ v $disambig ; # ARABIC LETTER VE |
| ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH |
| # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE (U+069A) |
| ښ ↔ s $descender; |
| |
| # Arabic language |
| # One two-way rule per letter or mark, except TATWEEL which is forward-only. |
| ء ↔ ʾ ; # ARABIC LETTER HAMZA |
| ا ↔ a $under; # ARABIC LETTER ALEF |
| ب ↔ b ; # ARABIC LETTER BEH |
| ت ↔ t ; # ARABIC LETTER TEH |
| ج ↔ j ; # ARABIC LETTER JEEM |
| ح ↔ h $under ; # ARABIC LETTER HAH |
| خ ↔ k h $disambig ; # ARABIC LETTER KHAH |
| د ↔ d ; # ARABIC LETTER DAL |
| ر ↔ r ; # ARABIC LETTER REH |
| ز ↔ z ; # ARABIC LETTER ZAIN |
| س ↔ s ; # ARABIC LETTER SEEN |
| ع ↔ ʿ ; # ARABIC LETTER AIN |
| # TATWEEL is dropped going to Latin (forward-only rule with an empty result), so it is never restored on the way back. |
| ـ → ; # ARABIC TATWEEL |
| ف ↔ f ; # ARABIC LETTER FEH |
| ق ↔ q ; # ARABIC LETTER QAF |
| ک ↔ k $disambig ; # ARABIC LETTER KEHEH |
| ك ↔ k ; # ARABIC LETTER KAF |
| ل ↔ l ; # ARABIC LETTER LAM |
| م ↔ m ; # ARABIC LETTER MEEM |
| ن ↔ n ; # ARABIC LETTER NOON |
| ه ↔ h ; # ARABIC LETTER HEH |
| و ↔ w ; # ARABIC LETTER WAW |
| ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA |
| ي ↔ y ; # ARABIC LETTER YEH |
| # Tanween marks: short vowel followed by superscript n. Listed before the bare short vowels a/u/i. |
| ً ↔ aⁿ ; # ARABIC FATHATAN |
| ٌ ↔ uⁿ ; # ARABIC DAMMATAN |
| ٍ ↔ iⁿ ; # ARABIC KASRATAN |
| # Short vowels. |
| َ ↔ a ; # ARABIC FATHA |
| ُ ↔ u ; # ARABIC DAMMA |
| ِ ↔ i ; # ARABIC KASRA |
| # SHADDA and SUKUN map to Latin combining marks rather than to letters. |
| ّ ↔ ̃ ; # ARABIC SHADDA |
| ْ ↔ ̊ ; # ARABIC SUKUN |
| |
| # special combining marks |
| # Each Arabic combining mark maps two-way to a Latin combining mark. |
| ٓ ↔ ̂ ; # ARABIC MADDAH ABOVE |
| ٔ ↔ ̉ ; # ARABIC HAMZA ABOVE |
| ٕ ↔ ̹ ; # ARABIC HAMZA BELOW |
| |
| # Some non-Arabic language (not in UNGEGN) |
| # Two-way rules. The two commented-out FEH variants below are disabled and currently have no rule. |
| پ ↔ p ; # ARABIC LETTER PEH |
| چ ↔ c h $disambig ; # ARABIC LETTER TCHEH |
| ڤ ↔ v ; # ARABIC LETTER VEH |
| # ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW |
| # ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW |
| گ ↔ g ; # ARABIC LETTER GAF |
| |
| # fallbacks TODO roundtrip where possible, using diacritics to distinguish |
| # All rules in this group are forward-only (→): they romanize the letter but define no Latin → Arabic mapping. |
| # Source for the romanizations: |
| #https://en.wikipedia.org/wiki/Sindhi_transliteration |
| ٺ→ṭh; |
| ٿ→th; |
| ٽ→ṭ; |
| ڙ→ṛ; |
| ڦ→ph; |
| ڻ→ṇ; |
| ڱ→ṅ; |
| ڃ→ñ; |
| ڪ→k; |
| ڄ→j̈; |
| ۃ→ẖ; |
| ڳ→g̤; |
| ڍ→ḍh; |
| ڌ→dh; |
| ڏ→d̤; |
| ڊ→ḍ; |
| ڇ→ch; |
| ڀ→bh; |
| ٻ→ḇ; |
| # The last two map single signs to the literal strings '&' and 'mn'. |
| ۽→'&'; |
| ۾→'mn'; |
| |
| # Forward-only (→) romanizations; source: |
| #https://en.wiktionary.org/wiki/Wiktionary:Urdu_transliteration |
| ھ → ʱ ; |
| # NOTE(review): the result below appears to contain a literal dotted-circle placeholder before the |
| # combining tilde — confirm that the placeholder is meant to be emitted in the output. |
| ں → ◌̃ ; |
| ے → ai ; |
| ڈ → ḍ ; |
| ڑ → ṛ ; |
| ٹ → ṭ ; |
| |
| # Forward-only (→) romanizations; sources: |
| #https://www.eki.ee/wgrs/rom2_ps.htm |
| #https://en.wikipedia.org/wiki/Pashto_alphabet |
| ټ → ṯ ; |
| ځ → dz ; |
| څ → ts ; |
| ډ → ḏ ; |
| ړ → ṟ ; |
| ږ → z͟h ; |
| ګ → g ; |
| ڼ → ṉ ; |
| ۍ → ạy ; |
| ې → e ; |
| |
| # Forward-only (→) romanizations; source: |
| #https://www.eki.ee/wgrs/rom1_ug.pdf |
| ہ → ḥ ; |
| ە → ĥ ; |
| |
| # fallbacks |
| # Reverse-only (←) rewrites for Latin letters that have no rule of their own above. |
| # Each one replaces the letter with Latin text that does have a rule and places the cursor ('|') |
| # before the replacement, so the replacement is then converted by the other rules. |
| # 'c' becomes 's' when followed by e, i or y (the '}' introduces trailing context), otherwise 'k'. |
| | s ← c } [eiy]; |
| | k ← c ; |
| | i ← e ; |
| | u ← o ; |
| | ks ← x ; |
| | n ← ⁿ; |
| # Reverse direction only: lowercase the Latin input. |
| :: (lower) ; |
| # NFC in the forward direction, NFD (in parentheses) in the reverse direction. |
| ::NFC (NFD); |
| # Reverse (Latin → Arabic) filter: only characters in this set are processed by the reverse rules. |
| :: ( [[:Latin:] [%,.0-9;?ʾ-ʿ̂-̄̈-̣̰̊-̱̹;ˌ]] ); |
| ]]></tRule> |
| </transform> |
| </transforms> |
| </supplementalData> |