| <?xml version="1.0" encoding="UTF-8" ?> |
| <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd"> |
| <!-- |
| Copyright © 1991-2013 Unicode, Inc. |
| CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) |
| For terms of use, see http://www.unicode.org/copyright.html |
| --> |
| <supplementalData> |
| <version number="$Revision$"/> |
| <transforms> |
| <transform source="ar" target="ar_Latn" variant="BGN" direction="forward" draft="contributed" alias="Arabic-Latin/BGN ar-Latn-t-ar-m0-bgn"> |
| <tRule> |
| # |
| ######################################################################## |
| # BGN/PCGN 1956 System |
| # |
| # This system was adopted by the BGN in 1946 and by the PCGN |
| # in 1956 and has been applied in the systematic romanization |
| # of geographic names in Bahrain, Egypt, Iraq, Jordan, |
| # Kuwait, Lebanon, Libya, Oman, Qatar, Saudi Arabia, Sudan, |
| # Syria, Tunisia, the United Arab Emirates, and Yemen, all |
| # of which have been covered by published BGN gazetteers. |
| # |
| # Originally prepared by Michael Everson <[email protected]> |
| ######################################################################## |
| # |
| # MINIMAL FILTER: Arabic-Latin |
| # |
| |
| # Global filter: only characters in this set are handed to the rules below |
| # (Arabic-script characters, the ARABIC block, and the explicitly listed |
| # letters, marks and digits); everything else passes through unchanged. |
| :: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىيًٌٍَُِّْ٠١٢٣٤٥٦٧٨٩ٱ]] ; |
| # Forward direction applies NFKD before the rules run; the form in |
| # parentheses (NFC) is what the reverse direction applies at this step. |
| :: NFKD (NFC) ; |
| # |
| # |
| ######################################################################## |
| |
| |
| # |
| ######################################################################## |
| # |
| # Define All Transformation Variables |
| # |
| ######################################################################## |
| # |
| |
| # Symbol emitted by the HAMZA, ALEF WASLA and (non-initial) ALEF WITH MADDA |
| # ABOVE rules. |
| $alef = ’; |
| # Symbol emitted by the AIN rules. |
| $ayin = ‘; |
| # Combining mark appended on the Latin side of two-way rules whose plain |
| # Latin form is already used by another two-way rule (e.g. the extended |
| # digits vs. the Arabic-Indic digits, KEHEH vs. KAF), so the two stay distinct. |
| $disambig = ̱ ; |
| # |
| # |
| # Use this $wordBoundary until bug 2034 is fixed in ICU: |
| # http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest |
| # |
| |
| # Stand-in for a word boundary: any single character that is not a letter, |
| # mark or number. It is used as before-context ($wordBoundary{...) by the |
| # word-initial HAMZA / ALEF rules. |
| # NOTE(review): being a character set, this presumably needs a real preceding |
| # character and so may not match at the very start of the text - confirm. |
| $wordBoundary = [^[:L:][:M:][:N:]] ; |
| # |
| # |
| ######################################################################## |
| |
| # non-letters |
| # Punctuation and digits. All rules here are two-way. |
| # The separator rules come in pairs: the first form only applies between two |
| # decimal digits and produces a bare ',' or '.'; the second is the fallback |
| # for any other position and tags the output with $disambig. The contextual |
| # form must stay first so that it wins when both could match. |
| [:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR |
| [:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR |
| ٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR |
| ٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR |
| # ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate |
| ، ↔ ',' ; # ARABIC COMMA |
| ؛ ↔ ';' ; # ARABIC SEMICOLON |
| ؟ ↔ '?' ; # ARABIC QUESTION MARK |
| ٪ ↔ '%' ; # ARABIC PERCENT SIGN |
| # Extended Arabic-Indic digits map to the ASCII digit plus $disambig, which |
| # keeps them distinct from the plain Arabic-Indic digits below. |
| ۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO |
| ۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE |
| ۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO |
| ۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE |
| ۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR |
| ۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE |
| ۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX |
| ۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN |
| ۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT |
| ۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE |
| # Arabic-Indic digits map to the bare ASCII digit. |
| ٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO |
| ١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE |
| ٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO |
| ٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE |
| ٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR |
| ٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE |
| ٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX |
| ٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN |
| ٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT |
| ٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE |
| |
| # |
| ######################################################################## |
| # |
| # Rules moved to front to avoid masking |
| # |
| ######################################################################## |
| |
| |
| # |
| ######################################################################## |
| # |
| # BGN Page 8 Rule 5 |
| # |
| # The character sequences ته , كه , ده , and سه may be romanized t·h, k·h, |
| # d·h, and s·h in order to differentiate those romanizations from the |
| # digraphs th, kh, dh, and sh. |
| # |
| ######################################################################## |
| # |
| |
| # One-way rules: a consonant directly followed by HEH is written with a |
| # middle dot so the output cannot be read as the digraphs th, kh, dh, sh. |
| # These two-letter rules must stay ahead of the single-letter TEH / KAF / |
| # DAL / SEEN rules, which would otherwise consume the first letter alone. |
| ته → t·h ; # ARABIC LETTER TEH + HEH |
| كه → k·h ; # ARABIC LETTER KAF + HEH |
| ده → d·h ; # ARABIC LETTER DAL + HEH |
| سه → s·h ; # ARABIC LETTER SEEN + HEH |
| # |
| # |
| ######################################################################## |
| # |
| # End Rule 5 |
| # |
| ######################################################################## |
| |
| |
| ######################################################################## |
| |
| # |
| # |
| # BGN Page 8 Rule 9 |
| # |
| # Doubled consonant sounds are represented in Arabic script by placing |
| # a shaddah ( ّ ) over a consonant character. In romanization the letter |
| # should be doubled. [The remainder of this rule deals with the definite |
| # article and is lexical.] |
| # |
| ######################################################################## |
| # |
| |
| # One-way rules: a consonant immediately followed by SHADDA is written as |
| # that consonant's romanization twice. The doubled form must use exactly the |
| # same Latin letter as the corresponding single-consonant rule, so HAH, SAD, |
| # DAD, TAH and ZAH use the cedilla letters (ḩ ş ḑ ţ z̧), not dot-below ones. |
| # These rules must stay ahead of the single-letter rules so that the |
| # letter+SHADDA pair is consumed as a unit. |
| # NOTE(review): the input is NFKD-normalized first; canonical ordering places |
| # SHADDA after FATHA/DAMMA/KASRA on the same letter, so a vowelled geminate |
| # may reach these rules as letter + vowel + SHADDA and not match - confirm. |
| بّ → bb ; # ARABIC LETTER BEH + SHADDA |
| تّ → tt ; # ARABIC LETTER TEH + SHADDA |
| ثّ → thth ; # ARABIC LETTER THEH + SHADDA |
| جّ → jj ; # ARABIC LETTER JEEM + SHADDA |
| حّ → ḩḩ ; # ARABIC LETTER HAH + SHADDA |
| خّ → khkh ; # ARABIC LETTER KHAH + SHADDA |
| دّ → dd ; # ARABIC LETTER DAL + SHADDA |
| ذّ → dhdh ; # ARABIC LETTER THAL + SHADDA |
| رّ → rr ; # ARABIC LETTER REH + SHADDA |
| زّ → zz ; # ARABIC LETTER ZAIN + SHADDA |
| سّ → ss ; # ARABIC LETTER SEEN + SHADDA |
| شّ → shsh ; # ARABIC LETTER SHEEN + SHADDA |
| صّ → şş ; # ARABIC LETTER SAD + SHADDA |
| ضّ → ḑḑ ; # ARABIC LETTER DAD + SHADDA |
| طّ → ţţ ; # ARABIC LETTER TAH + SHADDA |
| ظّ → z̧z̧ ; # ARABIC LETTER ZAH + SHADDA |
| عّ → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA |
| غّ → ghgh ; # ARABIC LETTER GHAIN + SHADDA |
| فّ → ff ; # ARABIC LETTER FEH + SHADDA |
| قّ → qq ; # ARABIC LETTER QAF + SHADDA |
| كّ → kk ; # ARABIC LETTER KAF + SHADDA |
| لّ → ll ; # ARABIC LETTER LAM + SHADDA |
| مّ → mm ; # ARABIC LETTER MEEM + SHADDA |
| نّ → nn ; # ARABIC LETTER NOON + SHADDA |
| هّ → hh ; # ARABIC LETTER HEH + SHADDA |
| وّ → ww ; # ARABIC LETTER WAW + SHADDA |
| يّ → yy ; # ARABIC LETTER YEH + SHADDA |
| ىّ → yy ; # ARABIC LETTER ALEF MAKSURA + SHADDA (dotless yeh spelling) |
| # |
| # |
| ######################################################################## |
| # |
| # End Rule 9 |
| # |
| ######################################################################## |
| |
| |
| # |
| ######################################################################## |
| # |
| # Start of Transformations |
| # |
| ######################################################################## |
| # |
| |
| # Single-letter consonant rules. Rules are tried in order and the first match |
| # wins, so each context-restricted form ($wordBoundary{...) is listed before |
| # the unrestricted rule for the same letter. |
| |
| # Word-initial HAMZA and ALEF are dropped; elsewhere HAMZA becomes $alef. |
| $wordBoundary{ء → ; # ARABIC LETTER HAMZA |
| ء → $alef ; # ARABIC LETTER HAMZA |
| $wordBoundary{ا → ; # ARABIC LETTER ALEF |
| ٱ → $alef ; # ARABIC LETTER ALEF WASLA |
| # ALEF WITH MADDA ABOVE: plain ā word-initially, $alef + ā elsewhere. |
| $wordBoundary{آ → ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE |
| آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE |
| ب → b ; # ARABIC LETTER BEH |
| ت → t ; # ARABIC LETTER TEH |
| ة → h ; # ARABIC LETTER TEH MARBUTA |
| ث → th ; # ARABIC LETTER THEH |
| ج → j ; # ARABIC LETTER JEEM |
| ح → ḩ ; # ARABIC LETTER HAH |
| خ → kh ; # ARABIC LETTER KHAH |
| د → d ; # ARABIC LETTER DAL |
| ذ → dh ; # ARABIC LETTER THAL |
| ر → r ; # ARABIC LETTER REH |
| ز → z ; # ARABIC LETTER ZAIN |
| س → s ; # ARABIC LETTER SEEN |
| ش → sh ; # ARABIC LETTER SHEEN |
| ص → ş ; # ARABIC LETTER SAD |
| ض → ḑ ; # ARABIC LETTER DAD |
| ط → ţ ; # ARABIC LETTER TAH |
| ظ → z̧ ; # ARABIC LETTER ZAH |
| ع → $ayin ; # ARABIC LETTER AIN |
| غ → gh ; # ARABIC LETTER GHAIN |
| ف → f ; # ARABIC LETTER FEH |
| ق → q ; # ARABIC LETTER QAF |
| # KEHEH and KAF are two-way; KEHEH carries $disambig so the reverse |
| # direction maps a bare k back to KAF. |
| ک ↔ k $disambig ; # ARABIC LETTER KEHEH |
| ك ↔ k ; # ARABIC LETTER KAF |
| ل → l ; # ARABIC LETTER LAM |
| م → m ; # ARABIC LETTER MEEM |
| ن → n ; # ARABIC LETTER NOON |
| ه → h ; # ARABIC LETTER HEH |
| و → w ; # ARABIC LETTER WAW |
| # YEH as a consonant. The vowel rules that contain YEH start with a |
| # preceding FATHA or KASRA, so they are matched at that mark before the |
| # cursor ever reaches the YEH and are not affected by this rule. |
| ي → y ; # ARABIC LETTER YEH |
| # Fallback for the dotless spelling when it is not preceded by FATHA. |
| ى → y ; # ARABIC LETTER ALEF MAKSURA |
| |
| # Vowel marks. Within each group the longer sequences (mark + following |
| # letter) are listed before the bare mark, because the first matching rule |
| # wins and the bare-mark rule would otherwise consume the mark alone. |
| َا → ā ; # ARABIC FATHA + ALEF |
| َى → á ; # ARABIC FATHA + ALEF MAKSURA |
| َيْ → ay ; # ARABIC FATHA + YEH + SUKUN |
| َوْ → aw ; # ARABIC FATHA + WAW + SUKUN |
| َ → a ; # ARABIC FATHA |
| |
| ِي → ī ; # ARABIC KASRA + YEH |
| ِ → i ; # ARABIC KASRA |
| |
| ُو → ū ; # ARABIC DAMMA + WAW |
| ُ → u ; # ARABIC DAMMA |
| |
| # SUKUN produces no output; the tanwin marks become a vowel plus |
| # superscript n. |
| ْ → ; # ARABIC SUKUN |
| ً → aⁿ ; # ARABIC FATHATAN |
| ٍ → iⁿ ; # ARABIC KASRATAN |
| ٌ → uⁿ ; # ARABIC DAMMATAN |
| # Final normalization: NFC in the forward direction; the form in |
| # parentheses (NFD) is what the reverse direction applies at this step. |
| ::NFC (NFD) ; |
| |
| # |
| # |
| ######################################################################## |
| |
| </tRule> |
| </transform> |
| </transforms> |
| </supplementalData> |