| <?xml version="1.0" encoding="UTF-8" ?> |
| <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd"> |
| <!-- |
| Copyright © 1991-2013 Unicode, Inc. |
| CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) |
| For terms of use, see http://www.unicode.org/copyright.html |
| --> |
| <supplementalData> |
| <version number="$Revision$"/> |
| <transforms> |
| <transform source="ThaiLogical" target="Latin" direction="both" visibility="internal"> |
| <tRule><![CDATA[ |
| # Thai-Latin |
| # This set of rules follows ISO 11940 |
| # see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf |
| # except that the standard does not mention an implicit vowel, so we use ọ |
| # |
| # The transcription is fairly ugly, so we ought to also do the UNGEGN version |
| # see: http://www.eki.ee/wgrs/rom1_th.pdf |
| # and probably make that the main variant. |
| # |
| # Note: this is an internal file. The NFD/NFC is handled externally, in the index |
| # The insertion of spaces between words, the reversal of the vowels |
| # and the conversion of space to semicolon are done *outside* of these rules. |
| # So as far as these rules are concerned, the vowels are in logical order! |
| # insert implicit vowel (and remove it going the other way) |
| # COMMENTED out: the implicit vowel positions cannot be predicted algorithmically |
| #$consonant = [ก-ฮ]; |
| #$vowel = [ะ-ฺเ-ไ็]; |
| #{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ; |
| #\uE000 → ọ ; |
| # ← ọ ; |
| # Helper sets for the backward (Latin → Thai) rules marked "account for reordering". |
| # $notAbove matches one combining mark whose canonical combining class is neither 0 |
| # nor 'above'; those rules use ($notAbove*) to skip such marks standing between a base |
| # letter and its macron / hook above. |
| $notAbove = [^\p{ccc=0}\p{ccc=above}] ; |
| # Same idea for the 'below' class. |
| # NOTE(review): $notBelow is only referenced in the commented-out $notHAccent sketch |
| # further down; no active rule visible in this file uses it - confirm before removing. |
| $notBelow = [^\p{ccc=0}\p{ccc=below}] ; |
| # Consonants |
| # Warning: the 'h's need to be handled carefully! |
| # What we really want to say is the following, but we can't |
| # $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ; |
| # Since the only accents we care about that could cause problems are free-standing accents below, we use instead: |
| # NOTE(review): the PHINTHU note near the end of this rule set says PHINTHU no longer |
| # maps to an underring; confirm that this set still lists the intended character. |
| $freeStandingBelow = [̥ ]; |
| # The two accents that distinguish the accented 'h' letters from a plain 'h' (macron, dot below). |
| $hAccent = [ ̄ ̣]; |
| # One following character that is neither a free-standing below accent nor an h accent. |
| $notHAccent0 = [^$freeStandingBelow$hAccent]; |
| # A free-standing below accent followed by a character that is not an h accent. |
| # Both $notHAccent0 and $notHAccent1 serve as look-ahead context in the kh/ph/ch/th rules. |
| $notHAccent1 = $freeStandingBelow [^$hAccent]; |
| # Rule notation: '→' applies Thai → Latin only, '←' applies Latin → Thai only, '↔' applies both ways. |
| # 'x } ctx' matches x only when it is followed by ctx; a '|' inside a result marks where |
| # processing resumes; '$1' re-emits the text captured by the parenthesised group. |
| # Rules are tried in the order written, so the order of the lines below is significant. |
| ห → h̄ ; # THAI CHARACTER HO HIP |
| ห | $1 ← h ($notAbove*) ̄; # backward case, account for reordering |
| ฮ ↔ ḥ ; # THAI CHARACTER HO NOKHUK |
| # Each aspirated series (kh, ph, ch, th) lists its accented digraphs first, then the plain |
| # digraph, then the single letter, so that longer Latin sequences are matched before shorter ones. |
| ข ↔ k̄h ; # THAI CHARACTER KHO KHAI |
| ฃ ↔ ḳ̄h ; # THAI CHARACTER KHO KHUAT |
| ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON |
| ฆ ↔ ḳh ; # THAI CHARACTER KHO RAKHANG |
| # Backward, the plain digraph is taken only when the text after it passes the $notHAccent1 / |
| # $notHAccent0 look-ahead - presumably so that 'k' + accented 'h' is not misread as this letter. |
| ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI |
| ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI |
| ก ↔ k ; # THAI CHARACTER KO KAI |
| ภ ↔ p̣h ; # THAI CHARACTER PHO SAMPHAO |
| ผ ↔ p̄h ; # THAI CHARACTER PHO PHUNG |
| พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN |
| พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN |
| ป ↔ p ; # THAI CHARACTER PO PLA |
| ฉ ↔ c̄h ; # THAI CHARACTER CHO CHING |
| ฌ ↔ c̣h ; # THAI CHARACTER CHO CHOE |
| ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG |
| ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG |
| จ ↔ c ; # THAI CHARACTER CHO CHAN |
| ฐ ↔ ṭ̄h ; # THAI CHARACTER THO THAN |
| ฑ ↔ ṯh ; # THAI CHARACTER THO NANGMONTHO |
| ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO |
| ถ ↔ t̄h ; # THAI CHARACTER THO THUNG |
| ธ ↔ ṭh ; # THAI CHARACTER THO THONG |
| ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN |
| ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN |
| #Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick. |
| ฏ ↔ t̩ ; # THAI CHARACTER TO PATAK |
| ต ↔ t ; # THAI CHARACTER TO TAO |
| # since there is no singleton g (generated), don't worry about that. |
| ง ↔ ng ; # THAI CHARACTER NGO NGU |
| ณ ↔ ṇ ; # THAI CHARACTER NO NEN |
| น ↔ n ; # THAI CHARACTER NO NU |
| ญ ↔ ỵ ; # THAI CHARACTER YO YING |
| ฎ ↔ ḍ ; # THAI CHARACTER DO CHADA |
| ด ↔ d ; # THAI CHARACTER DO DEK |
| บ ↔ b ; # THAI CHARACTER BO BAIMAI |
| # Where a '↔' rule is followed by a "backward case" rule, the '↔' rule covers the accent standing |
| # directly after its base letter and the second rule also accepts other marks in between. |
| ฝ ↔ f̄ ; # THAI CHARACTER FO FA |
| ฝ | $1 ← f ($notAbove*) ̄; # backward case, account for reordering |
| ม ↔ m ; # THAI CHARACTER MO MA |
| ย ↔ y ; # THAI CHARACTER YO YAK |
| ร ↔ r ; # THAI CHARACTER RO RUA |
| ฤ ↔ v ; # THAI CHARACTER RU |
| ฦ ↔ ł ; # THAI CHARACTER LU |
| ว ↔ w ; # THAI CHARACTER WO WAEN |
| ศ ↔ ṣ̄ ; # THAI CHARACTER SO SALA*** |
| ศ | $1 ← s ̣ ($notAbove*) ̄; # backward case, account for reordering |
| ษ ↔ s̄ʹ ; # THAI CHARACTER SO RUSI |
| ส → s̄ ; # THAI CHARACTER SO SUA*** |
| ส | $1 ← s ($notAbove*) ̄; # backward case, account for reordering |
| ฬ ↔ ḷ ; # THAI CHARACTER LO CHULA |
| ล ↔ l ; # THAI CHARACTER LO LING |
| ฟ ↔ f ; # THAI CHARACTER FO FAN |
| อ ↔ x ; # THAI CHARACTER O ANG |
| ซ ↔ s ; # THAI CHARACTER SO SO |
| # vowels |
| # Vowels, tone marks, other signs, digits and punctuation. As elsewhere in this rule set, |
| # '→' is Thai → Latin only, '←' is Latin → Thai only and '↔' applies in both directions. |
| # A rule commented "backward case" matches the base letter, any run of $notAbove marks and |
| # then the accent; it emits the Thai character and puts the captured marks ($1) after the |
| # cursor ('|') so that they are processed next. |
| ั ↔ ạ ; # THAI CHARACTER MAI HAN-AKAT |
| า → ā ; # THAI CHARACTER SARA AA |
| า | $1 ← a ($notAbove*) ̄; # backward case, account for reordering |
| # We deviate from ISO for SARA AM for disambiguation |
| ำ → a ̉; # THAI CHARACTER SARA AM |
| ำ | $1 ← a ($notAbove*) ̉ ; # backward case, account for reordering |
| ะ ↔ a ; # THAI CHARACTER SARA A |
| ี ↔ ī ; # THAI CHARACTER SARA II |
| ี | $1 ← i ($notAbove*) ̄ ; # backward case, account for reordering |
| ื ↔ ụ̄ ; # THAI CHARACTER SARA UEE |
| ื | $1 ← u ̣ ($notAbove*) ̄ ; # backward case, account for reordering |
| ึ ↔ ụ ; # THAI CHARACTER SARA UE |
| ู ↔ ū ; # THAI CHARACTER SARA UU |
| ู | $1 ← u ($notAbove*) ̄ ; # backward case, account for reordering |
| ุ ↔ u ; # THAI CHARACTER SARA U |
| ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI |
| # ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT |
| # Leading vowels; per the header of this rule set they arrive here already in logical order. |
| เ ↔ e ; # THAI CHARACTER SARA E |
| แ ↔ æ ; # THAI CHARACTER SARA AE |
| โ ↔ o ; # THAI CHARACTER SARA O |
| ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN |
| ไ ↔ ị ; # THAI CHARACTER SARA AI MAIMALAI |
| ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO |
| # Thai marks that map to bare Latin combining marks. |
| ็ ↔ ̆ ; # THAI CHARACTER MAITAIKHU |
| ่ ↔ ̀ ; # THAI CHARACTER MAI EK |
| ้ ↔ ̂ ; # THAI CHARACTER MAI THO |
| ๊ ↔ ́ ; # THAI CHARACTER MAI TRI |
| ๋ ↔ ̌ ; # THAI CHARACTER MAI CHATTAWA |
| ์ ↔ ̒ ; # THAI CHARACTER THANTHAKHAT |
| ๎ ↔ '~' ; # THAI CHARACTER YAMAKKAN |
| # We deviate from ISO for disambiguation |
| ํ ↔ ̊ ; # THAI CHARACTER NIKHAHIT |
| ๏ ↔ '§' ; # THAI CHARACTER FONGMAN |
| # Thai digits map one-to-one to ASCII digits. |
| ๐ ↔ 0 ; # THAI DIGIT ZERO |
| ๑ ↔ 1 ; # THAI DIGIT ONE |
| ๒ ↔ 2 ; # THAI DIGIT TWO |
| ๓ ↔ 3 ; # THAI DIGIT THREE |
| ๔ ↔ 4 ; # THAI DIGIT FOUR |
| ๕ ↔ 5 ; # THAI DIGIT FIVE |
| ๖ ↔ 6 ; # THAI DIGIT SIX |
| ๗ ↔ 7 ; # THAI DIGIT SEVEN |
| ๘ ↔ 8 ; # THAI DIGIT EIGHT |
| ๙ ↔ 9 ; # THAI DIGIT NINE |
| ๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU |
| ๛ ↔ » ; # THAI CHARACTER KHOMUT |
| ๆ ↔ « ; # THAI CHARACTER MAIYAMOK |
| # moved down to make shorter first |
| # NOTE(review): the plain 'i' rule below sits after the accented i forms above, which lets |
| # those longer Latin sequences be tried first in the backward direction - confirm that this |
| # is what the "moved down" note refers to. |
| #Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below. |
| ฺ ↔ ˌ ; # THAI CHARACTER PHINTHU |
| ิ ↔ i ; # THAI CHARACTER SARA I |
| # fallbacks |
| # Backward-only (Latin → Thai) rules for Latin letters that have no rule of their own above. |
| # Each one rewrites the letter to another Latin letter and leaves the cursor ('|') in front |
| # of the replacement, so the replacement is then converted by the regular rules. |
| | k ← g ; |
| | k ← h ; |
| | c ← j ; |
| | k ← q ; |
| | s ← z ; |
| # Transform rule with only the parenthesised (inverse) part: nothing is done Thai → Latin; |
| # Latin → Thai the input is lowercased. Transform rules run in reverse order in the inverse |
| # direction, so this lowercasing happens before the conversion rules are applied. |
| :: (lower); |
| ]]></tRule> |
| </transform> |
| </transforms> |
| </supplementalData> |