common/transforms/fa-fa_FONIPA.xml - platform/external/cldr - Git at Google

 <?xml version="1.0" encoding="UTF-8" ?>
 <!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
 <!--
 Copyright © 1991-2017 Unicode, Inc.
 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
 For terms of use, see http://www.unicode.org/copyright.html
 -->
 <supplementalData>
 	<version number="$Revision$"/>
 	<transforms>
 		<transform source="fa" target="fa_FONIPA" direction="forward" alias="fa-fonipa-t-fa">
 			<tRule><![CDATA[
 # Persian (fa) to IPA (fa_FONIPA); the transform is declared forward-only.
 # Rules are order-sensitive: at each position the first rule that matches is
 # applied, so specific contexts must stay ahead of the general fallbacks.
 [\u200c \u200d] → ;  # Strip off ZWJ and ZWNJ.
 # Decompose first so every diacritic is a separate code point for the rules below.
 ::NFD;

 # Rewrite similarly-looking Arabic letters to Persian.
 ي → ی;
 ى → ی;
 ك → ک;
 ە → ه;
 # End this pass so the phonetic rules below only see the Persian letter forms.
 ::NULL;

 # Marks and letters that count as a vowel; used mostly as RIGHT context,
 # i.e. tested against source text that has not been converted yet.
 # (The fourth member is presumably U+0653 maddah above - confirm.)
 $VOWEL = [ َ ِ  ُ  ٓ ا و ی];
 # Any character that is not a letter, a mark, or a number.
 $BOUNDARY = [^[:L:][:M:][:N:]];
 # IPA consonants as emitted by the rules below; used as LEFT context, which is
 # tested against already-converted output rather than the original script.
 $IPA_CONSONANT = [ m n p b t d k ɡ ʔ f v s z ʃ ʒ ʁ ɢ h χ {t͡ʃ} {d͡ʒ} l ɾ ];

 # Vowels
 # Yeh / waw carrying a shadda are written out as doubled consonants.
 یّ → jj;
 وّ → vv;
 # Swap "short vowel + shadda" into "shadda + short vowel". The `|` places the
 # cursor between them, so the vowel is processed again by the rules below
 # while the shadda is left for the clean-up pass at the end.
 ([ َ ِ  ُ])ّ → ّ | $1;
 # fatha + yeh + sukun
 َیْ → æj;
 # kasra + yeh
 ِی → ej;

 # yeh + waw: /iːv/ when the waw carries a sukun or is followed by a
 # word-final heh (with or without hamza); /juː/ otherwise.
 یوْ → iːv;
 {یو} ه $BOUNDARY → iːv;
 {یو} هٔ $BOUNDARY → iːv;
 یو → juː;
 # fatha + waw + sukun
 # NOTE(review): emits "a" although fatha is "æ" everywhere else - confirm intended.
 َوْ → av;

 # Hamza forms
 # Every rule here emits a glottal stop. The carrier letter (alef, waw, yeh) is
 # consumed together with the combining hamza and produces no sound of its own.
 # The three-element rules must stay ahead of the matching two-element rule.
 ء → ʔ;
 ا َ ٔ → ʔæ; # Needed because NFD reorders fatha before hamza
 ا ٔ → ʔ;
 و ٔ → ʔ;
 ی ْ ٔ → ʔ; # Needed because NFD reorders sukun before hamza
 ی ِ ٔ → ʔe; # Needed because NFD reorders kasra before hamza
 ی ٔ → ʔ;

 # Word-final heh: the text inside { } is replaced, $BOUNDARY after it is
 # context only. The bracketed set in front is left context and is tested
 # against the IPA already emitted for the preceding sound.
 # fatha + heh: the heh is silent.
 { َ ه} $BOUNDARY → æ;
 # heh + hamza: /eje/, or just /je/ when an "e" has already been emitted.
 [^ːeoæ] {هٔ} $BOUNDARY → eje;
 [e] {هٔ} $BOUNDARY → je;
 # plain heh: /e/, or nothing when an "e" has already been emitted.
 [^ːeoæ] {ه} $BOUNDARY → e;
 [e] {ه} $BOUNDARY → ;
 اَ → æ;
 # The braces keep the boundary character as context only. Without them the
 # boundary character (space, punctuation) was part of the match and was
 # deleted from the output together with the letters.
 {اً} $BOUNDARY → æn;
 # Fallback for any fatha not consumed by a more specific rule.
 َ → æ;
 یه → je;
 یٰ → ɒː;
 # waw + yeh between an emitted consonant and a following vowel.
 $IPA_CONSONANT {وی} $VOWEL → uːj;
 # If yeh is preceded by a consonant and followed by a vowel,
 # it's pronounced /iːj/, but a sukun breaks that and makes it
 # be pronounced just as /j/.
 $IPA_CONSONANT {\u0652 یو} → juː;
 $IPA_CONSONANT {\u0652 ی} $VOWEL → j;
 $IPA_CONSONANT {ی} $VOWEL → iːj;
 # Remaining yeh: /j/ before a vowel or with sukun, long /iː/ otherwise.
 {ی} $VOWEL → j;
 ی ْ → j;
 ی → iː;

 # Alef. Rules with $BOUNDARY as left context apply only at the start of a
 # word; the same letters elsewhere fall through to the later rules.
 $BOUNDARY {ای} → iː;
 # alef + maddah above (the decomposed form of alef-with-madda)
 ا\u0653 → ɒː;
 # NOTE(review): after ::NFD a precomposed alef-with-madda should no longer
 # occur, so this rule is presumably only a safety net - confirm.
 آ → ɒː;
 اِ → e;
 $BOUNDARY {اُو} → o;
 اُ → o;
 $BOUNDARY {او} → uː;
 او → ɒːv;
 ا → ɒː; # Probably [^$BOUNDARY]
 # Fallback for any kasra not consumed by a more specific rule.
 ِ → e;
 ه ِ ّ → hhe; # Needed because NFD moves kasra before shadda
 هِ → he;

 # Waw. After khe the waw is not pronounced: only the khe and the following
 # vowel are emitted.
 خوا → χɒː;
 خوی → χiː;
 # Consonantal /v/ before a vowel or before a word-final heh.
 {و} $VOWEL → v;
 {و} ه $BOUNDARY → v;
 {و} هٔ $BOUNDARY → v;
 # Long /uː/ after an already-emitted consonant.
 $IPA_CONSONANT {و} → uː;
 $IPA_CONSONANT \u0651 {و} → uː; # shadda after a consonant
 # NOTE(review): the right context here is tested against IPA symbols, but text
 # to the right has not been converted yet, and the damma on the left has
 # normally been rewritten by the damma rules below before the cursor reaches
 # the waw. This rule looks unreachable for Persian input - confirm.
 ُ{و} $IPA_CONSONANT → uː;

 # A waw standing alone between two boundary characters (presumably the
 # conjunction "and" - confirm).
 $BOUNDARY {و} $BOUNDARY → va;
 # damma + waw: /ov/ before a vowel, /oʔ/ with hamza, plain /o/ otherwise.
 { ُو} $VOWEL → ov;
 ُ و ٔ → oʔ;
  ُو → o;
 # Fallback for any damma not consumed by a more specific rule.
 ُ → o;

 # Consonants
 # Context-free fallbacks, one phoneme per letter. Letters grouped in a set
 # are all written with the same IPA symbol. The heh rule here only handles
 # heh that was not consumed by the earlier word-final heh rules.
 پ → p;
 ب → b;
 [ت ط] → t;
 د → d;
 ک → k;
 گ → ɡ;
 ع → ʔ;
 چ → t͡ʃ;
 ج → d͡ʒ;
 ف → f;
 [س ص ث] → s;
 [ز ذ ض ظ] → z;
 ش → ʃ;
 ژ → ʒ;
 خ → χ;
 غ → ʁ;
 ق → ɢ;
 ح → h;
 م → m;
 ن → n;
 ه → h;
 ل → l;
 ر → ɾ;

 # Drop any sukun that no earlier rule consumed, then end the main pass.
 ْ → ;
 ::NULL;

 # TODO: How to handle these?
 # Replace a leftover shadda with a copy of the symbol immediately before it
 # (the parenthesised left context is captured as $1), i.e. double that sound.
 # NOTE(review): `|` is not an operator inside a set, so it is presumably taken
 # as a literal character here - confirm whether a plain union was intended.
 ([$IPA_CONSONANT|$VOWEL]){ّ} → $1;

 # Delete any of these four combining marks that are still present.
 [ ّ ٔ ً ٰ ] → ;

 # Recompose the output.
 ::NFC;

 			]]></tRule>
 		</transform>
 	</transforms>
 </supplementalData>
	<?xml version="1.0" encoding="UTF-8" ?>
	<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
	<!--
	Copyright © 1991-2017 Unicode, Inc.
	CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
	For terms of use, see http://www.unicode.org/copyright.html
	-->
	<supplementalData>
	<version number="$Revision$"/>
	<transforms>
	<transform source="fa" target="fa_FONIPA" direction="forward" alias="fa-fonipa-t-fa">
	<tRule><![CDATA[
	# Persian (fa) to IPA (fa_FONIPA); the transform is declared forward-only.
	# Rules are order-sensitive: at each position the first rule that matches is
	# applied, so specific contexts must stay ahead of the general fallbacks.
	[\u200c \u200d] → ; # Strip off ZWJ and ZWNJ.
	# Decompose first so every diacritic is a separate code point for the rules below.
	::NFD;

	# Rewrite similarly-looking Arabic letters to Persian.
	ي → ی;
	ى → ی;
	ك → ک;
	ە → ه;
	# End this pass so the phonetic rules below only see the Persian letter forms.
	::NULL;

	# Marks and letters that count as a vowel; used mostly as RIGHT context,
	# i.e. tested against source text that has not been converted yet.
	# (The fourth member is presumably U+0653 maddah above - confirm.)
	$VOWEL = [ َ ِ ُ ٓ ا و ی];
	# Any character that is not a letter, a mark, or a number.
	$BOUNDARY = [^[:L:][:M:][:N:]];
	# IPA consonants as emitted by the rules below; used as LEFT context, which is
	# tested against already-converted output rather than the original script.
	$IPA_CONSONANT = [ m n p b t d k ɡ ʔ f v s z ʃ ʒ ʁ ɢ h χ {t͡ʃ} {d͡ʒ} l ɾ ];

	# Vowels
	# Yeh / waw carrying a shadda are written out as doubled consonants.
	یّ → jj;
	وّ → vv;
	# Swap "short vowel + shadda" into "shadda + short vowel". The bare `|` is
	# the cursor marker: it places the cursor between the two, so the vowel is
	# processed again by the rules below. It must not be backslash-escaped,
	# because an escaped pipe is a literal character that would be written
	# into the output.
	([ َ ِ ُ])ّ → ّ | $1;
	# fatha + yeh + sukun
	َیْ → æj;
	# kasra + yeh
	ِی → ej;

	# yeh + waw: /iːv/ when the waw carries a sukun or is followed by a
	# word-final heh (with or without hamza); /juː/ otherwise.
	یوْ → iːv;
	{یو} ه $BOUNDARY → iːv;
	{یو} هٔ $BOUNDARY → iːv;
	یو → juː;
	# fatha + waw + sukun
	# NOTE(review): emits "a" although fatha is "æ" everywhere else - confirm intended.
	َوْ → av;

	# Hamza forms
	# Every rule here emits a glottal stop. The carrier letter (alef, waw, yeh) is
	# consumed together with the combining hamza and produces no sound of its own.
	# The three-element rules must stay ahead of the matching two-element rule.
	ء → ʔ;
	ا َ ٔ → ʔæ; # Needed because NFD reorders fatha before hamza
	ا ٔ → ʔ;
	و ٔ → ʔ;
	ی ْ ٔ → ʔ; # Needed because NFD reorders sukun before hamza
	ی ِ ٔ → ʔe; # Needed because NFD reorders kasra before hamza
	ی ٔ → ʔ;

	# Word-final heh: the text inside { } is replaced, $BOUNDARY after it is
	# context only. The bracketed set in front is left context and is tested
	# against the IPA already emitted for the preceding sound.
	# fatha + heh: the heh is silent.
	{ َ ه} $BOUNDARY → æ;
	# heh + hamza: /eje/, or just /je/ when an "e" has already been emitted.
	[^ːeoæ] {هٔ} $BOUNDARY → eje;
	[e] {هٔ} $BOUNDARY → je;
	# plain heh: /e/, or nothing when an "e" has already been emitted.
	[^ːeoæ] {ه} $BOUNDARY → e;
	[e] {ه} $BOUNDARY → ;
	اَ → æ;
	# The braces keep the boundary character as context only. Without them the
	# boundary character (space, punctuation) was part of the match and was
	# deleted from the output together with the letters.
	{اً} $BOUNDARY → æn;
	# Fallback for any fatha not consumed by a more specific rule.
	َ → æ;
	یه → je;
	یٰ → ɒː;
	# waw + yeh between an emitted consonant and a following vowel.
	$IPA_CONSONANT {وی} $VOWEL → uːj;
	# If yeh is preceded by a consonant and followed by a vowel,
	# it's pronounced /iːj/, but a sukun breaks that and makes it
	# be pronounced just as /j/.
	$IPA_CONSONANT {\u0652 یو} → juː;
	$IPA_CONSONANT {\u0652 ی} $VOWEL → j;
	$IPA_CONSONANT {ی} $VOWEL → iːj;
	# Remaining yeh: /j/ before a vowel or with sukun, long /iː/ otherwise.
	{ی} $VOWEL → j;
	ی ْ → j;
	ی → iː;

	# Alef. Rules with $BOUNDARY as left context apply only at the start of a
	# word; the same letters elsewhere fall through to the later rules.
	$BOUNDARY {ای} → iː;
	# alef + maddah above (the decomposed form of alef-with-madda)
	ا\u0653 → ɒː;
	# NOTE(review): after ::NFD a precomposed alef-with-madda should no longer
	# occur, so this rule is presumably only a safety net - confirm.
	آ → ɒː;
	اِ → e;
	$BOUNDARY {اُو} → o;
	اُ → o;
	$BOUNDARY {او} → uː;
	او → ɒːv;
	ا → ɒː; # Probably [^$BOUNDARY]
	# Fallback for any kasra not consumed by a more specific rule.
	ِ → e;
	ه ِ ّ → hhe; # Needed because NFD moves kasra before shadda
	هِ → he;

	# Waw. After khe the waw is not pronounced: only the khe and the following
	# vowel are emitted.
	خوا → χɒː;
	خوی → χiː;
	# Consonantal /v/ before a vowel or before a word-final heh.
	{و} $VOWEL → v;
	{و} ه $BOUNDARY → v;
	{و} هٔ $BOUNDARY → v;
	# Long /uː/ after an already-emitted consonant.
	$IPA_CONSONANT {و} → uː;
	$IPA_CONSONANT \u0651 {و} → uː; # shadda after a consonant
	# NOTE(review): the right context here is tested against IPA symbols, but text
	# to the right has not been converted yet, and the damma on the left has
	# normally been rewritten by the damma rules below before the cursor reaches
	# the waw. This rule looks unreachable for Persian input - confirm.
	ُ{و} $IPA_CONSONANT → uː;

	# A waw standing alone between two boundary characters (presumably the
	# conjunction "and" - confirm).
	$BOUNDARY {و} $BOUNDARY → va;
	# damma + waw: /ov/ before a vowel, /oʔ/ with hamza, plain /o/ otherwise.
	{ ُو} $VOWEL → ov;
	ُ و ٔ → oʔ;
	ُو → o;
	# Fallback for any damma not consumed by a more specific rule.
	ُ → o;

	# Consonants
	# Context-free fallbacks, one phoneme per letter. Letters grouped in a set
	# are all written with the same IPA symbol. The heh rule here only handles
	# heh that was not consumed by the earlier word-final heh rules.
	پ → p;
	ب → b;
	[ت ط] → t;
	د → d;
	ک → k;
	گ → ɡ;
	ع → ʔ;
	چ → t͡ʃ;
	ج → d͡ʒ;
	ف → f;
	[س ص ث] → s;
	[ز ذ ض ظ] → z;
	ش → ʃ;
	ژ → ʒ;
	خ → χ;
	غ → ʁ;
	ق → ɢ;
	ح → h;
	م → m;
	ن → n;
	ه → h;
	ل → l;
	ر → ɾ;

	# Drop any sukun that no earlier rule consumed, then end the main pass.
	ْ → ;
	::NULL;

	# TODO: How to handle these?
	# Replace a leftover shadda with a copy of the symbol immediately before it
	# (the parenthesised left context is captured as $1), i.e. double that sound.
	# The pipe inside the set is written bare, without a backslash escape.
	# NOTE(review): `|` is not an operator inside a set, so it is presumably taken
	# as a literal character here - confirm whether a plain union was intended.
	([$IPA_CONSONANT|$VOWEL]){ّ} → $1;

	# Delete any of these four combining marks that are still present.
	[ ّ ٔ ً ٰ ] → ;

	# Recompose the output.
	::NFC;

	]]></tRule>
	</transform>
	</transforms>
	</supplementalData>