TMW->Unicode conversions have changed; now using U+0F6A for the stacks whose EWTS transliteration begins with "R+".
ACIP->* conversions and test baselines were updated to deal with the "r+..."=>"R+..." change.
2004-04-10 16:03:25 +00:00 · 2004-04-10 16:03:25 +00:00 · 7eca276a62
commit 7eca276a62
parent aff34174ab
4 changed files with 100 additions and 20 deletions
--- a/source/org/thdl/tib/text/tibwn.ini
+++ b/source/org/thdl/tib/text/tibwn.ini
@@ -610,16 +610,35 @@ r+b+p~48,4~~7,56~1,109~8,121~1,123~1,125~8,107~8,114~f62,fa6,fa4
 r+b+b~49,4~~7,57~1,109~8,121~1,123~1,125~8,107~8,114~f62,fa6,fa6
 r+b+h~50,4~~7,58~1,110~8,124~1,123~1,125~8,110~8,117~f62,fa6,fb7
 r+m+m~51,4~~7,59~1,110~8,121~1,123~1,125~8,107~8,114~f62,fa8,fa8
-R+Y~52,4~~7,60~1,110~8,120~1,123~1,125~8,106~8,113~f62,fbb
-R+W~196,4~~7,61~1,109~8,120~1,123~1,125~8,106~8,113~f62,fba
-R+sh~53,4~~7,62~1,109~8,120~1,123~1,125~8,106~8,113~f62,fb4
-R+sh+y~54,4~~7,63~1,109~8,122~1,123~1,125~8,108~8,115~f62,fb4,fb1
-R+Sh~55,4~~7,64~1,109~8,120~1,123~1,125~8,106~8,113~f62,fb5
-R+Sh+N~56,4~~7,65~1,109~8,123~1,123~1,125~8,109~8,116~f62,fb5,f9e
-R+Sh+N+y~57,4~~7,66~1,109~8,126~1,123~1,125~8,112~8,119~f62,fb5,f9e,fb1
-R+Sh+m~58,4~~7,67~1,109~8,124~1,123~1,125~8,110~8,117~f62,fb5,fa8
-R+Sh+y~59,4~~7,68~1,109~8,123~1,123~1,125~8,109~8,116~f62,fb5,fb1
-R+s~60,4~~7,69~1,109~8,120~1,123~1,125~8,106~8,113~f62,fb6
+
+// Should we use U+0F62 or U+0F6A for "R+..."?  That is the question.
+// The Unicode 4.0 standard says the following on page 254:
+//
+// 'Some instances of "ra" in the head position require that the
+// consonant be represented as a full-formed "ra" that never
+// changes. This is not standard usage for the Tibetan language
+// itself, but occurs in transliteration and transcription. Only in
+// these cases should the character U+0F6A ... be used instead of
+// U+0F62.... Note that the fixed-form "ra" should be used only in
+// combinations where "ra" would normally transform into a short form
+// but the user specifically wants to prevent that change.'
+//
+// Because "R+..." occurs only in non-standard stacks, i.e. stacks
+// used for transliteration and transcription, we use U+0F6A.
+//
+// Note that TPairList.java's unicodeExceptionsMap must be updated if
+// we change who uses U+0F6A.
+R+Y~52,4~~7,60~1,110~8,120~1,123~1,125~8,106~8,113~f6a,fbb
+// R+W is mentioned in ACIPRules.java:
+R+W~196,4~~7,61~1,109~8,120~1,123~1,125~8,106~8,113~f6a,fba
+R+sh~53,4~~7,62~1,109~8,120~1,123~1,125~8,106~8,113~f6a,fb4
+R+sh+y~54,4~~7,63~1,109~8,122~1,123~1,125~8,108~8,115~f6a,fb4,fb1
+R+Sh~55,4~~7,64~1,109~8,120~1,123~1,125~8,106~8,113~f6a,fb5
+R+Sh+N~56,4~~7,65~1,109~8,123~1,123~1,125~8,109~8,116~f6a,fb5,f9e
+R+Sh+N+y~57,4~~7,66~1,109~8,126~1,123~1,125~8,112~8,119~f6a,fb5,f9e,fb1
+R+Sh+m~58,4~~7,67~1,109~8,124~1,123~1,125~8,110~8,117~f6a,fb5,fa8
+R+Sh+y~59,4~~7,68~1,109~8,123~1,123~1,125~8,109~8,116~f6a,fb5,fb1
+R+s~60,4~~7,69~1,109~8,120~1,123~1,125~8,106~8,113~f6a,fb6
 r+h~61,4~~7,70~1,109~8,121~1,123~1,125~8,107~8,114~f62,fb7
 r+k+Sh~62,4~~7,71~1,109~8,121~1,123~1,125~8,107~8,114~f62,f90,fb5
 l+g+w~63,4~~7,72~1,109~8,122~1,123~1,125~8,108~8,115~f63,f92,fad
@@ -632,6 +651,7 @@ l+h+w~197,4~~7,78~1,109~8,121~1,123~1,125~8,106~8,113~f63,fb7,fad
 w+y~69,4~~7,79~1,109~8,121~1,123~1,125~8,107~8,114~f5d,fb1
 w+r~70,4~~7,80~1,109~8,121~1,123~1,125~8,107~8,114~f5d,fb2
 w+n~195,4~~7,81~1,109~8,120~1,123~1,125~8,106~8,113~f5d,fa3
+// w+W is mentioned in ACIPRules.java:
 w+W~194,4~~7,82~1,109~8,120~1,123~1,125~8,106~8,113~f5d,fba
 sh+ts~71,4~~7,83~1,109~8,120~1,123~1,125~8,106~8,113~f64,fa9
 sh+ts+y~72,4~~7,84~1,109~8,122~1,123~1,125~8,108~8,115~f64,fa9,fb1
@@ -990,6 +1010,7 @@ dz+h~227,5~~10,98~1,110~~1,125~1,126~~~0FAC
 zh~229,5~~10,100~1,109~~1,123~1,125~~~0FAE
 z~230,5~~10,101~1,109~~1,123~1,125~~~0FAF
 '~231,5~~10,102~1,109~~1,123~1,125~~~0FB0
+// Hey emacs: fontify this: '
 l~234,5~~10,105~1,109~~1,123~1,125~~~0FB3
 sh~235,5~~10,106~1,109~~1,123~1,125~~~0FB4
 Sh~236,5~~10,107~1,109~~1,123~1,125~~~0FB5