From b74af71efcc3ad3f0100fe5a3b502a4cf0119a31 Mon Sep 17 00:00:00 2001 From: dchandler Date: Wed, 6 Jul 2005 22:26:55 +0000 Subject: [PATCH] Better, but still flawed, handling of EWTS [^] (i.e., U+0F39). --- source/org/thdl/tib/text/tibwn.ini | 3 +- source/org/thdl/tib/text/ttt/EWTSTest.java | 20 ++++++++++-- source/org/thdl/tib/text/ttt/EWTSTraits.java | 34 +++++++------------- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/source/org/thdl/tib/text/tibwn.ini b/source/org/thdl/tib/text/tibwn.ini index beb52e3..2c06cab 100644 --- a/source/org/thdl/tib/text/tibwn.ini +++ b/source/org/thdl/tib/text/tibwn.ini @@ -265,7 +265,7 @@ R~233,5~~10,104~1,109~~1,123~1,125~~~0FBC // horizontal stroke goes upward on U+0F5F and downward on U+0F5B. \uF031~146,5~~10,42~~~~~~~none f~153,5~~10,58~1,110~1,118~1,124~1,126~10,114~10,123~0F55,0F39 -v~154,5~~10,59~1,110~1,118~1,124~1,126~10,114~10,123~0F56,f39 +v~154,5~~10,59~1,110~1,118~1,124~1,126~10,114~10,123~0F56,0F39 T~170,1~~1,64~1,109~1,120~1,123~1,125~10,115~10,124~0F4A Th~171,1~~1,65~1,109~1,118~1,123~1,125~10,114~10,123~0F4B D~172,1~~1,66~1,109~1,120~1,123~1,125~10,115~10,124~0F4C @@ -1201,4 +1201,3 @@ r~176,4~~8,71~~~~~~~0FB2 \tmw2059~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90 // utsama kha: \tmw2060~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91 - diff --git a/source/org/thdl/tib/text/ttt/EWTSTest.java b/source/org/thdl/tib/text/ttt/EWTSTest.java index 4f9b870..8781e93 100644 --- a/source/org/thdl/tib/text/ttt/EWTSTest.java +++ b/source/org/thdl/tib/text/ttt/EWTSTest.java @@ -148,11 +148,27 @@ public class EWTSTest extends TestCase { } } + public void test0F39() { + ewts2uni_test("v", "\u0F56\u0F39"); + ewts2uni_test("f", "\u0F55\u0F39"); + ewts2uni_test("a^", "\u0f68\u0f39"); + ewts2uni_test("hUM^", "\u0f67\u0f71\u0f74\u0f7e\u0f39"); + ewts2uni_test("ph^", "\u0f55\u0f39"); + ewts2uni_test("phe^", "\u0f55\u0f7a\u0f39"); // TODO(DLC)[EWTS->Tibetan]: does order of U+0F39 matter? + ewts2uni_test("ph^e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!? + ewts2uni_test("f+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e"); + ewts2uni_test("faM", "\u0f55\u0f39\u0f7e"); + ewts2uni_test("vaM", "\u0f56\u0f39\u0f7e"); + ewts2uni_test("k+fa", "\u0f40\u0fa5\u0f39"); + ewts2uni_test("f+va", "\u0f55\u0f39\u0fa6\u0f39"); + ewts2uni_test("ph+veM", "\u0f55\u0fa6\u0f39\u0f7a\u0f7e"); + if (RUN_FAILING_TESTS) ewts2uni_test("ph^+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e"); + } + /** Tests that the EWTS->unicode converter isn't completely braindead. */ public void testEwtsBasics() { ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66"); - if (RUN_FAILING_TESTS) ewts2uni_test("hUM^", "TODO(DLC)[EWTS->Tibetan]: DLC NOW"); ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51"); ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51"); ewts2uni_test("drwa", "\u0f51\u0fb2\u0fad"); @@ -690,7 +706,7 @@ public class EWTSTest extends TestCase { ewts2uni_test("\\u0F36", "\u0F36"); if (RUN_FAILING_TESTS) ewts2uni_test("X", "\u0F37"); // TODO(DLC)[EWTS->Tibetan]: error combiner ewts2uni_test("\\u0F38", "\u0F38"); - if (RUN_FAILING_TESTS) ewts2uni_test("^", "\u0F39"); // TODO(DLC)[EWTS->Tibetan]: error combiner + assert_EWTS_error("^"); // If you want \u0f68\u0f39, use [a^] ewts2uni_test("<", "\u0F3A"); ewts2uni_test(">", "\u0F3B"); ewts2uni_test("(", "\u0F3C"); diff --git a/source/org/thdl/tib/text/ttt/EWTSTraits.java b/source/org/thdl/tib/text/ttt/EWTSTraits.java index 274f1eb..d9ced7c 100644 --- a/source/org/thdl/tib/text/ttt/EWTSTraits.java +++ b/source/org/thdl/tib/text/ttt/EWTSTraits.java @@ -68,12 +68,11 @@ public final class EWTSTraits implements TTraits { * three. */ public int maxConsonantLength() { return 3; } - /** {-i~M`}, in a tie for the longest wowel, has 5 characters, so - * this is five. (No, 'l-i' and 'r-i' are not wowels (but '-i' - * is). (TODO(DLC)[EWTS->Tibetan]: this is crap! you can put arbitrary wowels - * together using plus signs or Unicode escapes) */ - public int maxWowelLength() { return 3; /* a~M` (TODO(DLC)[EWTS->Tibetan]:! why the 'a'?) */} - + /** Wowels can be arbitrarily long via stacking. But each + * component is no longer, in characters, than this. [~M`] is + * the current winner. */ + public int maxWowelLength() { return 3; } + public boolean isUnicodeConsonant(char ch) { return ((ch != '\u0f48' && ch >= '\u0f40' && ch <= '\u0f6a') || (ch != '\u0f98' && ch >= '\u0f90' && ch <= '\u0fbc') @@ -86,6 +85,7 @@ public final class EWTSTraits implements TTraits { public boolean isUnicodeWowel(char ch) { // TODO(DLC)[EWTS->Tibetan]: what about combiners that combine only with digits? TEST return ((ch >= '\u0f71' && ch <= '\u0f84') + || '\u0f39' == ch || isUnicodeWowelThatRequiresAChen(ch)); } @@ -269,21 +269,15 @@ public final class EWTSTraits implements TTraits { if ("-I".equals(wowel)) return "\u0f81"; if ("I".equals(wowel)) return "\u0f71\u0f72"; // \u0f73 is discouraged - // TODO(DLC)[EWTS->Tibetan]: fix me! - // DLC say ah if ("aM".equals(wowel)) return "\u0f7e"; + // TODO(DLC)[EWTS->Tibetan]: test, test, test. if ("M".equals(wowel)) return "\u0f7e"; - // DLC say ah if ("aH".equals(wowel)) return "\u0f7f"; if ("H".equals(wowel)) return "\u0f7f"; - // DLC say ah if ("a?".equals(wowel)) return "\u0f84"; if ("?".equals(wowel)) return "\u0f84"; - // DLC say ah if ("a~M".equals(wowel)) return "\u0f83"; if ("~M".equals(wowel)) return "\u0f83"; - // DLC say ah if ("a~M`".equals(wowel)) return "\u0f82"; if ("~M`".equals(wowel)) return "\u0f82"; - // DLC say ah if ("aX".equals(wowel)) return "\u0f37"; if ("X".equals(wowel)) return "\u0f37"; - // DLC say ah if ("a~X".equals(wowel)) return "\u0f35"; if ("~X".equals(wowel)) return "\u0f35"; + if ("^".equals(wowel)) return "\u0f39"; return null; } @@ -362,6 +356,9 @@ public final class EWTSTraits implements TTraits { if ("h".equals(l)) return "\u0FB7"; if ("a".equals(l)) return "\u0FB8"; if ("k+Sh".equals(l)) return "\u0FB9"; + + if ("f".equals(l)) return "\u0FA5\u0F39"; + if ("v".equals(l)) return "\u0FA6\u0F39"; return null; } else { if ("R".equals(l)) return "\u0f6a"; @@ -426,17 +423,10 @@ public final class EWTSTraits implements TTraits { public boolean isWowelThatRequiresAChen(String s) { // TODO(DLC)[EWTS->Tibetan]: fix me! return ((s.length() == 1 && (isUnicodeWowelThatRequiresAChen(s.charAt(0)) - || "?MHX".indexOf(s.charAt(0)) >= 0)) - // DLC say ah || "aM".equals(s) // DLC funny... (DLC NOW too funny! affects longest wowel length!) - // DLC say ah || "a?".equals(s) // DLC funny... - // DLC say ah || "aH".equals(s) // DLC funny... - // DLC say ah || "aX".equals(s) // DLC funny... + || "?MHX^".indexOf(s.charAt(0)) >= 0)) || "~X".equals(s) - // DLC say ah || "a~X".equals(s) // DLC funny... || "~M".equals(s) - // DLC say ah || "a~M".equals(s) // DLC funny... || "~M`".equals(s) - // DLC say ah || "a~M`".equals(s) // DLC funny... ); }