From 33fc836e814b19db715bc001a24e88b4754dda07 Mon Sep 17 00:00:00 2001 From: dchandler Date: Sun, 10 Jul 2005 05:01:03 +0000 Subject: [PATCH] EWTS->Unicode for // now produces \u0f0e as it should. --- source/org/thdl/tib/text/ttt/EWTSTest.java | 4 ++- source/org/thdl/tib/text/ttt/EWTSTraits.java | 1 + .../tib/text/ttt/EWTSTshegBarScanner.java | 28 +++++++++++-------- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/source/org/thdl/tib/text/ttt/EWTSTest.java b/source/org/thdl/tib/text/ttt/EWTSTest.java index 2dc673d..ed55d1a 100644 --- a/source/org/thdl/tib/text/ttt/EWTSTest.java +++ b/source/org/thdl/tib/text/ttt/EWTSTest.java @@ -676,7 +676,9 @@ public class EWTSTest extends TestCase { ewts2uni_test(" ", "\u0F0B"); ewts2uni_test("*", "\u0F0C"); ewts2uni_test("/", "\u0F0D"); - if (RUN_FAILING_TESTS) ewts2uni_test("//", "\u0F0E"); + ewts2uni_test("//", "\u0F0E"); + ewts2uni_test("////", "\u0F0E\u0f0e"); + ewts2uni_test("/////", "\u0F0E\u0f0e\u0f0d"); ewts2uni_test(";", "\u0F0F"); ewts2uni_test("\\u0F10", "\u0F10"); ewts2uni_test("|", "\u0F11"); diff --git a/source/org/thdl/tib/text/ttt/EWTSTraits.java b/source/org/thdl/tib/text/ttt/EWTSTraits.java index 803b52c..d489076 100644 --- a/source/org/thdl/tib/text/ttt/EWTSTraits.java +++ b/source/org/thdl/tib/text/ttt/EWTSTraits.java @@ -397,6 +397,7 @@ public final class EWTSTraits implements TTraits { if ("R".equals(l)) return "\u0f6a"; if ("Y".equals(l)) return "\u0f61"; if ("W".equals(l)) return "\u0f5d"; + if ("//".equals(l)) return "\u0f0e"; if (!TibetanMachineWeb.isKnownHashKey(l)) { // System.err.println("Getting unicode for the following is hard: '" diff --git a/source/org/thdl/tib/text/ttt/EWTSTshegBarScanner.java b/source/org/thdl/tib/text/ttt/EWTSTshegBarScanner.java index 5447e89..f6201c7 100644 --- a/source/org/thdl/tib/text/ttt/EWTSTshegBarScanner.java +++ b/source/org/thdl/tib/text/ttt/EWTSTshegBarScanner.java @@ -79,7 +79,7 @@ class EWTSTshegBarScanner extends TTshegBarScanner { // TODO(DLC)[EWTS->Tibetan]:: '@#', in ewts->tmw, is not working // TODO(DLC)[EWTS->Tibetan]:: 'jamX 'jam~X one is not working in ->tmw mode // TODO(DLC)[EWTS->Tibetan]:: dzaHsogs is not working - for (int i = 0; i < sl; i++) { + for (int i = 0; i < sl; i++) { // i is modified in the loop, also if (isValidInsideTshegBar(sb.charAt(i))) { StringBuffer tbsb = new StringBuffer(); for (; i < sl; i++) { @@ -96,16 +96,22 @@ class EWTSTshegBarScanner extends TTshegBarScanner { // NOTE: It's questionable, but we treat // \u0f00 like punctuation because it was // easier coding that way. - if ((sb.charAt(i) >= EWTSTraits.PUA_MIN - && sb.charAt(i) <= EWTSTraits.PUA_MAX) - || (sb.charAt(i) >= '\u0f00' && sb.charAt(i) <= '\u0f17') - || (sb.charAt(i) >= '\u0f1a' && sb.charAt(i) <= '\u0f1f') - || (sb.charAt(i) >= '\u0fbe' && sb.charAt(i) <= '\u0fcc') - || (sb.charAt(i) >= '\u0fcf' && sb.charAt(i) <= '\u0fd1') - || (EWTSTraits.SAUVASTIKA == sb.charAt(i)) - || (EWTSTraits.SWASTIKA == sb.charAt(i)) - || (" /;|!:=_@#$%<>()*&\r\n\t\u0f36\u0f38\u0f89\u0f8a\u0f8b".indexOf(sb.charAt(i)) - >= 0)) { + if (i + 1 < sl + && sb.charAt(i) == '/' + && sb.charAt(i + 1) == '/') { + al.add(new TString("EWTS", "//", + TString.TIBETAN_PUNCTUATION)); + ++i; + } else if ((sb.charAt(i) >= EWTSTraits.PUA_MIN + && sb.charAt(i) <= EWTSTraits.PUA_MAX) + || (sb.charAt(i) >= '\u0f00' && sb.charAt(i) <= '\u0f17') + || (sb.charAt(i) >= '\u0f1a' && sb.charAt(i) <= '\u0f1f') + || (sb.charAt(i) >= '\u0fbe' && sb.charAt(i) <= '\u0fcc') + || (sb.charAt(i) >= '\u0fcf' && sb.charAt(i) <= '\u0fd1') + || (EWTSTraits.SAUVASTIKA == sb.charAt(i)) + || (EWTSTraits.SWASTIKA == sb.charAt(i)) + || (" /;|!:=_@#$%<>()*&\r\n\t\u0f36\u0f38\u0f89\u0f8a\u0f8b".indexOf(sb.charAt(i)) + >= 0)) { al.add(new TString("EWTS", sb.substring(i, i+1), TString.TIBETAN_PUNCTUATION)); } else {