From 581643cf59d4fbcdce9bdfc40d09b7e3b3751452 Mon Sep 17 00:00:00 2001 From: dchandler Date: Wed, 10 Dec 2003 06:55:16 +0000 Subject: [PATCH] {DAN,\nLHAG} used to be treated like {DAN, LHAG} but that got broken. Fixed. Added tests for lexer's handling of ACIP spaces etc. --- .../org/thdl/tib/text/ttt/ACIPTshegBarScanner.java | 11 +++++++---- source/org/thdl/tib/text/ttt/PackageTest.java | 12 ++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java index 2c5050c..b1d40ca 100644 --- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java +++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java @@ -111,16 +111,19 @@ public class ACIPTshegBarScanner { /** Helper. Here because ACIP {MTHAR%\nKHA} should be treated the same w.r.t. tsheg insertion regardless of the lex errors and lex warnings found. */ - private static boolean lastNonExceptionalThingWasNonPunctish(ArrayList al) { + private static boolean lastNonExceptionalThingWasAdornmentOr(ArrayList al, int kind) { int i = al.size() - 1; while (i >= 0 && (((TString)al.get(i)).getType() == TString.WARNING || ((TString)al.get(i)).getType() == TString.ERROR)) --i; return (i >= 0 && // FIXME: or maybe i < 0 || ... - (((TString)al.get(i)).getType() == TString.TIBETAN_NON_PUNCTUATION + (((TString)al.get(i)).getType() == kind || ((TString)al.get(i)).getType() == TString.TSHEG_BAR_ADORNMENT)); } + // DLC FIXME "H:\n\n" becomes "H: \n\n", wrongly I think. See + // Tibetan! 5.1 section on formatting Tibetan texts. + /** Returns a list of {@link TString TStrings} corresponding * to s, possibly the empty list (when the empty string is the * input). Each String is either a Latin comment, some Latin @@ -808,7 +811,7 @@ public class ACIPTshegBarScanner { if (('\r' == ch || ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r')) && !al.isEmpty() - && lastNonExceptionalThingWasNonPunctish(al)) { + && lastNonExceptionalThingWasAdornmentOr(al, TString.TIBETAN_NON_PUNCTUATION)) { al.add(new TString(" ", TString.TIBETAN_PUNCTUATION)); } @@ -816,7 +819,7 @@ public class ACIPTshegBarScanner { if (('\r' == ch || ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r')) && !al.isEmpty() - && lastNonExceptionalThingWasNonPunctish(al) + && lastNonExceptionalThingWasAdornmentOr(al, TString.TIBETAN_PUNCTUATION) && ((TString)al.get(al.size() - 1)).getText().equals(",") && s.charAt(i-1) == ',' && (i + (('\r' == ch) ? 2 : 1) < sl diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index fd841f4..0c97c6d 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -7526,6 +7526,18 @@ M+NA "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); uhelp("*#HUm: K+DHA GRO`;.,", "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f40\u0fa2\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); + uhelp("HA,\nHA\n\nHA", "\u0f67\u0f0d \u0f67\u0f0b\n\n\u0f67"); + uhelp("NGA,", "\u0f44\u0f0c\u0f0d"); + uhelp("NGA,\nHA\n\nHA", "\u0f44\u0f0c\u0f0d \u0f67\u0f0b\n\n\u0f67"); + uhelp("MDO,\n\nKA\n\nHA", "\u0f58\u0f51\u0f7c\u0f0d\n\n\u0f40\u0f0b\n\n\u0f67"); + uhelp("GA ,HA", "\u0f42 \u0f0d\u0f67"); + uhelp("GA ,HA", "\u0f42 \u0f0d\u0f67"); + uhelp("GU ,HA", "\u0f42\u0f74\u0f0b\u0f0d\u0f67"); + uhelp("MA ,HA", "\u0f58\u0f0b\u0f0d\u0f67"); + uhelp("GA HA", "\u0f42\u0f0b\u0f67"); + uhelp("GU, ,KHO", "\u0f42\u0f74\u0f0d \u0f0d\u0f41\u0f7c"); + uhelp("GU ,KHO", "\u0f42\u0f74\u0f0b \u0f0d\u0f41\u0f7c"); // FIXME: missing a shad after GU, warn about that. + uhelp("GA HA", "\u0f42\u0f0b \u0f67"); } public void testFixedFormSubjoinedConsonants() { // Usual subjoined RA: