diff --git a/source/org/thdl/tib/text/ttt/ACIPString.java b/source/org/thdl/tib/text/ttt/ACIPString.java index d47d1d0..f05c0b5 100644 --- a/source/org/thdl/tib/text/ttt/ACIPString.java +++ b/source/org/thdl/tib/text/ttt/ACIPString.java @@ -34,11 +34,14 @@ public class ACIPString { public static final int COMMENT = 0; /** For Folio markers like @012B */ public static final int FOLIO_MARKER = 1; + /** For Latin letters and numbers etc. [*LINE BREAK?] uses this, + * for example. */ + public static final int LATIN = 2; /** For Tibetan letters and numbers etc. */ - public static final int TIBETAN_NON_PUNCTUATION = 2; + public static final int TIBETAN_NON_PUNCTUATION = 3; /** For tshegs, whitespace and the like, but not combining * punctutation like %, o, :, m, and x */ - public static final int TIBETAN_PUNCTUATION = 3; + public static final int TIBETAN_PUNCTUATION = 4; /** For the start of a [*probable correction] or [*possible correction?] */ public static final int CORRECTION_START = 5; /** Denotes the end of a [*probable correction] */ @@ -65,7 +68,7 @@ public class ACIPString { public static final int END_PAREN = 16; /** For things that are not legal syntax, such as a file that * contains just "[# HALF A COMMEN" */ - public static final int ERROR = 17; /* DLC let the user know. */ + public static final int ERROR = 17; /** Returns true if and only if this string is Latin (usually * English). 
Returns false if this string is transliteration of @@ -105,6 +108,7 @@ public class ACIPString { String typeString = "HUH?????"; if (type == COMMENT) typeString = "COMMENT"; if (type == FOLIO_MARKER) typeString = "FOLIO_MARKER"; + if (type == LATIN) typeString = "LATIN"; if (type == TIBETAN_NON_PUNCTUATION) typeString = "TIBETAN_NON_PUNCTUATION"; if (type == TIBETAN_PUNCTUATION) typeString = "TIBETAN_PUNCTUATION"; if (type == CORRECTION_START) typeString = "CORRECTION_START"; @@ -120,6 +124,6 @@ public class ACIPString { if (type == START_PAREN) typeString = "START_PAREN"; if (type == END_PAREN) typeString = "END_PAREN"; if (type == ERROR) typeString = "ERROR"; - return typeString + ":\"" + getText() + "\""; + return typeString + ":{" + getText() + "}"; } } diff --git a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java index 30362bc..10690c6 100644 --- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java +++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java @@ -33,14 +33,20 @@ import org.thdl.util.ThdlDebug; * @author David Chandler */ public class ACIPTshegBarScanner { - // DLC DOC + /** Useful for testing. Gives error messages on standard output + * about why we can't scan the document perfectly and exits with + * non-zero return code, or says "Good scan!" otherwise and exits + * with code zero.

FIXME: not so efficient; copies the whole + * file into memory first. */ public static void main(String[] args) throws IOException { - if (args.length != 1) { - System.out.println("Bad args! Need just the ACIP file's path."); + boolean strict = true; + if (args.length != 2 + || (!(strict = "--strict".equals(args[0])) && !"--lenient".equals(args[0]))) { + System.out.println("Bad args! Need '--strict filename' or '--lenient filename'."); System.exit(1); } StringBuffer errors = new StringBuffer(); - ArrayList al = scanFile(args[0], errors); + ArrayList al = scanFile(args[1], errors, strict); if (errors.length() > 0) { System.out.println("Errors scanning ACIP input file: "); @@ -52,20 +58,26 @@ public class ACIPTshegBarScanner { System.out.println("Good scan!"); System.exit(0); } - - // DLC DOC - // DLC FIXME: not so efficient; copies the whole file into memory first - public static ArrayList scanFile(String fname, StringBuffer errors) throws IOException { + + /** Scans an ACIP file with path fname into tsheg bars. If errors + * is non-null, error messages will be appended to it. If strict + * is true, then you're more likely to see error + * messages. Returns a list of ACIPStrings that is the + * scan.

FIXME: not so efficient; copies the whole file into + * memory first. + * @throws IOException if we cannot read in the ACIP input file */ + public static ArrayList scanFile(String fname, StringBuffer errors, boolean strict) throws IOException { StringBuffer s = new StringBuffer(); char ch[] = new char[8192]; BufferedReader in - = new BufferedReader(new InputStreamReader(new FileInputStream(fname))); // DLC FIXME: specify encoding. + = new BufferedReader(new InputStreamReader(new FileInputStream(fname), + "US-ASCII")); int amt; while (-1 != (amt = in.read(ch))) { s.append(ch, 0, amt); } - return scan(s.toString(), errors); + return scan(s.toString(), errors, !strict); } /** Returns a list of {@link ACIPString ACIPStrings} corresponding @@ -81,14 +93,18 @@ public class ACIPTshegBarScanner { * errors, each followed by a '\n'. There is at least one case * where no ERROR ACIPString will appear but errors will be * modified. + * @param lenientPeriods if and only if this is true, periods + * will never cause errors, even if iffy text like "PAS... LA " + * appears. */ - public static ArrayList scan(String s, StringBuffer errors) { + public static ArrayList scan(String s, StringBuffer errors, boolean lenientPeriods) { // the size depends on whether it's mostly Tibetan or mostly // Latin and a number of other factors. This is meant to be // an underestimate, but not too much of an underestimate. ArrayList al = new ArrayList(s.length() / 10); + boolean waitingForMatchingIllegalClose = false; int sl = s.length(); int currentType = ACIPString.ERROR; int startOfString = 0; @@ -101,11 +117,11 @@ public class ACIPTshegBarScanner { ch = s.charAt(i); if (ACIPString.COMMENT == currentType && ch != ']') { if ('[' == ch) { - al.add(new ACIPString("Found an open square bracket, [, within a [#COMMENT]-style comment. Square brackets may not appear in comments.\n", + al.add(new ACIPString("Found an open bracket within a [#COMMENT]-style comment. 
Brackets may not appear in comments.\n", ACIPString.ERROR)); if (null != errors) errors.append("Offset " + i + ": " - + "Found an open square bracket, [, within a [#COMMENT]-style comment. Square brackets may not appear in comments.\n"); + + "Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"); } continue; } @@ -119,24 +135,42 @@ public class ACIPTshegBarScanner { currentType)); } al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR)); + if (!waitingForMatchingIllegalClose) { + if (null != errors) { + errors.append("Offset " + i + ": " + + "Found a truly unmatched close bracket, [ or {.\n"); + } + } + waitingForMatchingIllegalClose = false; if (null != errors) errors.append("Offset " + i + ": " - + "Found a closing square bracket, ], without a matching open square bracket, [. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); + + "Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); startOfString = i+1; currentType = ACIPString.ERROR; } else { int stackTop = ((Integer)bracketTypeStack.pop()).intValue(); - String text = s.substring(startOfString, i+1); + int end = startOfString; if (ACIPString.CORRECTION_START == stackTop) { + + // This definitely indicates a new token. char prevCh = s.charAt(i-1); + if (prevCh == '?') + end = i - 1; + else + end = i; + if (startOfString < end) { + al.add(new ACIPString(s.substring(startOfString, end), + currentType)); + } + if ('?' 
!= prevCh) { currentType = ACIPString.PROBABLE_CORRECTION; } else { currentType = ACIPString.POSSIBLE_CORRECTION; } } - al.add(new ACIPString(text, currentType)); + al.add(new ACIPString(s.substring(end, i+1), currentType)); startOfString = i+1; currentType = ACIPString.ERROR; } @@ -208,8 +242,10 @@ public class ACIPTshegBarScanner { } else { // We see comments appear not as [#COMMENT], but // as [COMMENT] sometimes. We make special cases - // for some English comments. DLC FIXME: put - // these in a config file. + // for some English comments. There's no need to + // make this mechanism extensible, because you + // can easily edit the ACIP text so that it uses + // [#COMMENT] notation instead of [COMMENT]. String[] englishComments = new String[] { "FIRST", "SECOND", // S5274I.ACT @@ -227,6 +263,7 @@ public class ACIPTshegBarScanner { "THE FOLLOWING TEXT HAS INCOMPLETE SECTIONS, WHICH ARE ON ORDER", // SE6260A.INC "@DATA INCOMPLETE HERE", // SE6260A.INC "@DATA MISSING HERE", // SE6260A.INC + "LINE APPARENTLY MISSING THIS PAGE", // TD4035I.INC "DATA INCOMPLETE HERE", // TD4226I2.INC "DATA MISSING HERE", // just being consistent "FOLLOWING SECTION WAS NOT AVAILABLE WHEN THIS EDITION WAS\nPRINTED, AND IS SUPPLIED FROM ANOTHER, PROBABLY THE ORIGINAL:", // S0018N.ACT @@ -255,6 +292,74 @@ public class ACIPTshegBarScanner { break; } } + if (!foundOne && i+1 < sl && s.charAt(i+1) == '*') { + // Identify [*LINE BREAK?] as an English + // correction. Every correction not on this + // list is considered to be Tibetan. DLC + // FIXME: make this extensible via a config + // file or at least a System property (which + // could be a comma-separated list of these + // creatures). + + // If "LINE" is in the list below, then [* + // LINE], [* LINE?], [*LINE], [*LINE?], [* + // LINE OUT ?], etc. will be considered + // English corrections. I.e., whitespace + // before and anything after doesn't prevent a + // match. 
+ String[] englishCorrections = new String[] { + "LINE", // KD0001I1.ACT + "DATA", // KL0009I2.INC + "BLANK", // KL0009I2.INC + "NOTE", // R0001F.ACM + "alternate", // R0018F.ACE + "02101-02150 missing", // R1003A3.INC + "51501-51550 missing", // R1003A52.ACT + "BRTAGS ETC", // S0002N.ACT + "TSAN, ETC", // S0015N.ACT + "SNYOMS, THROUGHOUT", // S0016N.ACT + "KYIS ETC", // S0019N.ACT + "MISSING", // S0455M.ACT + "this", // S6850I1B.ALT + "THIS", // S0057M.ACT + }; + int begin; + for (begin = i+2; begin < sl; begin++) { + if (!isWhitespace(s.charAt(begin))) + break; + } + int end; + for (end = i+2; end < sl; end++) { + if (s.charAt(end) == ']') + break; + } + int realEnd = end; + if (end < sl && s.charAt(end-1) == '?') + --realEnd; + if (end < sl && begin < realEnd) { + String interestingSubstring + = s.substring(begin, realEnd); + for (int ec = 0; ec < englishCorrections.length; ec++) { + if (interestingSubstring.startsWith(englishCorrections[ec])) { + al.add(new ACIPString(s.substring(i, i+2), + ACIPString.CORRECTION_START)); + al.add(new ACIPString(s.substring(i+2, realEnd), + ACIPString.LATIN)); + if (s.charAt(end - 1) == '?') { + al.add(new ACIPString(s.substring(end-1, end+1), + ACIPString.POSSIBLE_CORRECTION)); + } else { + al.add(new ACIPString(s.substring(end, end+1), + ACIPString.PROBABLE_CORRECTION)); + } + foundOne = true; + startOfString = end+1; + i = startOfString - 1; + break; + } + } + } + } if (foundOne) break; } @@ -269,6 +374,11 @@ public class ACIPTshegBarScanner { if ('*' == nextCh) { currentType = ACIPString.CORRECTION_START; bracketTypeStack.push(new Integer(currentType)); + al.add(new ACIPString(s.substring(i, i+2), + ACIPString.CORRECTION_START)); + currentType = ACIPString.ERROR; + startOfString = i+2; + i = startOfString - 1; break; } else if ('#' == nextCh) { currentType = ACIPString.COMMENT; @@ -276,18 +386,31 @@ public class ACIPTshegBarScanner { break; } } - // This is an error. 
DLC FIXME: in practice - // [COMMENTS APPEAR WITHOUT # MARKS]. Though - // "... [" could cause this too. + // This is an error. Sometimes [COMMENTS APPEAR + // WITHOUT # MARKS]. Though "... [" could cause + // this too. al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR)); + if (waitingForMatchingIllegalClose) { + if (null != errors) { + errors.append("Offset " + i + ": " + + "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.\n"); + } + } + waitingForMatchingIllegalClose = true; if (null != errors) { String inContext = s.substring(i, i+Math.min(sl-i, 10)); - if (sl-i > 10) { - inContext = inContext + "..."; + if (inContext.indexOf("\r") >= 0) { + inContext = inContext.substring(0, inContext.indexOf("\r")); + } else if (inContext.indexOf("\n") >= 0) { + inContext = inContext.substring(0, inContext.indexOf("\n")); + } else { + if (sl-i > 10) { + inContext = inContext + "..."; + } } errors.append("Offset " + i + ": " - + "Found an illegal open square bracket, [ (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open square bracket?\n"); + + "Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\n"); } startOfString = i + 1; currentType = ACIPString.ERROR; @@ -303,10 +426,15 @@ public class ACIPTshegBarScanner { currentType = ACIPString.ERROR; } - // We look for @N[AB], @NN[AB], @NNN[AB], @NNNN[AB], - // @NNNNN[AB], and @NNNNNN[AB] only, that is from one - // to six digits. 
- for (int numdigits = 1; numdigits <= 5; numdigits++) { + // We look for {@N{AB}, @NN{AB}, ..., @NNNNNN{AB}}, + // {@[N{AB}], @[NN{AB}], ..., @[NNNNNN{AB}]}, + // {@N{AB}.N, @NN{AB}.N, ..., @NNNNNN{AB}.N}, {@N, + // @NN, ..., @NNNNNN}, and {@{AB}N, @{AB}NN, + // ... @{AB}NNNNNN} only, that is from one to six + // digits. Each of these folio marker formats occurs + // in practice. + for (int numdigits = 6; numdigits >= 1; numdigits--) { + // @NNN{AB} and @NNN{AB}.N cases: if (i+numdigits+1 < sl && (s.charAt(i+numdigits+1) == 'A' || s.charAt(i+numdigits+1) == 'B')) { boolean allAreNumeric = true; @@ -316,6 +444,73 @@ public class ACIPTshegBarScanner { break; } } + if (allAreNumeric) { + // Is this "@012B " or "@012B.3 "? + int extra; + if (i+numdigits+2 < sl && s.charAt(i+numdigits+2) == '.') { + if (!(i+numdigits+4 < sl && isNumeric(s.charAt(i+numdigits+3)) + && !isNumeric(s.charAt(i+numdigits+4)))) { + al.add(new ACIPString(s.substring(i, i+numdigits+3), ACIPString.ERROR)); + String inContext = s.substring(i, i+Math.min(sl-i, 10)); + if (inContext.indexOf("\r") >= 0) { + inContext = inContext.substring(0, inContext.indexOf("\r")); + } else if (inContext.indexOf("\n") >= 0) { + inContext = inContext.substring(0, inContext.indexOf("\n")); + } else { + if (sl-i > 10) { + inContext = inContext + "..."; + } + } + if (null != errors) + errors.append("Offset " + i + ": " + + "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.\n"); + startOfString = i+numdigits+3; + i = startOfString - 1; + currentType = ACIPString.ERROR; + break; + } + if (i+numdigits+4 < sl && (s.charAt(i+numdigits+4) == '.' 
|| s.charAt(i+numdigits+4) == 'A' || s.charAt(i+numdigits+4) == 'B' || s.charAt(i+numdigits+4) == 'a' || s.charAt(i+numdigits+4) == 'b' || isNumeric(s.charAt(i+numdigits+4)))) { + al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR)); + String inContext = s.substring(i, i+Math.min(sl-i, 10)); + if (inContext.indexOf("\r") >= 0) { + inContext = inContext.substring(0, inContext.indexOf("\r")); + } else if (inContext.indexOf("\n") >= 0) { + inContext = inContext.substring(0, inContext.indexOf("\n")); + } else { + if (sl-i > 10) { + inContext = inContext + "..."; + } + } + if (null != errors) + errors.append("Offset " + i + ": " + + "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.\n"); + startOfString = i+1; // DLC FIXME: skip over more? + currentType = ACIPString.ERROR; + break; + } + extra = 4; + } else { + extra = 2; + } + al.add(new ACIPString(s.substring(i, i+numdigits+extra), + ACIPString.FOLIO_MARKER)); + startOfString = i+numdigits+extra; + i = startOfString - 1; + currentType = ACIPString.ERROR; + break; + } + } + + // @{AB}NNN case: + if (i+numdigits+1 < sl + && (s.charAt(i+1) == 'A' || s.charAt(i+1) == 'B')) { + boolean allAreNumeric = true; + for (int k = 1; k <= numdigits; k++) { + if (!isNumeric(s.charAt(i+1+k))) { + allAreNumeric = false; + break; + } + } if (allAreNumeric) { al.add(new ACIPString(s.substring(i, i+numdigits+2), ACIPString.FOLIO_MARKER)); @@ -325,8 +520,8 @@ public class ACIPTshegBarScanner { break; } } - // System.out.println("DLC NOW HERE xxx y:" + (i+numdigits+3 < sl) + " z:" + s.charAt(i+1) + s.charAt(i+numdigits+2) + s.charAt(i+numdigits+3)); + // @[NNN{AB}] case: if (i+numdigits+3 < sl && s.charAt(i+1) == '[' && s.charAt(i+numdigits+3) == ']' && (s.charAt(i+numdigits+2) == 'A' || s.charAt(i+numdigits+2) == 'B')) { @@ -346,12 +541,41 @@ public class ACIPTshegBarScanner { break; } } + + // This case, @NNN, must come after the @NNN{AB} case. 
+ if (i+numdigits+1 < sl && s.charAt(i+numdigits+1) == ' ') { + boolean allAreNumeric = true; + for (int k = 1; k <= numdigits; k++) { + if (!isNumeric(s.charAt(i+k))) { + allAreNumeric = false; + break; + } + } + if (allAreNumeric) { + al.add(new ACIPString(s.substring(i, i+numdigits+1), + ACIPString.FOLIO_MARKER)); + startOfString = i+numdigits+1; + i = startOfString - 1; + currentType = ACIPString.ERROR; + break; + } + } } if (startOfString == i) { al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR)); + String inContext = s.substring(i, i+Math.min(sl-i, 10)); + if (inContext.indexOf("\r") >= 0) { + inContext = inContext.substring(0, inContext.indexOf("\r")); + } else if (inContext.indexOf("\n") >= 0) { + inContext = inContext.substring(0, inContext.indexOf("\n")); + } else { + if (sl-i > 10) { + inContext = inContext + "..."; + } + } if (null != errors) errors.append("Offset " + i + ": " - + "Found an illegal at sign, @. @012B is an example of a legal folio marker.\n"); + + "Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.\n"); startOfString = i+1; currentType = ACIPString.ERROR; } @@ -391,7 +615,7 @@ public class ACIPTshegBarScanner { currentType = ACIPString.ERROR; } - // DLC support nesting like (NYA (BA))? + // We do not support nesting like (NYA (BA)). if (startParenIndex >= 0) { if (ch == '(') { @@ -421,7 +645,8 @@ public class ACIPTshegBarScanner { break; // end '(',')' case case '?': - if (bracketTypeStack.empty()) { + if (bracketTypeStack.empty() || i+1>=sl + || (s.charAt(i+1) != ']' && s.charAt(i+1) != '}')) { // The tsheg bar ends here; new token. if (startOfString < i) { al.add(new ACIPString(s.substring(startOfString, i), @@ -443,18 +668,25 @@ public class ACIPTshegBarScanner { startOfString = i; currentType = ACIPString.ERROR; } - // . is used for a non-breaking tsheg, such as in {NGO.,} and {....,DAM}. We give an error unless , or . follows '.'. 
- if (i + 1 < sl && (s.charAt(i+1) == '.' || s.charAt(i+1) == ',')) { + // . is used for a non-breaking tsheg, such as in + // {NGO.,} and {....,DAM}. Unless lenientPeriods is set, we give an + // error unless ',', '.', a line break, or [A-Za-z] follows '.'. + if (lenientPeriods + || (i + 1 < sl + && (s.charAt(i+1) == '.' || s.charAt(i+1) == ',' + || (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n') + || (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z') + || (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z')))) { al.add(new ACIPString(s.substring(i, i+1), ACIPString.TIBETAN_PUNCTUATION)); } else { - al.add(new ACIPString("A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\".", + al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR)); if (null != errors) errors.append("Offset " + i + ": " - + "A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\".\n"); - + + "A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n"); } + startOfString = i+1; break; // end '.' case // Classic tsheg bar enders: @@ -493,9 +725,15 @@ public class ACIPTshegBarScanner { } al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR)); - if (null != errors) - errors.append("Offset " + i + ": " - + "Found an illegal character, " + ch + "\n"); + if (null != errors) { + if ((int)ch == 65533) { + errors.append("Offset " + i + ": " + + "Found an illegal, unprintable character.\n"); + } else { + errors.append("Offset " + i + ": " + + "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".\n"); + } + } startOfString = i+1; currentType = ACIPString.ERROR; } else { @@ -510,16 +748,24 @@ public class ACIPTshegBarScanner { al.add(new ACIPString(s.substring(startOfString, sl), currentType)); } + if (waitingForMatchingIllegalClose) { + al.add(new ACIPString("UNEXPECTED END OF INPUT", + ACIPString.ERROR)); + if (null != errors) { + errors.append("Offset END: " + + "Truly unmatched open bracket found.\n"); + } + } if (!bracketTypeStack.empty()) { al.add(new 
ACIPString("UNEXPECTED END OF INPUT", ACIPString.ERROR)); if (null != errors) { if (ACIPString.COMMENT == currentType) { errors.append("Offset END: " - + "Unmatched open square bracket, [, found. A comment does not terminate.\n"); + + "Unmatched open bracket found. A comment does not terminate.\n"); } else { errors.append("Offset END: " - + "Unmatched open square bracket, [, found. A correction does not terminate.\n"); + + "Unmatched open bracket found. A correction does not terminate.\n"); } } } @@ -545,6 +791,11 @@ public class ACIPTshegBarScanner { return ch >= '0' && ch <= '9'; } + /** See implementation. */ + private static boolean isWhitespace(char ch) { + return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; + } + /** See implementation. */ private static boolean isAlpha(char ch) { return ch == '\'' // 23rd consonant @@ -554,6 +805,8 @@ public class ACIPTshegBarScanner { || ch == 'o' || ch == 'x' || ch == ':' + || ch == '^' + || ch == '\\' || ch == '-' || ch == '+' diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index ed83876..3095b68 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -23,6 +23,8 @@ package org.thdl.tib.text.ttt; import org.thdl.util.ThdlOptions; +import java.util.ArrayList; + import junit.framework.TestCase; @@ -6921,4 +6923,1748 @@ tstHelper("ZUNGS"); tstHelper("ZUR"); } + + private static void shelp(String s, String expectedErrors) { + shelp(s, expectedErrors, null); + } + + private static void shelp(String s, String expectedErrors, String expectedScan) { + shelp(s, expectedErrors, false, expectedScan); + } + + private static void shelp(String s, String expectedErrors, boolean lenientPeriods, String expectedScan) { + StringBuffer errors = new StringBuffer(); + ArrayList al = ACIPTshegBarScanner.scan(s, errors, lenientPeriods); + if (null != expectedScan) { + if (!al.toString().equals(expectedScan)) { + 
System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following scan:"); + System.out.println(expectedScan); + System.out.println("Instead, it caused the following scan:"); + System.out.println(al); + assertTrue(false); + } + } + if (null != expectedErrors) { + if (!expectedErrors.equals(errors.toString())) { + System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following errors:"); + System.out.print(expectedErrors); + System.out.println("Instead, it caused the following errors:"); + System.out.print(errors); + assertTrue(false); + } + } + } + + /** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer, boolean)}. */ + public void testScanner() { + shelp("LA...SGRUB", + "", + "[TIBETAN_NON_PUNCTUATION:{LA}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_NON_PUNCTUATION:{SGRUB}]"); // DLC FIXME + shelp("PAS... LA", + "Offset 5: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n", + "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, ERROR:{.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]"); + shelp("PAS... LA", + "", + true, + "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]"); + shelp("^GONG SA,", + "", + "[TIBETAN_NON_PUNCTUATION:{^GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]"); + shelp("^ GONG SA,", + "", + "[TIBETAN_NON_PUNCTUATION:{^}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]"); + // DLC FIXME: test that ^ and ^GONG are handled correctly on the whole. + shelp("", "", "[]"); + shelp("[DD]", ""); + shelp("[", + "Offset 0: Found an illegal open bracket (in context, this is [). 
Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n"); + shelp("{", + "Offset 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n"); + shelp("DD", ""); + shelp("DD]", + "Offset 2: Found a truly unmatched close bracket, [ or {.\nOffset 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); + + shelp("///NYA", "Offset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n"); + shelp("/NYA/", ""); + shelp("[?][BP][LS][DD1][DD2][DDD][DR][# (<{A COMMENT)}>]", ""); + shelp("[LS][# A [[[[[COMMENT][LS]", + "Offset 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" + + "Offset 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" + + "Offset 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" + + "Offset 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" + + "Offset 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"); + shelp("[ILLEGAL COMMENT]", + "Offset 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: Found a closing bracket without a matching open bracket. 
Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); + shelp("(BSKYABS GRO)", ""); // DLC WHAT ARE THESE FOR? + shelp("BSKYABS GRO)", "Offset 11: Unexpected closing parenthesis, ), found.\n"); + shelp("BSKYABS GRO(", "Offset END: Unmatched open parenthesis, (, found.\n"); + shelp("((NESTAGE))", "Offset 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10: Unexpected closing parenthesis, ), found.\n"); + shelp("(BA)(PA)NYA(CA)", ""); + shelp("NYAx", ""); + shelp("NYA x", ""); + shelp("[# A PARTIAL COM", "Offset END: Unmatched open bracket found. A comment does not terminate.\n"); + shelp("[* BSKYABS ", "Offset END: Unmatched open bracket found. A correction does not terminate.\n"); + shelp("SKYABS [*BSKYABS?] GRO [?]", ""); + shelp(" SKYABS GRO ", ""); + shelp("SKYABS [*BSKYABS] GRO [?]", "", "[TIBETAN_NON_PUNCTUATION:{SKYABS}, TIBETAN_PUNCTUATION:{ }, CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{BSKYABS}, PROBABLE_CORRECTION:{]}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GRO}, TIBETAN_PUNCTUATION:{ }, QUESTION:{[?]}]"); + shelp("[*RVA]", "", "[CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{RVA}, PROBABLE_CORRECTION:{]}]"); + shelp("[*RVA?]", "", "[CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{RVA}, POSSIBLE_CORRECTION:{?]}]"); + shelp("[* RVA ]", "", "[CORRECTION_START:{[*}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{RVA}, TIBETAN_PUNCTUATION:{ }, PROBABLE_CORRECTION:{]}]"); + shelp("[*RVA ?]", "", "[CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{RVA}, TIBETAN_PUNCTUATION:{ }, POSSIBLE_CORRECTION:{?]}]"); + shelp("[*RVA? 
]", "", "[CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{RVA}, QUESTION:{?}, TIBETAN_PUNCTUATION:{ }, PROBABLE_CORRECTION:{]}]"); + shelp("[*LINE BREAK]", "", "[CORRECTION_START:{[*}, LATIN:{LINE BREAK}, PROBABLE_CORRECTION:{]}]"); + shelp("[*LINE BREAK?]", "", "[CORRECTION_START:{[*}, LATIN:{LINE BREAK}, POSSIBLE_CORRECTION:{?]}]"); + shelp("[*\n\t\r LINEYO ?]", "", "[CORRECTION_START:{[*}, LATIN:{\n\t\r LINEYO }, POSSIBLE_CORRECTION:{?]}]"); + shelp("[*\n\t\r LINEYO ]", "", "[CORRECTION_START:{[*}, LATIN:{\n\t\r LINEYO }, PROBABLE_CORRECTION:{]}]"); + shelp("[*DATA INCOMPLETE HERE?]", "", "[CORRECTION_START:{[*}, LATIN:{DATA INCOMPLETE HERE}, POSSIBLE_CORRECTION:{?]}]"); + shelp("[*THIS\r\nWAS SUPPOSED TO BE THE SIXTH CATEGORY; THE CATEGORIES MENTIONED\r\nABOVE SEEM TO BE OUT OF ORDER THROUGH THIS SECTION]\r\n", ""); + + // DLC test ACIP files containing just "x", "o", ":", "m" and "%" + shelp("x o % : m", ""); + shelp("AAx AAo AA% AA: AAm", ""); + + shelp("/NYA ", "Offset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n"); + shelp("(NYA ", "Offset END: Unmatched open parenthesis, (, found.\n"); + shelp("[*NYA ", "Offset END: Unmatched open bracket found. A correction does not terminate.\n"); + shelp("?", "", "[QUESTION:{?}]"); + shelp("KHAN~ BAR ", "Offset 4: Found an illegal character, ~, with ordinal 126.\n"); + shelp("[* Correction with []]", + "Offset 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); + + // DLC FIXME: the line SDIG PA'I GROGS PO'I LAG TU SON PAR 'GYUR PA is followed by a blank line. Note that it's "PA", not "PA ", ending it. 
Autocorrect to the latter. + + // DLC FIXME: @0B1 isn't handled correctly! + + shelp(",NGES ? PA", "", "[TIBETAN_PUNCTUATION:{,}, TIBETAN_NON_PUNCTUATION:{NGES}, TIBETAN_PUNCTUATION:{ }, QUESTION:{?}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{PA}]"); + shelp("K\\,", "", "[TIBETAN_NON_PUNCTUATION:{K\\}, TIBETAN_PUNCTUATION:{,}]"); + shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR%}]"); + shelp("PHYIR;", "", "[TIBETAN_NON_PUNCTUATION:{PHYIR}, TIBETAN_PUNCTUATION:{;}]"); + shelp("......,DAM ", + "", + "[TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{,}, TIBETAN_NON_PUNCTUATION:{DAM}, TIBETAN_PUNCTUATION:{ }]"); + shelp("NGO.,", "", "[TIBETAN_NON_PUNCTUATION:{NGO}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{,}]"); + + // Test that we handle some known comments that occur in + // illegal syntax: + shelp("[text missing]", "", "[COMMENT:{[#text missing]}]"); + shelp("[FIRST][SECOND][MISSING PAGE][MISSING FOLIO]", ""); + shelp("[THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]\r\n", "", "[COMMENT:{[#THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]}, TIBETAN_PUNCTUATION:{\r}, TIBETAN_PUNCTUATION:{\n}]"); + shelp("[THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\r\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]\r\n", "", "[COMMENT:{[#THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\r\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT]}, TIBETAN_PUNCTUATION:{\r}, TIBETAN_PUNCTUATION:{\n}]"); + + // Test folio markers: + shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]"); + shelp("@001 ", "", "[FOLIO_MARKER:{@001}, TIBETAN_PUNCTUATION:{ }]"); + shelp("@19-20A", + "Offset 0: Found an illegal at sign, @ (in context, this is @19-20A). 
@012B is an example of a legal folio marker.\n", + "[ERROR:{@}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // DLC FIXME: yes it occurs in the kangyur. + shelp("@[7B]", ""); + shelp("@012A.3KA", + "", + "[FOLIO_MARKER:{@012A.3}, TIBETAN_NON_PUNCTUATION:{KA}]"); + shelp("@012A.34", + "Offset 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n", + "[ERROR:{@012A.}, TIBETAN_NON_PUNCTUATION:{34}]"); + shelp("@[07B]", ""); + shelp("@[00007B]", ""); + shelp("@7B", ""); + shelp("@07B", ""); + shelp("@00007B", "", "[FOLIO_MARKER:{@00007B}]"); + shelp("@00007 ", "", "[FOLIO_MARKER:{@00007}, TIBETAN_PUNCTUATION:{ }]"); + shelp("@B00007KA", "", "[FOLIO_MARKER:{@B00007}, TIBETAN_NON_PUNCTUATION:{KA}]"); + shelp("@[00007A]KA", "", "[FOLIO_MARKER:{@[00007A]}, TIBETAN_NON_PUNCTUATION:{KA}]"); + + shelp("GA-YENG", "", "[TIBETAN_NON_PUNCTUATION:{GA-YENG}]"); + shelp("N+YA", "", "[TIBETAN_NON_PUNCTUATION:{N+YA}]"); + + shelp("{ DD }", "", "[DD:{{ DD }}]"); // TD3790E2.ACT + shelp("{ BP }", "", "[BP:{{ BP }}]"); // TD3790E2.ACT + + } + + /** Tests some more tsheg bars, these from Dr. Lacey's critical + edition of Mahavyutpatti. + +

These are courtesy Peter E. Hauer, Linguasoft. Taken from + ACIP's website, but that copy was overridden by one with + corrections sent to Peter by Robert Chilton. */ + public void testMV() { + System.out.println(""); + System.out.println(""); + System.out.println(""); + System.out.println("From MV:"); + System.out.println(""); + System.out.println(""); + System.out.println(""); + + tstHelper("'BANGS"); + tstHelper("'BAR"); + tstHelper("'BLTAS"); + tstHelper("'BRA"); + tstHelper("'BRA'I"); + tstHelper("'BRANG"); + tstHelper("'BRAS"); + tstHelper("'BRED"); + tstHelper("'BREG"); + tstHelper("'BREL"); + tstHelper("'BRIM"); + tstHelper("'BRU'I"); + tstHelper("'BUL"); + tstHelper("'BUS"); + tstHelper("'BYAMS"); + tstHelper("'BYED"); + tstHelper("'BYES"); + tstHelper("'BYIN"); + tstHelper("'BYOD"); + tstHelper("'BYOR"); + tstHelper("'BYUNG"); + tstHelper("'CHED"); + tstHelper("'CHI"); + tstHelper("'CHING"); + tstHelper("'CHONGS"); + tstHelper("'CHOS"); + tstHelper("'DAB"); + tstHelper("'DAGS"); + tstHelper("'DAS"); + tstHelper("'DEBS"); + tstHelper("'DI"); + tstHelper("'DOD"); + tstHelper("'DOM"); + tstHelper("'DOMS"); + tstHelper("'DON"); + tstHelper("'DOR"); + tstHelper("'DRA"); + tstHelper("'DRAD"); + tstHelper("'DRAMS"); + tstHelper("'DRANG"); + tstHelper("'DRED"); + tstHelper("'DREL"); + tstHelper("'DRI"); + tstHelper("'DRIM"); + tstHelper("'DROS"); + tstHelper("'DRUB"); + tstHelper("'DRUBS"); + tstHelper("'DU"); + tstHelper("'DUD"); + tstHelper("'DUG"); + tstHelper("'DUGS"); + tstHelper("'DZAM"); + tstHelper("'DZE"); + tstHelper("'DZEM"); + tstHelper("'DZES"); + tstHelper("'DZIN"); + tstHelper("'DZUM"); + tstHelper("'DZUMS"); + tstHelper("'GAG"); + tstHelper("'GED"); + tstHelper("'GI"); + tstHelper("'GOG"); + tstHelper("'GRAMS"); + tstHelper("'GRES"); + tstHelper("'GRIM"); + tstHelper("'GRIMS"); + tstHelper("'GRO"); + tstHelper("'GRO'O"); + tstHelper("'GRON"); + tstHelper("'GRUS"); + tstHelper("'GU"); + tstHelper("'GYED"); + tstHelper("'GYEGS"); + 
tstHelper("'GYOD"); + tstHelper("'GYUG"); + tstHelper("'GYUR"); + tstHelper("'JAL"); + tstHelper("'JAM"); + tstHelper("'JAS"); + tstHelper("'JIG"); + tstHelper("'JIGS"); + tstHelper("'JOMS"); + tstHelper("'JUG"); + tstHelper("'KHAS"); + tstHelper("'KHOG"); + tstHelper("'KHOGS"); + tstHelper("'KHOR"); + tstHelper("'KHRAMS"); + tstHelper("'KHRANG"); + tstHelper("'KHRAS"); + tstHelper("'KHREN"); + tstHelper("'KHRUG"); + tstHelper("'KHRUMS"); + tstHelper("'KHUMS"); + tstHelper("'KHYAN"); + tstHelper("'KHYIL"); + tstHelper("'KOD"); + tstHelper("'KRAM"); + tstHelper("'KRIGS"); + tstHelper("'LREG"); + tstHelper("'MTHUN"); + tstHelper("'MTSAMS"); + tstHelper("'OD"); + tstHelper("'OG"); + tstHelper("'ONGS"); + tstHelper("'PHAGS"); + tstHelper("'PHEN"); + tstHelper("'PHO"); + tstHelper("'PHOG"); + tstHelper("'PHOGS"); + tstHelper("'PHONGS"); + tstHelper("'PHRED"); + tstHelper("'PHREN"); + tstHelper("'PHROG"); + tstHelper("'PHROGS"); + tstHelper("'PHRUL"); + tstHelper("'PHYAM"); + tstHelper("'PHYES"); + tstHelper("'PHYIS"); + tstHelper("'PRUL"); + tstHelper("'SPONG"); + tstHelper("'THAB"); + tstHelper("'THABS"); + tstHelper("'THAL"); + tstHelper("'THAM"); + tstHelper("'THAMS"); + tstHelper("'THANG"); + tstHelper("'THANGS"); + tstHelper("'THAR"); + tstHelper("'THO"); + tstHelper("'THOB"); + tstHelper("'THOG"); + tstHelper("'THOP"); + tstHelper("'THUL"); + tstHelper("'THUN"); + tstHelper("'THUNG"); + tstHelper("'TSAM"); + tstHelper("'TSAMS"); + tstHelper("'TSE"); + tstHelper("'TSEB"); + tstHelper("'TSED"); + tstHelper("'TSO"); + tstHelper("'TSO'O"); + tstHelper("'TSOL"); + tstHelper("'TSOR"); + tstHelper("'TSOS"); + tstHelper("'TUN"); + tstHelper("'TUNG"); + tstHelper("'UR"); + tstHelper("1"); + tstHelper("A'M"); + tstHelper("AA"); + tstHelper("AAE"); + tstHelper("AAI"); + tstHelper("AAMRA'I"); + tstHelper("AAR"); + tstHelper("AASMA"); + tstHelper("AE"); + tstHelper("AIN"); + tstHelper("AINDA"); + tstHelper("AIndRANYILA"); + tstHelper("AOOL"); + tstHelper("AOOS"); + 
tstHelper("AOm"); + tstHelper("AU"); + tstHelper("AUDPA"); + tstHelper("AUDPALA"); + tstHelper("AUDPALA'I"); + tstHelper("AUT"); + tstHelper("AUTPALA'I"); + tstHelper("B'I"); + tstHelper("B. (DLC!)"); + tstHelper("BA"); + tstHelper("BA'"); + tstHelper("BA'A"); + tstHelper("BA'AR"); + tstHelper("BA'I"); + tstHelper("BA'O"); + tstHelper("BA'THUNG"); + tstHelper("BAA'"); + tstHelper("BAG"); + tstHelper("BAI"); + tstHelper("BAL"); + tstHelper("BAM"); + tstHelper("BAN"); + tstHelper("BANDU"); + tstHelper("BANG"); + tstHelper("BAR"); + tstHelper("BAR'"); + tstHelper("BAS"); + tstHelper("BAT"); + tstHelper("BCA'"); + tstHelper("BCAD"); + tstHelper("BCAGS"); + tstHelper("BCAS"); + tstHelper("BCDU"); + tstHelper("BCHA"); + tstHelper("BCHU"); + tstHelper("BCO"); + tstHelper("BCOR"); + tstHelper("BCU"); + tstHelper("BCUD"); + tstHelper("BCUG"); + tstHelper("BCUGS"); + tstHelper("BCUR"); + tstHelper("BCUS"); + tstHelper("BDA"); + tstHelper("BDAG"); + tstHelper("BDAR"); + tstHelper("BDE"); + tstHelper("BDEG"); + tstHelper("BDUD"); + tstHelper("BDUN"); + tstHelper("BE'I"); + tstHelper("BE'U"); + tstHelper("BEE"); + tstHelper("BEEdURYA'I"); + tstHelper("BGAG"); + tstHelper("BGANG"); + tstHelper("BGCUD"); + tstHelper("BGE"); + tstHelper("BGO"); + tstHelper("BGOD"); + tstHelper("BGRAD"); + tstHelper("BGRANG"); + tstHelper("BGREL"); + tstHelper("BGRES"); + tstHelper("BHA"); + tstHelper("BHA'I"); + tstHelper("BHI"); + tstHelper("BHINTA"); + tstHelper("BHINY"); + tstHelper("BI"); + tstHelper("BIM"); + tstHelper("BIMBA"); + tstHelper("BIN"); + tstHelper("BING"); + tstHelper("BIsnU"); + tstHelper("BKABS"); + tstHelper("BKHI"); + tstHelper("BKOD"); + tstHelper("BKRAM"); + tstHelper("BKRES"); + tstHelper("BKRI"); + tstHelper("BKRIS"); + tstHelper("BKROL"); + tstHelper("BKRUGS"); + tstHelper("BKYANG"); + tstHelper("BLA"); + tstHelper("BLAS"); + tstHelper("BLO"); + tstHelper("BLTAN"); + tstHelper("BLUGS"); + tstHelper("BNYIS"); + tstHelper("BO"); + tstHelper("BO'I"); + tstHelper("BO'O"); + 
tstHelper("BON"); + tstHelper("BONG"); + tstHelper("BOR"); + tstHelper("BPAG"); + tstHelper("BRAG"); + tstHelper("BRAL"); + tstHelper("BRAM"); + tstHelper("BRDEG"); + tstHelper("BRDUM"); + tstHelper("BRDUNGS"); + tstHelper("BRDZES"); + tstHelper("BRGOD"); + tstHelper("BRGYA"); + tstHelper("BRGYA'I"); + tstHelper("BRGYAD"); + tstHelper("BRGYAL"); + tstHelper("BRING"); + tstHelper("BRJID"); + tstHelper("BRJOD"); + tstHelper("BRKU"); + tstHelper("BRKUR"); + tstHelper("BRLAD"); + tstHelper("BRLANG"); + tstHelper("BRLANGS"); + tstHelper("BRNGAS"); + tstHelper("BRNGOD"); + tstHelper("BRNGOGS"); + tstHelper("BRNGOS"); + tstHelper("BRNGUBS"); + tstHelper("BRNYAN"); + tstHelper("BRONG"); + tstHelper("BRTAN"); + tstHelper("BRTEG"); + tstHelper("BRTEGS"); + tstHelper("BRTEN"); + tstHelper("BRTON"); + tstHelper("BRTUL"); + tstHelper("BRTZAGS"); + tstHelper("BRTZAN"); + tstHelper("BRTZANG"); + tstHelper("BRTZEGS"); + tstHelper("BRTZENGS"); + tstHelper("BRTZER"); + tstHelper("BRTZI"); + tstHelper("BRTZIBS"); + tstHelper("BRTZIS"); + tstHelper("BRTZOGS"); + tstHelper("BRTZON"); + tstHelper("BRUNGS"); + tstHelper("BSAGS"); + tstHelper("BSAL"); + tstHelper("BSANGS"); + tstHelper("BSBRUGS"); + tstHelper("BSDUNGS"); + tstHelper("BSDUS"); + tstHelper("BSEG"); + tstHelper("BSGRAGS"); + tstHelper("BSGRE"); + tstHelper("BSGRENG"); + tstHelper("BSGRES"); + tstHelper("BSGRUB"); + tstHelper("BSGRUNGS"); + tstHelper("BSGUR"); + tstHelper("BSGYINGS"); + tstHelper("BSGYUR"); + tstHelper("BSHAD"); + tstHelper("BSHAMS"); + tstHelper("BSHES"); + tstHelper("BSHUNG"); + tstHelper("BSIG"); + tstHelper("BSIL"); + tstHelper("BSING"); + tstHelper("BSKHYED"); + tstHelper("BSKOD"); + tstHelper("BSKOR"); + tstHelper("BSKOS"); + tstHelper("BSKRANGS"); + tstHelper("BSKRI"); + tstHelper("BSKRIS"); + tstHelper("BSKROD"); + tstHelper("BSKYANG"); + tstHelper("BSKYED"); + tstHelper("BSKYENG"); + tstHelper("BSKYIMS"); + tstHelper("BSKYIS"); + tstHelper("BSKYOD"); + tstHelper("BSLAS"); + tstHelper("BSNAN"); + 
tstHelper("BSNGAGS"); + tstHelper("BSNGAL"); + tstHelper("BSNUM"); + tstHelper("BSNUN"); + tstHelper("BSNYAGS"); + tstHelper("BSNYUNG"); + tstHelper("BSREG"); + tstHelper("BSREL"); + tstHelper("BSRUBS"); + tstHelper("BSRUNG"); + tstHelper("BSTAN"); + tstHelper("BSTOBS"); + tstHelper("BSTOD"); + tstHelper("BSTUS"); + tstHelper("BSTZIS"); + tstHelper("BTA'"); + tstHelper("BTAB"); + tstHelper("BTAM"); + tstHelper("BTANG"); + tstHelper("BTER"); + tstHelper("BTUB"); + tstHelper("BTUL"); + tstHelper("BTZAM"); + tstHelper("BTZAN"); + tstHelper("BTZER"); + tstHelper("BTZI"); + tstHelper("BTZUB"); + tstHelper("BTZUGS"); + tstHelper("BTZVA"); + tstHelper("BU"); + tstHelper("BU'I"); + tstHelper("BU'O"); + tstHelper("BUR"); + tstHelper("BYA"); + tstHelper("BYA'A"); + tstHelper("BYA'O"); + tstHelper("BYAL"); + tstHelper("BYAN"); + tstHelper("BYANG"); + tstHelper("BYAS"); + tstHelper("BYE"); + tstHelper("BYE'U"); + tstHelper("BYED"); + tstHelper("BYENG"); + tstHelper("BYIN"); + tstHelper("BYIS"); + tstHelper("BYOD"); + tstHelper("BYOL"); + tstHelper("BYUGS"); + tstHelper("BYUL"); + tstHelper("BYUNG"); + tstHelper("BZA'"); + tstHelper("BZANG"); + tstHelper("BZHAG"); + tstHelper("BZHI"); + tstHelper("BZHIGS"); + tstHelper("BZHIN"); + tstHelper("BZHING"); + tstHelper("BZHUBS"); + tstHelper("BZHUGS"); + tstHelper("BZLOS"); + tstHelper("BZUN"); + tstHelper("BZUNG"); + tstHelper("CA"); + tstHelper("CA'I"); + tstHelper("CAD"); + tstHelper("CAN"); + tstHelper("CANG"); + tstHelper("CAn"); + tstHelper("CE"); + tstHelper("CES"); + tstHelper("CHA"); + tstHelper("CHAD"); + tstHelper("CHAGS"); + tstHelper("CHANG"); + tstHelper("CHAS"); + tstHelper("CHE"); + tstHelper("CHE'I"); + tstHelper("CHEN"); + tstHelper("CHER"); + tstHelper("CHO"); + tstHelper("CHOMS"); + tstHelper("CHOS"); + tstHelper("CHU"); + tstHelper("CHU'I"); + tstHelper("CHUB"); + tstHelper("CHUMS"); + tstHelper("CHUNG"); + tstHelper("CHUNG'U"); + tstHelper("CHUNGS"); + tstHelper("CIG"); + tstHelper("CING"); + tstHelper("CO"); + 
tstHelper("CONG"); + tstHelper("COR"); + tstHelper("COS"); + tstHelper("CYA"); + tstHelper("D'U"); + tstHelper("DA"); + tstHelper("DA'I"); + tstHelper("DA'URYA"); + tstHelper("DAD"); + tstHelper("DAG"); + tstHelper("DAGS"); + tstHelper("DAM"); + tstHelper("DAMBA"); + tstHelper("DAN"); + tstHelper("DANG"); + tstHelper("DAR"); + tstHelper("DAU"); + tstHelper("DBA'"); + tstHelper("DBA'I"); + tstHelper("DBAG"); + tstHelper("DBANG"); + tstHelper("DBU'I"); + tstHelper("DBUG"); + tstHelper("DBUGS"); + tstHelper("DBYANGS"); + tstHelper("DBYAR"); + tstHelper("DBYE"); + tstHelper("DBYE'I"); + tstHelper("DBYES"); + tstHelper("DBYINS"); + tstHelper("DBYIR"); + tstHelper("DE"); + tstHelper("DE'I"); + tstHelper("DENG"); + tstHelper("DGA"); + tstHelper("DGA'"); + tstHelper("DGAB"); + tstHelper("DGAG"); + tstHelper("DGE"); + tstHelper("DGRA"); + tstHelper("DGRA'I"); + tstHelper("DGU'I"); + tstHelper("DGYE'O"); + tstHelper("DHA"); + tstHelper("DHA'"); + tstHelper("DHA'I"); + tstHelper("DHANU"); + tstHelper("DHU"); + tstHelper("DI"); + tstHelper("DI'I"); + tstHelper("DIG"); + tstHelper("DJOGS"); + tstHelper("DKA'"); + tstHelper("DKAR"); + tstHelper("DKOD"); + tstHelper("DKRI"); + tstHelper("DKRUGS"); + tstHelper("DKU"); + tstHelper("DKUGS"); + tstHelper("DKUS"); + tstHelper("DKYIL"); + tstHelper("DMA"); + tstHelper("DMAN"); + tstHelper("DMAR"); + tstHelper("DMEL"); + tstHelper("DMIGS"); + tstHelper("DMYIGS"); + tstHelper("DNGOS"); + tstHelper("DO"); + tstHelper("DOG"); + tstHelper("DON"); + tstHelper("DONG"); + tstHelper("DOR"); + tstHelper("DPA"); + tstHelper("DPA'"); + tstHelper("DPAG"); + tstHelper("DPAL"); + tstHelper("DPAS"); + tstHelper("DPE"); + tstHelper("DPER"); + tstHelper("DPON"); + tstHelper("DPRAL"); + tstHelper("DPUN"); + tstHelper("DPUNG"); + tstHelper("DPYA'"); + tstHelper("DPYID"); + tstHelper("DPYINGS"); + tstHelper("DPYOD"); + tstHelper("DRA"); + tstHelper("DRAG"); + tstHelper("DRAN"); + tstHelper("DRANG"); + tstHelper("DRE'U"); + tstHelper("DREGS"); + 
tstHelper("DRI"); + tstHelper("DRID"); + tstHelper("DRIN"); + tstHelper("DRNGUBS"); + tstHelper("DROS"); + tstHelper("DRUG"); + tstHelper("DSPYOD"); + tstHelper("DTAR"); + tstHelper("DU"); + tstHelper("DU'I"); + tstHelper("DUG"); + tstHelper("DUNG"); + tstHelper("DUS"); + tstHelper("DVAGS"); + tstHelper("DW'A"); + tstHelper("DZAM"); + tstHelper("DZAMBU"); + tstHelper("DZAMBU'I"); + tstHelper("DZI"); + tstHelper("DZIN"); + tstHelper("DZOGS"); + tstHelper("G-YAS"); + tstHelper("G-YOR"); + tstHelper("G-YUM"); + tstHelper("G-YUNG"); + tstHelper("GA"); + tstHelper("GA'A"); + tstHelper("GA'I"); + tstHelper("GAL"); + tstHelper("GAM"); + tstHelper("GAMS"); + tstHelper("GAN"); + tstHelper("GANG"); + tstHelper("GAR"); + tstHelper("GAS"); + tstHelper("GAU"); + tstHelper("GAmGA'I"); + tstHelper("GAn"); + tstHelper("GAndI"); + tstHelper("GBA"); + tstHelper("GCIG"); + tstHelper("GCIN"); + tstHelper("GCOL"); + tstHelper("GCONG"); + tstHelper("GDAGS"); + tstHelper("GDEGS"); + tstHelper("GDGAS"); + tstHelper("GDOL"); + tstHelper("GDONGS"); + tstHelper("GDUD"); + tstHelper("GDUG"); + tstHelper("GDUG.PA (DLC!)"); + tstHelper("GDUGS"); + tstHelper("GDUN"); + tstHelper("GE"); + tstHelper("GE'I"); + tstHelper("GENGS"); + tstHelper("GHI"); + tstHelper("GHOM"); + tstHelper("GI"); + tstHelper("GIR"); + tstHelper("GIS"); + tstHelper("GLA"); + tstHelper("GLAB"); + tstHelper("GLAL"); + tstHelper("GLANG"); + tstHelper("GLO"); + tstHelper("GLONGS"); + tstHelper("GLUGS"); + tstHelper("GNA'I"); + tstHelper("GNAD"); + tstHelper("GNAS"); + tstHelper("GNGER"); + tstHelper("GNOD"); + tstHelper("GNON"); + tstHelper("GNYA'"); + tstHelper("GNYAR"); + tstHelper("GNYE'U"); + tstHelper("GNYEN"); + tstHelper("GNYER"); + tstHelper("GNYI"); + tstHelper("GNYIS"); + tstHelper("GO"); + tstHelper("GO'I"); + tstHelper("GONG"); + tstHelper("GOOTAMA"); + tstHelper("GOS"); + tstHelper("GRAG"); + tstHelper("GRAGS"); + tstHelper("GRANGS"); + tstHelper("GRANS"); + tstHelper("GREG"); + tstHelper("GRO"); + 
tstHelper("GRO'I"); + tstHelper("GROG"); + tstHelper("GROGS"); + tstHelper("GROR"); + tstHelper("GRUNGS"); + tstHelper("GRUR"); + tstHelper("GSAL"); + tstHelper("GSANG"); + tstHelper("GSAR"); + tstHelper("GSDEGS"); + tstHelper("GSEB"); + tstHelper("GSEG"); + tstHelper("GSER"); + tstHelper("GSHE"); + tstHelper("GSHE'"); + tstHelper("GSHEGS"); + tstHelper("GSHING"); + tstHelper("GSIGS"); + tstHelper("GSO"); + tstHelper("GSOL"); + tstHelper("GSOS"); + tstHelper("GSRUNG"); + tstHelper("GSUM"); + tstHelper("GSUNGS"); + tstHelper("GTA'"); + tstHelper("GTAGS"); + tstHelper("GTAM"); + tstHelper("GTAMS"); + tstHelper("GTAN"); + tstHelper("GTEGS"); + tstHelper("GTING"); + tstHelper("GTOGS"); + tstHelper("GTOL"); + tstHelper("GTONG"); + tstHelper("GTUBS"); + tstHelper("GTUM"); + tstHelper("GTZAB"); + tstHelper("GTZAD"); + tstHelper("GTZANG"); + tstHelper("GTZUG"); + tstHelper("GTZUGS"); + tstHelper("GU"); + tstHelper("GU'O"); + tstHelper("GYA"); + tstHelper("GYAN"); + tstHelper("GYAR"); + tstHelper("GYAS"); + tstHelper("GYES"); + tstHelper("GYI"); + tstHelper("GYIR"); + tstHelper("GYIS"); + tstHelper("GYO"); + tstHelper("GYOR"); + tstHelper("GYUL"); + tstHelper("GYUR"); + tstHelper("GZANG"); + tstHelper("GZAR"); + tstHelper("GZENG"); + tstHelper("GZENGS"); + tstHelper("GZES"); + tstHelper("GZHA'I"); + tstHelper("GZHAG"); + tstHelper("GZHAL"); + tstHelper("GZHAN"); + tstHelper("GZHANG"); + tstHelper("GZHEL"); + tstHelper("GZHI"); + tstHelper("GZHIG"); + tstHelper("GZHON"); + tstHelper("GZHUNG"); + tstHelper("GZI"); + tstHelper("GZIL"); + tstHelper("GZUGS"); + tstHelper("GZUNG"); + tstHelper("GZUNGS"); + tstHelper("GZUNGS'I"); + tstHelper("GndA'I"); + tstHelper("GndAA'I"); + tstHelper("H'A"); + tstHelper("HA"); + tstHelper("HAB"); + tstHelper("HAM"); + tstHelper("HANG"); + tstHelper("HETU"); + tstHelper("HETUR"); + tstHelper("HU"); + tstHelper("HUD"); + tstHelper("JA"); + tstHelper("JAA'"); + tstHelper("JI"); + tstHelper("JIGS"); + tstHelper("JO"); + tstHelper("JO'U"); + 
tstHelper("K'AU"); + tstHelper("KA"); + tstHelper("KA'A"); + tstHelper("KA'ASHI"); + tstHelper("KA'ASHI'I"); + tstHelper("KA'I"); + tstHelper("KA'U"); + tstHelper("KAA'"); + tstHelper("KAL"); + tstHelper("KAM"); + tstHelper("KAR"); + tstHelper("KARAnA"); + tstHelper("KE'U"); + tstHelper("KGRAG"); + tstHelper("KHA"); + tstHelper("KHAMS"); + tstHelper("KHANG"); + tstHelper("KHANS"); + tstHelper("KHE'U"); + tstHelper("KHRA"); + tstHelper("KHRAM"); + tstHelper("KHRI"); + tstHelper("KHRIG"); + tstHelper("KHRIS"); + tstHelper("KHUD"); + tstHelper("KHUG"); + tstHelper("KHYAB"); + tstHelper("KHYAD"); + tstHelper("KHYE'U"); + tstHelper("KHYE'US"); + tstHelper("KHYED"); + tstHelper("KHYIM"); + tstHelper("KHYOD"); + tstHelper("KHYON"); + tstHelper("KI"); + tstHelper("KKU"); + tstHelper("KLU'I"); + tstHelper("KLUG"); + tstHelper("KO"); + tstHelper("KOO"); + tstHelper("KOOSHAMBHI"); + tstHelper("KOS"); + tstHelper("KRIS"); + tstHelper("KROL"); + tstHelper("KROS"); + tstHelper("KTI"); + tstHelper("KU"); + tstHelper("KULA"); + tstHelper("KUM"); + tstHelper("KUN"); + tstHelper("KUR"); + tstHelper("KY"); + tstHelper("KYA"); + tstHelper("KYA'I"); + tstHelper("KYANG"); + tstHelper("KYE"); + tstHelper("KYI"); + tstHelper("KYIS"); + tstHelper("KYOD"); + tstHelper("KsI"); + tstHelper("L'A"); + tstHelper("LA"); + tstHelper("LA$GCAGS"); + tstHelper("LA'A"); + tstHelper("LA'I"); + tstHelper("LAG"); + tstHelper("LAM"); + tstHelper("LAN"); + tstHelper("LANDA"); + tstHelper("LANG"); + tstHelper("LANGGA"); + tstHelper("LANGS"); + tstHelper("LAR"); + tstHelper("LAS"); + tstHelper("LBA"); + tstHelper("LBANG"); + tstHelper("LCAG"); + tstHelper("LCAGS"); + tstHelper("LCAM"); + tstHelper("LCAMS"); + tstHelper("LCE"); + tstHelper("LCE'U"); + tstHelper("LDAN"); + tstHelper("LDIBS"); + tstHelper("LDOB"); + tstHelper("LDOG"); + tstHelper("LDOGS"); + tstHelper("LDONGS"); + tstHelper("LE"); + tstHelper("LE'I"); + tstHelper("LE'U'I"); + tstHelper("LE'UR"); + tstHelper("LEB"); + tstHelper("LED"); + 
tstHelper("LEGS"); + tstHelper("LEN"); + tstHelper("LENDRA"); + tstHelper("LHA"); + tstHelper("LHAG"); + tstHelper("LHAL"); + tstHelper("LHAN"); + tstHelper("LHONGS"); + tstHelper("LHUN"); + tstHelper("LHUNG"); + tstHelper("LI"); + tstHelper("LI'I"); + tstHelper("LIN"); + tstHelper("LJAB"); + tstHelper("LJANG"); + tstHelper("LJIB"); + tstHelper("LJIBS"); + tstHelper("LKOG"); + tstHelper("LNGA"); + tstHelper("LNGA'I"); + tstHelper("LNGA'I'I"); + tstHelper("LNGOG"); + tstHelper("LO"); + tstHelper("LOBS"); + tstHelper("LOGS"); + tstHelper("LTA"); + tstHelper("LTAR"); + tstHelper("LTU"); + tstHelper("LTUN"); + tstHelper("LTUNG"); + tstHelper("LU"); + tstHelper("LU'I"); + tstHelper("LUGS"); + tstHelper("LUM"); + tstHelper("LUS"); + tstHelper("MA"); + tstHelper("MA'A"); + tstHelper("MA'I"); + tstHelper("MAL"); + tstHelper("MAN"); + tstHelper("MANDA"); + tstHelper("MANG"); + tstHelper("MANYDZU"); + tstHelper("MAR"); + tstHelper("MAS"); + tstHelper("MBI"); + tstHelper("MCHAN"); + tstHelper("MCHED"); + tstHelper("MCHING"); + tstHelper("MCHOD"); + tstHelper("MCHOG"); + tstHelper("MCHU"); + tstHelper("MCHUR"); + tstHelper("MDA'"); + tstHelper("MDANGS"); + tstHelper("MDAS"); + tstHelper("MDO'I"); + tstHelper("MDOG"); + tstHelper("MDON"); + tstHelper("MDUD"); + tstHelper("MDUN"); + tstHelper("MDZAD"); + tstHelper("MDZES"); + tstHelper("MDZOS"); + tstHelper("ME"); + tstHelper("ME'I"); + tstHelper("MED"); + tstHelper("MGA"); + tstHelper("MGO"); + tstHelper("MGO'I"); + tstHelper("MGOS"); + tstHelper("MGRON"); + tstHelper("MI"); + tstHelper("MI'I"); + tstHelper("MID"); + tstHelper("MIG"); + tstHelper("MING"); + tstHelper("MKHA'"); + tstHelper("MKHA'I"); + tstHelper("MKHAH"); + tstHelper("MKHAN"); + tstHelper("MKHAS"); + tstHelper("MKHEN"); + tstHelper("MKHRANG"); + tstHelper("MKHREN"); + tstHelper("MKHYEN"); + tstHelper("MMING"); + tstHelper("MNGAGS"); + tstHelper("MNGAS"); + tstHelper("MNGON"); + tstHelper("MNYA'"); + tstHelper("MNYAM"); + tstHelper("MNYUNG"); + tstHelper("MO"); + 
tstHelper("MO'I"); + tstHelper("MO'U"); + tstHelper("MON"); + tstHelper("MONGS"); + tstHelper("MONS"); + tstHelper("MOS"); + tstHelper("MTHA'"); + tstHelper("MTHAB"); + tstHelper("MTHAN"); + tstHelper("MTHANGS"); + tstHelper("MTHAR"); + tstHelper("MTHO"); + tstHelper("MTHONG"); + tstHelper("MTHU"); + tstHelper("MTHUM"); + tstHelper("MTHUN"); + tstHelper("MTHUNG"); + tstHelper("MTHUS"); + tstHelper("MTON"); + tstHelper("MTONG"); + tstHelper("MTSA"); + tstHelper("MTSAMS"); + tstHelper("MTSAN"); + tstHelper("MTSANS"); + tstHelper("MTSO'I"); + tstHelper("MTSONG"); + tstHelper("MTSOS"); + tstHelper("MTSUNG"); + tstHelper("MTSUNGS"); + tstHelper("MU"); + tstHelper("MUG"); + tstHelper("MYA"); + tstHelper("MYANG"); + tstHelper("MYAS"); + tstHelper("MYID"); + tstHelper("MYOG"); + tstHelper("MYONG"); + tstHelper("MYOS"); + tstHelper("MYUNG"); + tstHelper("N'I"); + tstHelper("NA"); + tstHelper("NA'I"); + tstHelper("NAD"); + tstHelper("NAG"); + tstHelper("NAM"); + tstHelper("NANG"); + tstHelper("NAS"); + tstHelper("NDA"); + tstHelper("NE"); + tstHelper("NE'U"); + tstHelper("NES"); + tstHelper("NGA"); + tstHelper("NGA'I"); + tstHelper("NGAB"); + tstHelper("NGAG"); + tstHelper("NGAL"); + tstHelper("NGAN"); + tstHelper("NGE"); + tstHelper("NGE'I"); + tstHelper("NGES"); + tstHelper("NGO"); + tstHelper("NGOR"); + tstHelper("NGU'I"); + tstHelper("NI"); + tstHelper("NI'A"); + tstHelper("NIG"); + tstHelper("NIMITTA"); + tstHelper("NIS"); + tstHelper("NO"); + tstHelper("NOG"); + tstHelper("NOGS"); + tstHelper("NONG"); + tstHelper("NU"); + tstHelper("NU'I"); + tstHelper("NUB"); + tstHelper("NYA"); + tstHelper("NYAMS"); + tstHelper("NYAN"); + tstHelper("NYDZU"); + tstHelper("NYE"); + tstHelper("NYE'U"); + tstHelper("NYEN"); + tstHelper("NYES"); + tstHelper("NYI"); + tstHelper("NYID"); + tstHelper("NYING"); + tstHelper("NYON"); + tstHelper("NYUG"); + tstHelper("OD"); + tstHelper("PA"); + tstHelper("PA'A"); + tstHelper("PA'I"); + tstHelper("PA'LA"); + tstHelper("PA'O"); + tstHelper("PAD"); 
+ tstHelper("PADMA"); + tstHelper("PADMA'I"); + tstHelper("PAG"); + tstHelper("PALA"); + tstHelper("PANG"); + tstHelper("PAR"); + tstHelper("PARU"); + tstHelper("PAS"); + tstHelper("PA"); + tstHelper("PHA"); + tstHelper("PHAN"); + tstHelper("PHEG"); + tstHelper("PHI"); + tstHelper("PHIN"); + tstHelper("PHIR"); + tstHelper("PHOD"); + tstHelper("PHOG"); + tstHelper("PHRA"); + tstHelper("PHRAG"); + tstHelper("PHRAN"); + tstHelper("PHREN"); + tstHelper("PHRENG"); + tstHelper("PHROGS"); + tstHelper("PHRUG"); + tstHelper("PHUDG"); + tstHelper("PHUN"); + tstHelper("PHUNG"); + tstHelper("PHUR"); + tstHelper("PHYAG"); + tstHelper("PHYE"); + tstHelper("PHYI"); + tstHelper("PHYIN"); + tstHelper("PHYIR"); + tstHelper("PHYIS"); + tstHelper("PHYOGS"); + tstHelper("PHYUN"); + tstHelper("PHYUNG"); + tstHelper("PHYUNGS"); + tstHelper("PI"); + tstHelper("PINGKA"); + tstHelper("PO"); + tstHelper("PO'I"); + tstHelper("PO'i"); + tstHelper("POR"); + tstHelper("POS"); + tstHelper("PRA"); + tstHelper("PRAKshVA"); + tstHelper("PRAL"); + tstHelper("PRASIDDHA"); + tstHelper("PRI"); + tstHelper("PRONG"); + tstHelper("PUN"); + tstHelper("PUNANA'A"); + tstHelper("PUS"); + tstHelper("PYE"); + tstHelper("R'AGA"); + tstHelper("RA"); + tstHelper("RA'"); + tstHelper("RA'A"); + tstHelper("RA'I"); + tstHelper("RAA'"); + tstHelper("RAB"); + tstHelper("RABS"); + tstHelper("RAG"); + tstHelper("RAL"); + tstHelper("RAN"); + tstHelper("RANG"); + tstHelper("RAS"); + tstHelper("RAndABA"); + tstHelper("RBOD"); + tstHelper("RBYAR"); + tstHelper("RDAS"); + tstHelper("RDE'U"); + tstHelper("RDEG"); + tstHelper("RDO"); + tstHelper("RDOB"); + tstHelper("RDOBS"); + tstHelper("RDUM"); + tstHelper("RDZAS"); + tstHelper("RDZI"); + tstHelper("RDZOGS"); + tstHelper("RDZU"); + tstHelper("RDZUL"); + tstHelper("RENGS"); + tstHelper("RGOD"); + tstHelper("RGOG"); + tstHelper("RGOS"); + tstHelper("RGYA"); + tstHelper("RGYAL"); + tstHelper("RGYAN"); + tstHelper("RGYANG"); + tstHelper("RGYAS"); + tstHelper("RGYES"); + 
tstHelper("RGYU"); + tstHelper("RGYUD"); + tstHelper("RGYUN"); + tstHelper("RGYUS"); + tstHelper("RI"); + tstHelper("RI'I"); + tstHelper("RIGS"); + tstHelper("RIL"); + tstHelper("RIM"); + tstHelper("RIN"); + tstHelper("RING"); + tstHelper("RIS"); + tstHelper("RJE"); + tstHelper("RJE'U"); + tstHelper("RJES"); + tstHelper("RJO"); + tstHelper("RKA'I"); + tstHelper("RKANG"); + tstHelper("RKO"); + tstHelper("RKU"); + tstHelper("RKUN"); + tstHelper("RKYEN"); + tstHelper("RLABS"); + tstHelper("RLE'O"); + tstHelper("RLING"); + tstHelper("RMAL"); + tstHelper("RMEL"); + tstHelper("RMONG"); + tstHelper("RMONGS"); + tstHelper("RMUS"); + tstHelper("RMYANGS"); + tstHelper("RNA"); + tstHelper("RNABS"); + tstHelper("RNAM"); + tstHelper("RNAMS"); + tstHelper("RNGA'U"); + tstHelper("RNGAB"); + tstHelper("RNGANGS"); + tstHelper("RNGE"); + tstHelper("RNGOG"); + tstHelper("RNGU"); + tstHelper("RNGUS"); + tstHelper("RNOGS"); + tstHelper("RO"); + tstHelper("RO.STOD (DLC!)"); + tstHelper("ROL"); + tstHelper("RSBOD"); + tstHelper("RTAG"); + tstHelper("RTE'"); + tstHelper("RTEN"); + tstHelper("RTOL"); + tstHelper("RTUL"); + tstHelper("RTYAM"); + tstHelper("RTZA"); + tstHelper("RTZAL"); + tstHelper("RTZE"); + tstHelper("RTZENGS"); + tstHelper("RTZI"); + tstHelper("RTZIBS"); + tstHelper("RTZIS"); + tstHelper("RTZUB"); + tstHelper("RTZUBS"); + tstHelper("RU"); + tstHelper("RUL"); + tstHelper("RUNG"); + tstHelper("RUNGS"); + tstHelper("RYA"); + tstHelper("RYA'I"); + tstHelper("RYAL"); + tstHelper("SA"); + tstHelper("SA'"); + tstHelper("SA'A"); + tstHelper("SA'I"); + tstHelper("SA'O"); + tstHelper("SAG"); + tstHelper("SANG"); + tstHelper("SANGS"); + tstHelper("SBANG"); + tstHelper("SBNYEN"); + tstHelper("SBRANG"); + tstHelper("SBRAS"); + tstHelper("SBREL"); + tstHelper("SBRING"); + tstHelper("SBRIS"); + tstHelper("SBROS"); + tstHelper("SBRUGS"); + tstHelper("SBRUL"); + tstHelper("SBRUNG"); + tstHelper("SBU"); + tstHelper("SBUNGS"); + tstHelper("SBUR"); + tstHelper("SBYAN"); + 
tstHelper("SBYANGS"); + tstHelper("SBYANS"); + tstHelper("SBYAR"); + tstHelper("SBYER"); + tstHelper("SBYI"); + tstHelper("SBYIN"); + tstHelper("SBYIR"); + tstHelper("SBYO"); + tstHelper("SBYOR"); + tstHelper("SBYU"); + tstHelper("SBYUG"); + tstHelper("SBYUGS"); + tstHelper("SDAGS"); + tstHelper("SDAN"); + tstHelper("SDBUGS"); + tstHelper("SDE"); + tstHelper("SDE'U"); + tstHelper("SDIG"); + tstHelper("SDOD"); + tstHelper("SDON"); + tstHelper("SDONG"); + tstHelper("SDUG"); + tstHelper("SE'I"); + tstHelper("SEMS"); + tstHelper("SENG"); + tstHelper("SER"); + tstHelper("SGA"); + tstHelper("SGO'U"); + tstHelper("SGOGS"); + tstHelper("SGRA"); + tstHelper("SGRAR"); + tstHelper("SGRE'U"); + tstHelper("SGREL"); + tstHelper("SGRON"); + tstHelper("SGRUB"); + tstHelper("SGRUP"); + tstHelper("SGUB"); + tstHelper("SGUG"); + tstHelper("SGUN"); + tstHelper("SGYE'O"); + tstHelper("SGYES"); + tstHelper("SGYOD"); + tstHelper("SGYUNG"); + tstHelper("SH'A"); + tstHelper("SH'AKYA"); + tstHelper("SHA"); + tstHelper("SHA'"); + tstHelper("SHA'A"); + tstHelper("SHAA'"); + tstHelper("SHAA'RI'I"); + tstHelper("SHAL"); + tstHelper("SHAM"); + tstHelper("SHAN"); + tstHelper("SHANG"); + tstHelper("SHAR"); + tstHelper("SHEG"); + tstHelper("SHES"); + tstHelper("SHI"); + tstHelper("SHI'A"); + tstHelper("SHIG"); + tstHelper("SHIN"); + tstHelper("SHING"); + tstHelper("SHOG"); + tstHelper("SHRU"); + tstHelper("SHRUT"); + tstHelper("SHVA"); + tstHelper("SI"); + tstHelper("SI'I"); + tstHelper("SING"); + tstHelper("SKA"); + tstHelper("SKABS"); + tstHelper("SKAD"); + tstHelper("SKAM"); + tstHelper("SKANG"); + tstHelper("SKAR"); + tstHelper("SKARI"); + tstHelper("SKE"); + tstHelper("SKEGS"); + tstHelper("SKEL"); + tstHelper("SKHANG"); + tstHelper("SKO"); + tstHelper("SKONG"); + tstHelper("SKOR"); + tstHelper("SKRA'I"); + tstHelper("SKRED"); + tstHelper("SKUD"); + tstHelper("SKYAN"); + tstHelper("SKYANG"); + tstHelper("SKYE"); + tstHelper("SKYE'O"); + tstHelper("SKYE'U"); + tstHelper("SKYED"); + 
tstHelper("SKYEGS"); + tstHelper("SKYEN"); + tstHelper("SKYES"); + tstHelper("SKYIMS"); + tstHelper("SKYO"); + tstHelper("SKYOB"); + tstHelper("SKYONG"); + tstHelper("SKYONGS"); + tstHelper("SLABS"); + tstHelper("SLAR"); + tstHelper("SLE'O"); + tstHelper("SLO"); + tstHelper("SLOB"); + tstHelper("SLONG"); + tstHelper("SLOP"); + tstHelper("SMA"); + tstHelper("SMAD"); + tstHelper("SMAM"); + tstHelper("SMAN"); + tstHelper("SMEL"); + tstHelper("SMOD"); + tstHelper("SMON"); + tstHelper("SMONGS"); + tstHelper("SMRA"); + tstHelper("SMRA'I"); + tstHelper("SMRA'O"); + tstHelper("SMUNG"); + tstHelper("SMYANGS"); + tstHelper("SMYUG"); + tstHelper("SMYUNG"); + tstHelper("SNA"); + tstHelper("SNABS"); + tstHelper("SNAMG"); + tstHelper("SNAR"); + tstHelper("SNGAGS"); + tstHelper("SNGANG"); + tstHelper("SNGANGS"); + tstHelper("SNGO"); + tstHelper("SNGO'I"); + tstHelper("SNGOGS"); + tstHelper("SNGON"); + tstHelper("SNOD"); + tstHelper("SNRA"); + tstHelper("SNREL"); + tstHelper("SNRON"); + tstHelper("SNUM"); + tstHelper("SNYA"); + tstHelper("SNYAM"); + tstHelper("SNYEG"); + tstHelper("SNYEGS"); + tstHelper("SNYER"); + tstHelper("SNYIL"); + tstHelper("SNYING"); + tstHelper("SNYOD"); + tstHelper("SNYOMS"); + tstHelper("SO"); + tstHelper("SO'I"); + tstHelper("SOGS"); + tstHelper("SONG"); + tstHelper("SOR"); + tstHelper("SPANG"); + tstHelper("SPANGS"); + tstHelper("SPOBS"); + tstHelper("SPRAS"); + tstHelper("SPRE'U'I"); + tstHelper("SPRIN"); + tstHelper("SPRO"); + tstHelper("SPRUGS"); + tstHelper("SPU"); + tstHelper("SPUNGS"); + tstHelper("SPYAD"); + tstHelper("SPYAN"); + tstHelper("SPYANG"); + tstHelper("SPYI"); + tstHelper("SPYI'I"); + tstHelper("SPYI'U"); + tstHelper("SPYIR"); + tstHelper("SPYIS"); + tstHelper("SPYOD"); + tstHelper("SPYON"); + tstHelper("SPYONG"); + tstHelper("SPYOOD"); + tstHelper("SPYOR"); + tstHelper("SPYUGS"); + tstHelper("SRAL"); + tstHelper("SRAN"); + tstHelper("SRAS"); + tstHelper("SREG"); + tstHelper("SRI"); + tstHelper("SRID"); + tstHelper("SRIN"); + 
tstHelper("SRJOD"); + tstHelper("SRO"); + tstHelper("SROG"); + tstHelper("SROL"); + tstHelper("SRONG"); + tstHelper("SRUNG"); + tstHelper("SRUNGS"); + tstHelper("STE"); + tstHelper("STED"); + tstHelper("STENG"); + tstHelper("STENGS"); + tstHelper("STHA"); + tstHelper("STOB"); + tstHelper("STOBS"); + tstHelper("STOD"); + tstHelper("STON"); + tstHelper("STONG"); + tstHelper("STUG"); + tstHelper("STUGS"); + tstHelper("STZAGS"); + tstHelper("SU"); + tstHelper("SUM"); + tstHelper("SZHON"); + tstHelper("Si"); + tstHelper("T'A"); + tstHelper("T'A'I"); + tstHelper("TA"); + tstHelper("TA'A"); + tstHelper("TAA'I"); + tstHelper("TAMBA"); + tstHelper("TE"); + tstHelper("TE'U"); + tstHelper("TE'U'I"); + tstHelper("THA"); + tstHelper("THABS"); + tstHelper("THAD"); + tstHelper("THAG"); + tstHelper("THAL"); + tstHelper("THAMS"); + tstHelper("THE"); + tstHelper("THE'U"); + tstHelper("THED"); + tstHelper("THEG"); + tstHelper("THI"); + tstHelper("THIB"); + tstHelper("THIM"); + tstHelper("THIMS"); + tstHelper("THIN"); + tstHelper("THING"); + tstHelper("THOB"); + tstHelper("THOD"); + tstHelper("THOGS"); + tstHelper("THOS"); + tstHelper("THUB"); + tstHelper("THUG"); + tstHelper("THUGS"); + tstHelper("THUNG"); + tstHelper("THUR"); + tstHelper("TI"); + tstHelper("TI'"); + tstHelper("TI'A"); + tstHelper("TI'I"); + tstHelper("TIG"); + tstHelper("TING"); + tstHelper("TKA'A"); + tstHelper("TOG"); + tstHelper("TOR"); + tstHelper("TPA"); + tstHelper("TRA'AI"); + tstHelper("TRE"); + tstHelper("TRYAM"); + tstHelper("TSAD"); + tstHelper("TSAL"); + tstHelper("TSANG"); + tstHelper("TSANGS"); + tstHelper("TSE"); + tstHelper("TSE'U"); + tstHelper("TSEGS"); + tstHelper("TSER"); + tstHelper("TSIG"); + tstHelper("TSIGS"); + tstHelper("TSOD"); + tstHelper("TSOGS"); + tstHelper("TSOM"); + tstHelper("TSON"); + tstHelper("TSONG"); + tstHelper("TSOR"); + tstHelper("TSOS"); + tstHelper("TSUGS"); + tstHelper("TSUL"); + tstHelper("TSUR"); + tstHelper("TU"); + tstHelper("TUB"); + tstHelper("TUD"); + 
tstHelper("TUS"); + tstHelper("TYA'I"); + tstHelper("TZ'A"); + tstHelper("TZA"); + tstHelper("TZAM"); + tstHelper("TZAMMI"); + tstHelper("TZAMPAKA"); + tstHelper("TZAN"); + tstHelper("TZANDA"); + tstHelper("TZANDAN"); + tstHelper("TZE'U"); + tstHelper("TZER"); + tstHelper("TZI"); + tstHelper("TZIB"); + tstHelper("WA"); + tstHelper("WA'A"); + tstHelper("WAM"); + tstHelper("WANG"); + tstHelper("X"); + tstHelper("YA"); + tstHelper("YA'A"); + tstHelper("YA'AnA"); + tstHelper("YA'I"); + tstHelper("YAN"); + tstHelper("YANG"); + tstHelper("YANGS"); + tstHelper("YAS"); + tstHelper("YAmGU"); + tstHelper("YE"); + tstHelper("YI"); + tstHelper("YID"); + tstHelper("YIN"); + tstHelper("YOL"); + tstHelper("YONGS"); + tstHelper("YU"); + tstHelper("YUG"); + tstHelper("YUL"); + tstHelper("ZAD"); + tstHelper("ZAG"); + tstHelper("ZAN"); + tstHelper("ZAR"); + tstHelper("ZDOG"); + tstHelper("ZE'I"); + tstHelper("ZER"); + tstHelper("ZHA'I"); + tstHelper("ZHABS"); + tstHelper("ZHAL"); + tstHelper("ZHE"); + tstHelper("ZHEN"); + tstHelper("ZHENG"); + tstHelper("ZHER"); + tstHelper("ZHES"); + tstHelper("ZHES'O"); + tstHelper("ZHI"); + tstHelper("ZHI'I"); + tstHelper("ZHIG"); + tstHelper("ZHIGS"); + tstHelper("ZHIN"); + tstHelper("ZHING"); + tstHelper("ZHON"); + tstHelper("ZHUGS"); + tstHelper("ZHUS"); + tstHelper("ZI'I"); + tstHelper("ZIL"); + tstHelper("ZING"); + tstHelper("ZLA"); + tstHelper("ZLOG"); + tstHelper("ZLOS"); + tstHelper("ZLUM"); + tstHelper("ZUG"); + tstHelper("ZUL"); + tstHelper("ZUNG"); + tstHelper("d'U"); + tstHelper("dA"); + tstHelper("dALA"); + tstHelper("dI"); + tstHelper("dU"); + tstHelper("nA"); + tstHelper("nA'I"); + tstHelper("nDA'A"); + tstHelper("nE"); + tstHelper("nI"); + tstHelper("nYAGRO"); + tstHelper("ndI"); +tstHelper("shKA"); + + } } +/* DLC FIXME: add test cases: from R0021F.ACE: ambiguous Tibetan/Sanskrit: + + BDA' þþþþ +B+DA þþþ +DBANG þþþ +D+BA þþþ +DGA' þþþþ +D+GA þþþ +DGRA þþþ +D+GRA þþþ +DGYESþþþþþ +D+GYA þþþ +DMAR þþþþ +D+MA þþþ +GDA' þþþþ +G+DA þþþ 
+GNAD þþþþ +G+NA þþþ +MNA' þþþþ +M+NA þþþ +*/