I really hesitate to commit this because I'm not sure what it brings to the

table exactly and I fear that it makes the ACIP->Tibetan converter code
a lot uglier.  The TODO(DLC)[EWTS->Tibetan] comments littered throughout
are part of the ugliness; they point to the ugliness.  If each were addressed,
cleanliness could perhaps be achieved.

I've largely forgotten exactly what this change does, but it attempts to
improve EWTS->Tibetan conversion.  The lexer is probably really, really
primitive.  I concentrate here on converting a single tsheg bar rather than
a whole document.

Eclipse was used during part of my journey here and some imports were
reorganized merely because I could.  :)

(Eclipse was needed when the usual ant build failed to run a new test
EWTSTest.  And I wanted its debugger.)

Next steps: end-to-end EWTS tests should bring many problems to light.  Fix
those.  Triage all the TODO comments.

I don't know that I'll ever really trust the implementation.  The tests are
valuable, though.  A clean implementation of EWTS->Tibetan in Jython
might hold enough interest for me; I'd like to learn Python.
This commit is contained in:
dchandler 2005-06-20 06:18:00 +00:00
parent f64bae8ea6
commit 7198f23361
45 changed files with 1666 additions and 695 deletions

View file

@ -1,15 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="output" path="eclipse_bin"/>
<classpathentry kind="src" path="source"/>
<classpathentry kind="var" path="JRE_LIB" rootpath="JRE_SRCROOT" sourcepath="JRE_SRC"/>
<classpathentry kind="lib" path="extensions/jdom.jar"/>
<classpathentry kind="lib" path="extensions/jmf.jar"/>
<classpathentry kind="lib" path="extensions/xalan.jar"/>
<classpathentry kind="lib" path="extensions/xercesImpl.jar"/>
<classpathentry kind="lib" path="extensions/xml-apis.jar"/>
<classpathentry kind="lib" path="extensions"/>
<classpathentry kind="lib" path="F:/thdl/Jskad/extensions/drop-ins/QTJava.zip"/>
<classpathentry kind="lib" path="F:/Program Files/Eclipse/eclipse/plugins/org.junit_3.7.0/junit.jar"/>
<classpathentry kind="lib" path="F:/Program Files/j2sdkee1.3.1/lib/j2ee.jar"/> <!-- or you could use Tomcat's JAR. -DC -->
<classpathentry kind="src" path="source"/>
<classpathentry kind="lib" path="extensions/jdom.jar"/>
<classpathentry kind="lib" path="extensions/to-be-installed-with-ant/junit.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="extensions"/>
<classpathentry kind="lib" path="G:/Program Files/eclipse/plugins/org.eclipse.tomcat_4.1.30/servlet.jar"/>
<classpathentry kind="output" path="bin_for_eclipse"/>
</classpath>

View file

@ -472,6 +472,16 @@ the jvm starting tomcat:
description="compiles all JUnit test cases that can be compiled in the present CLASSPATH (NB that this distinction is just wishful thinking for now because we have such weak test coverage at this point)" >
<mkdir dir="${junitbin}"/>
<antcall target="create-timestamp-source-code"/> <!-- DLC NOW! The -run targets are mucking with this! It isn't fatal, but it should be fixed. -->
<!-- TODO(DLC)[EWTS->Tibetan]: <antcall target="our-internal-javac-task">
<param name="mybin" value="${junitbin}"/>
<param name="my.included.source.file"
value="org/thdl/tib/text/ttt/EWTSTest.java"/>
</antcall> -->
<antcall target="our-internal-javac-task">
<param name="mybin" value="${junitbin}"/>
<param name="my.included.source.file"
value="org/thdl/tib/text/ttt/EWTStibwniniTest.java"/>
</antcall>
<antcall target="our-internal-javac-task">
<param name="mybin" value="${junitbin}"/>
<param name="my.included.source.file"
@ -482,16 +492,6 @@ the jvm starting tomcat:
<param name="my.included.source.file"
value="org/thdl/tib/text/ttt/PackageTest.java"/>
</antcall>
<antcall target="our-internal-javac-task">
<param name="mybin" value="${junitbin}"/>
<param name="my.included.source.file"
value="org/thdl/tib/text/ttt/EWTSTest.java"/>
</antcall>
<antcall target="our-internal-javac-task">
<param name="mybin" value="${junitbin}"/>
<param name="my.included.source.file"
value="org/thdl/tib/text/ttt/EWTStibwniniTest.java"/>
</antcall>
<antcall target="our-internal-javac-task">
<param name="mybin" value="${junitbin}"/>
<param name="my.included.source.file"

View file

@ -73,7 +73,7 @@
<formatter type="xml"/><!-- If not XML, then 'ant -buildfile
build.xml check-report' will fail. -->
<sysproperty key="java.awt.headless" value="true"/>
<test name="org.thdl.tib.text.ttt.EWTSTest"/>
<!-- TODO(DLC)[EWTS->Tibetan]: enable this test: <test name="org.thdl.tib.text.ttt.EWTSTest"/> -->
<test name="org.thdl.tib.text.ttt.EWTStibwniniTest"/>
<test name="org.thdl.tib.input.TMW_RTF_TO_THDL_WYLIETest"/>
<test name="org.thdl.tib.text.TibetanMachineWebTest"/>

View file

@ -18,31 +18,59 @@ Contributor(s): ______________________________________.
package org.thdl.tib.input;
import java.io.*;
import java.awt.BorderLayout;
import java.awt.Cursor;
import java.awt.Dimension;
import java.awt.Frame;
import java.awt.LayoutManager;
import java.awt.Point;
import java.awt.event.ActionEvent;
import java.awt.event.KeyEvent;
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.awt.*;
import java.awt.event.*;
import java.awt.print.*;
import javax.swing.plaf.basic.*;
import javax.swing.*;
import javax.swing.event.*;
import javax.swing.text.*;
import javax.swing.text.rtf.*;
import java.util.Vector;
import org.thdl.tib.text.*;
import org.thdl.util.ThdlDebug;
import javax.swing.Box;
import javax.swing.JApplet;
import javax.swing.JComboBox;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JInternalFrame;
import javax.swing.JLabel;
import javax.swing.JMenu;
import javax.swing.JMenuBar;
import javax.swing.JMenuItem;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JToolBar;
import javax.swing.KeyStroke;
import javax.swing.SwingUtilities;
import javax.swing.UIManager;
import javax.swing.WindowConstants;
import javax.swing.event.DocumentEvent;
import javax.swing.event.DocumentListener;
import javax.swing.text.BadLocationException;
import org.thdl.tib.text.TibetanDocument;
import org.thdl.util.RTFFixerInputStream;
import org.thdl.util.ThdlOptions;
import org.thdl.util.ThdlVersion;
import org.thdl.util.SimpleFrame;
import org.thdl.util.StatusBar;
import org.thdl.util.ThdlActionListener;
import org.thdl.util.HTMLPane;
import org.thdl.util.SimpleFrame;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlLazyException;
import org.thdl.util.ThdlOptions;
import org.thdl.util.ThdlVersion;
import calpa.html.CalHTMLPane;

View file

@ -258,7 +258,7 @@ public class TGCPair implements THDLWylieConstants {
}
if (mark < v.length()) {
vowelish_sb.append(v.substring(mark));
ThdlDebug.noteIffyCode();
// TODO(DLC)[EWTS->Tibetan]: ThdlDebug.noteIffyCode();
// FIXME(dchandler): what should I do here? I doubt v is
// valid.
}

View file

@ -506,5 +506,25 @@ public class UnicodeUtils implements UnicodeConstants {
} while (mutated_this_time_through);
return mutated;
}
/** Returns true iff ch is a valid Tibetan codepoint in Unicode
* 4.0: */
public boolean isTibetanUnicodeCodepoint(char ch) {
// NOTE: could use an array of 256 booleans for speed but I'm lazy
return ((ch >= '\u0f00' && ch <= '\u0fcf')
&& !(ch == '\u0f48'
|| (ch > '\u0f6a' && ch < '\u0f71')
|| (ch > '\u0f8b' && ch < '\u0f90')
|| ch == '\u0f98'
|| ch == '\u0fbd'
|| ch == '\u0fcd'
|| ch == '\u0fce'));
}
/** Returns true iff ch is in 0F00-0FFF but isn't a valid Tibetan
* codepoint in Unicode 4.0: */
public boolean isInvalidTibetanUnicode(char ch) {
return (isInTibetanRange(ch) && !isTibetanUnicodeCodepoint(ch));
}
}

View file

@ -258,7 +258,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
throws TibetanSyntaxException
{
Vector syllables = new Vector();
int grcls_len = grcls.length();
int grcls_len = grcls.size();
int beginning_of_cluster = 0;
for (int i = 0; i < grcls_len; i++) {
UnicodeGraphemeCluster current_grcl

View file

@ -178,9 +178,9 @@ class ValidatingUnicodeReaderTest {
}
}
DLC;
assertTrue(ValidatingUnicodeReader.isFullyValidUnicode(
"\u0F\u0F\u0F\u0F\u0F"));
// DLC;
// assertTrue(ValidatingUnicodeReader.isFullyValidUnicode(
// "\u0F00\u0F00\u0F00\u0F00\u0F00"));
}
void testSyntacticallyLegalUnicodeToThdlWylie() {

View file

@ -18,17 +18,15 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.util.HashSet;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.StringTokenizer;
import java.util.List;
import org.thdl.util.ThdlOptions;
import org.thdl.tib.text.DuffCode;
import org.thdl.tib.text.THDLWylieConstants;
import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.tib.text.TibTextUtils;
import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.util.ThdlOptions;
/** A singleton class that should contain (but due to laziness and
@ -62,7 +60,9 @@ public final class ACIPTraits implements TTraits {
public int maxWowelLength() { return MAX_WOWEL_LENGTH; }
public boolean hasSimpleError(TPair p) {
return ("A".equals(p.getLeft()) && null == p.getRight());
return (("A".equals(p.getLeft()) && null == p.getRight())
|| (null == p.getLeft()
&& !this.disambiguator().equals(p.getRight())));
}
public String aVowel() { return "A"; }
@ -95,6 +95,11 @@ public final class ACIPTraits implements TTraits {
private HashMap superACIP2unicode = null;
private HashMap subACIP2unicode = null;
public String getUnicodeForWowel(String wowel) {
return getUnicodeFor(wowel, /* doesn't matter: */ true);
}
public /* synchronized */ String getUnicodeFor(String acip, boolean subscribed) {
if (superACIP2unicode == null) {
final boolean compactUnicode
@ -588,5 +593,45 @@ public final class ACIPTraits implements TTraits {
if (wowel.indexOf(':') >= 0)
duff.add(TibetanMachineWeb.getGlyph(getEwtsForOther(":")));
}
public String shortTranslitName() { return "ACIP"; }
public boolean isClearlyIllegal(TPair p) {
if (p.getLeft() == null
&& !disambiguator().equals(p.getRight()))
return true;
if ("+".equals(p.getLeft()))
return true;
if (isWowel(p.getLeft())
&& !aVowel().equals(p.getLeft())) // achen
return true;
if (":".equals(p.getLeft()))
return true;
if ("m".equals(p.getLeft()))
return true;
if ("m:".equals(p.getLeft()))
return true;
return false;
}
public TPairList[] breakTshegBarIntoChunks(String tt, boolean sh) {
try {
return TPairListFactory.breakACIPIntoChunks(tt, sh);
} catch (StackOverflowError e) {
throw new IllegalArgumentException("Input too large[1]: " + tt);
} catch (OutOfMemoryError e) {
throw new IllegalArgumentException("Input too large[2]: " + tt);
}
}
public boolean isACIP() { return true; }
public boolean vowelAloneImpliesAChen() { return false; }
public boolean vowelsMayStack() { return false; }
public boolean isUnicodeWowel(char ch) { return false; }
public boolean couldBeValidStack(TPairList pl) { return true; }
}

View file

@ -115,7 +115,8 @@ class ACIPTshegBarScanner extends TTshegBarScanner {
al.add(new TString("ACIP",
errMsg = ErrorsAndWarnings.getMessage(code,
shortMessages,
translit),
translit,
ACIPTraits.instance()),
TString.ERROR));
if (null != errors)
errors.append("Offset " + ((i < 0) ? "END" : ("" + i))
@ -792,7 +793,8 @@ class ACIPTshegBarScanner extends TTshegBarScanner {
al.add(new TString("ACIP",
ErrorsAndWarnings.getMessage(510,
shortMessages,
"" + ch),
"" + ch,
ACIPTraits.instance()),
TString.WARNING));
}
startOfString = i+1;
@ -902,7 +904,8 @@ class ACIPTshegBarScanner extends TTshegBarScanner {
al.add(new TString("ACIP",
ErrorsAndWarnings.getMessage(504,
shortMessages,
"" + ch),
"" + ch,
ACIPTraits.instance()),
TString.WARNING));
}
}

View file

@ -18,12 +18,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlOptions;
import java.util.ArrayList;
import java.io.PrintStream;
import junit.framework.TestCase;
import org.thdl.util.ThdlOptions;
import org.thdl.tib.text.tshegbar.UnicodeUtils;
/** Tests this package's ability to understand EWTS and turn it into
* the appropriate TMW or Unicode.
@ -53,16 +53,106 @@ public class EWTSTest extends TestCase {
public EWTSTest() { }
/** Prints a human-readable explanation of how actual and expected
* differ to out. Precondition: expected is non-null, out is
* non-null */
static void explainInequality(String actual, String expected, PrintStream out) {
if (null == actual)
out.println("Expected \""
+ UnicodeUtils.unicodeStringToPrettyString(expected)
+ "\" but found the null string");
if (actual.length() != expected.length()) {
out.println("Expected a string with " + expected.length()
+ " characters but found a string with "
+ actual.length() + " characters");
return;
}
for (int i = 0; i < actual.length(); i++) {
if (actual.charAt(i) != expected.charAt(i)) {
out.println("Expected string \"" + UnicodeUtils.unicodeStringToPrettyString(expected) + "\" but found the string \""
+ UnicodeUtils.unicodeStringToPrettyString(actual)
+ "\" which differs at character " + i + " (counting from zero, not one)");
}
}
}
/** Causes a JUnit test case failure unless the EWTS document ewts
* converts to the unicode expectedUnicode. */
static void ewts2uni_test(String ewts, String expectedUnicode) {
// TODO(DLC)[EWTS->Tibetan]: NOW! Implement me.
StringBuffer errors = new StringBuffer();
String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(),
ewts, errors,
null, true,
"None", // TODO(DLC)[EWTS->Tibetan]: ???
false /* short warnings */);
if (null == unicode) {
if (null != expectedUnicode && "none" != expectedUnicode) {
System.out.println("No unicode exists for " + ewts
+ " but you expected "
+ UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
assertTrue(false);
}
System.out.println("Unicode for " + ewts + " can't be had; errors are " + errors);
} else {
if (null != expectedUnicode && !expectedUnicode.equals(unicode)) {
explainInequality(unicode, expectedUnicode, System.out);
if (UnicodeUtils.unicodeStringToPrettyString(unicode).equals(UnicodeUtils.unicodeStringToPrettyString(expectedUnicode))) {
System.out.println("UGLY strings: The unicode for\n \"" + ewts
+ "\"\nis\n \""
+ unicode
+ "\",\nbut you expected\n \""
+ expectedUnicode
+ "\"");
} else {
System.out.println("The unicode for\n \"" + ewts
+ "\"\nis\n \""
+ UnicodeUtils.unicodeStringToPrettyString(unicode)
+ "\",\nbut you expected\n \""
+ UnicodeUtils.unicodeStringToPrettyString(expectedUnicode)
+ "\"");
}
{
StringBuffer sb = new StringBuffer(ewts);
EWTSTshegBarScanner.ExpandEscapeSequences(sb);
TPairList[] la
= EWTSTraits.instance().breakTshegBarIntoChunks(sb.toString(), false);
assertTrue(la[1] == null);
System.out.println("EWTS=" + ewts + " and l'=" + la[0].toString2());
}
assertTrue(false);
}
}
}
/** Returns true iff ewts is not a valid EWTS string. */
static boolean hasEwtsError(String ewts) {
StringBuffer errors = new StringBuffer();
String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(),
ewts, errors,
null, true,
"None", // TODO(DLC)[EWTS->Tibetan]: ???
true);
// TODO(DLC)[EWTS->Tibetan]: Is this sufficient?
return (null == unicode || errors.length() > 0);
}
/** Causes a JUnit test case failure iff the EWTS document ewts is
* legal EWTS transliteration. */
static void assert_EWTS_error(String ewts) {
// TODO(DLC)[EWTS->Tibetan]: NOW! Implement me.
boolean ewts_error = hasEwtsError(ewts);
assertTrue(ewts_error);
}
/** Tests that the EWTS->unicode converter isn't completely
braindead. */
public void testEwtsBasics() {
ewts2uni_test("ma", "\u0f58");
ewts2uni_test("mi", "\u0f58\u0f72");
ewts2uni_test("mi ", "\u0f58\u0f72\u0f0b");
ewts2uni_test("mi/", "\u0f58\u0f72\u0f0d");
ewts2uni_test("bra ", "\u0f56\u0fb2\u0f0b");
ewts2uni_test("b+ra ", "\u0f56\u0fb2\u0f0b");
ewts2uni_test("b+Ra ", "\u0f56\u0fbc\u0f0b");
}
/** Miscellaneous tests of EWTS->Unicode conversion. */
@ -83,17 +173,18 @@ public class EWTSTest extends TestCase {
ewts2uni_test("k+Ya", "\u0f40\u0FBB");
ewts2uni_test("k+Ra", "\u0f40\u0FBC");
ewts2uni_test("k+wa", "\u0f40\u0Fad");
ewts2uni_test("k+ya", "\u0f40\u0Fb3");
ewts2uni_test("k+la", "\u0f40\u0Fb3");
ewts2uni_test("k+ya", "\u0f40\u0Fb1");
ewts2uni_test("k+ra", "\u0f40\u0Fb2");
ewts2uni_test("r-I", "\u0f62\u0f81");
ewts2uni_test("l-I", "\u0f63\u0f81");
ewts2uni_test("r-i", "\u0f62\u0f80");
ewts2uni_test("l-i", "\u0f63\u0f80");
ewts2uni_test("gr-i", "\u0f42\u0f76"); // TODO(DLC)[EWTS->Tibetan]: "\u0f42\u0fb2\u0f80"
ewts2uni_test("gr-I", "\u0f42\u0f77"); // TODO(DLC)[EWTS->Tibetan]: "\u0f42\u0fb2\u0f81"
ewts2uni_test("gl-i", "\u0f42\u0f78"); // TODO(DLC)[EWTS->Tibetan]: "\u0f42\u0fb3\u0f80"
ewts2uni_test("gl-I", "\u0f42\u0f79"); // TODO(DLC)[EWTS->Tibetan]: "\u0f42\u0fb3\u0f81"
ewts2uni_test("gr-i", "\u0f42\u0fb2\u0f80");
ewts2uni_test("gr-I", "\u0f42\u0fb2\u0f81");
ewts2uni_test("gl-i", "\u0f42\u0fb3\u0f80");
ewts2uni_test("gl-I", "\u0f42\u0fb3\u0f81");
}
@ -102,26 +193,39 @@ public class EWTSTest extends TestCase {
* mostly by testing that the Unicode generated for a single
* wowel or set of wowels atop achen (U+0F68) is correct. */
public void test__EWTS__wowels_on_achen() {
assert_EWTS_error("+yo");
ewts2uni_test("a+yo", "\u0f68\u0fb1\u0f7c");
ewts2uni_test("a+yo+o", "\u0f68\u0fb1\u0f7c\u0f7c");
ewts2uni_test("a+ya.una", "\u0f68\u0fb1\u0f68\u0f74\u0f53");
ewts2uni_test("a+yauna", "\u0f68\u0fb1\u0f7d\u0f53"); // TODO(DLC)[EWTS->Tibetan]: warn that '.' might have been needed
ewts2uni_test("a+yoona", "\u0f68\u0fb1\u0f7c\u0f68\u0f7c\u0f53"); // TODO(DLC)[EWTS->Tibetan]: warn!
ewts2uni_test("a+yoon", "\u0f68\u0fb1\u0f7c\u0f68\u0f7c\u0f53"); // TODO(DLC)[EWTS->Tibetan]: warn!
// ewts2uni_test("a+yo+ona", "TODO(DLC)[EWTS->Tibetan]");
ewts2uni_test("A", "\u0f68\u0f71");
ewts2uni_test("i", "\u0f68\u0f72");
ewts2uni_test("I", "\u0f68\u0f73");
ewts2uni_test("I", "\u0f68\u0f71\u0f72");
ewts2uni_test("u", "\u0f68\u0f74");
ewts2uni_test("U", "\u0f68\u0f75");
ewts2uni_test("a+r-i", "\u0f68\u0f76");
ewts2uni_test("a+r-I", "\u0f68\u0f77");
ewts2uni_test("a+l-i", "\u0f68\u0f78");
ewts2uni_test("a+l-I", "\u0f68\u0f79");
ewts2uni_test("U", "\u0f68\u0f71\u0f74");
ewts2uni_test("a+r-i", "\u0f68\u0fb2\u0f80");
ewts2uni_test("a+r-I", "\u0f68\u0fb2\u0f81");
ewts2uni_test("a+l-i", "\u0f68\u0fb3\u0f80");
ewts2uni_test("a+l-I", "\u0f68\u0fb3\u0f81");
ewts2uni_test("e", "\u0f68\u0f7a");
ewts2uni_test("ai", "\u0f68\u0f7b");
// ewts2uni_test("ao", "\u0f68\u0f68\u0f7c"); // TODO(DLC)[EWTS->Tibetan]:
// assert_EWTS_error("ao"); // TODO(DLC)[EWTS->Tibetan]:
ewts2uni_test("o", "\u0f68\u0f7c");
ewts2uni_test("au", "\u0f68\u0f7d");
ewts2uni_test("aM", "\u0f68\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("aH", "\u0f68\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("aM", "\u0f68\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("aH", "\u0f68\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("-i", "\u0f68\u0f80");
ewts2uni_test("-I", "\u0f68\u0f81");
ewts2uni_test("a~M`", "\u0f68\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("a~M", "\u0f68\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("a?", "\u0f68\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("a~M`", "\u0f68\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("a~M", "\u0f68\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("a?", "\u0f68\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("\\u0f68", "\u0f68");
ewts2uni_test("a\\u0f86", "\u0f68\u0f86");
ewts2uni_test("a\\U0f86", "\u0f68\u0f86");
ewts2uni_test("a\\U0F86", "\u0f68\u0f86");
@ -132,24 +236,32 @@ public class EWTSTest extends TestCase {
ewts2uni_test("a\\u00000F86", "\u0f68\u0f86");
ewts2uni_test("a\\u0f87", "\u0f68\u0f87");
ewts2uni_test("aMH", "\u0f68\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("aHM", "\u0f68\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("aMH", "\u0f68\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("aHM", "\u0f68\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("a", "\u0f68");
}
public void test__EWTS__stacked_wowels_on_achen() {
if (false) { // TODO(DLC)[EWTS->Tibetan]: make this true ASAP
ewts2uni_test("o+o", "\u0f68\u0f7c\u0f7c");
assert_EWTS_error("a+o"); // TODO(DLC)[EWTS->Tibetan]:?
assert_EWTS_error("o+a"); // TODO(DLC)[EWTS->Tibetan]:?
assert_EWTS_error("ka+o"); // TODO(DLC)[EWTS->Tibetan]:?
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
// the same as I and o+o is the same as au.
ewts2uni_test("A+i", "\u0f68\u0f73");
ewts2uni_test("o+o", "\u0f68\u0f7d");
ewts2uni_test("e+e", "\u0f68\u0f7b");
ewts2uni_test("e+e+e", "\u0f68\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("e+e+e+e", "\u0f68\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("e+e+e+e+e", "\u0f68\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("A+i", "\u0f68\u0f71\u0f72");
ewts2uni_test("e+e", "\u0f68\u0f7a\u0f7a");
ewts2uni_test("e+e+e", "\u0f68\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("e+e+e+e", "\u0f68\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("e+e+e+e+e", "\u0f68\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("o+e", "\u0f68\u0f7c\u0f7a");
ewts2uni_test("u+A+i+o+e", "\u0f68\u0f74\u0f72\u0f7c\u0f7a");
ewts2uni_test("u+A+i+o+eHM", "\u0f68\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("u+A", "\u0f68\u0f75");
ewts2uni_test("u+A+i+o+e", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("u+A+i+o+eHM", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("u+A", "\u0f68\u0f74\u0f71");
ewts2uni_test("a", "\u0f68");
ewts2uni_test("o+-I", "DLC");
}
}
/** Tests that our implementation of EWTS's wowels are correct,
@ -158,14 +270,16 @@ public class EWTSTest extends TestCase {
public void test__EWTS__wowels_on_ka() {
ewts2uni_test("kA", "\u0f40\u0f71");
ewts2uni_test("ki", "\u0f40\u0f72");
ewts2uni_test("kI", "\u0f40\u0f73");
ewts2uni_test("kI", "\u0f40\u0f71\u0f72");
ewts2uni_test("ku", "\u0f40\u0f74");
ewts2uni_test("kU", "\u0f40\u0f75");
ewts2uni_test("ka+r-i", "\u0f40\u0f76");
ewts2uni_test("ka+r-I", "\u0f40\u0f77");
ewts2uni_test("ka+l-i", "\u0f40\u0f78");
ewts2uni_test("ka+l-I", "\u0f40\u0f79");
ewts2uni_test("kU", "\u0f40\u0f71\u0f74");
ewts2uni_test("k+r-i", "\u0f40\u0fb2\u0f80");
ewts2uni_test("k+r-I", "\u0f40\u0fb2\u0f81");
ewts2uni_test("k+l-i", "\u0f40\u0fb3\u0f80");
ewts2uni_test("k+l-I", "\u0f40\u0fb3\u0f81");
ewts2uni_test("ke", "\u0f40\u0f7a");
ewts2uni_test("e", "\u0f68\u0f7a");
ewts2uni_test("a", "\u0f68");
ewts2uni_test("kai", "\u0f40\u0f7b");
ewts2uni_test("ko", "\u0f40\u0f7c");
ewts2uni_test("kau", "\u0f40\u0f7d");
@ -192,34 +306,39 @@ public class EWTSTest extends TestCase {
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
// the same as I and o+o is the same as au.
ewts2uni_test("kA+i", "\u0f40\u0f73");
ewts2uni_test("ko+o", "\u0f40\u0f7d");
ewts2uni_test("ke+e", "\u0f40\u0f7b");
ewts2uni_test("ke+e+e", "\u0f40\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("ke+e+e+e", "\u0f40\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("ke+e+e+e+e", "\u0f40\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("kA+i", "\u0f40\u0f71\u0f72");
ewts2uni_test("ko+o", "\u0f40\u0f7c\u0f7c");
ewts2uni_test("ke+e", "\u0f40\u0f7a\u0f7a");
ewts2uni_test("ke+e+e", "\u0f40\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("ke+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("ke+e+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("ko+e", "\u0f40\u0f7c\u0f7a");
ewts2uni_test("ku+A+i+o+e", "\u0f40\u0f74\u0f72\u0f7c\u0f7a");
ewts2uni_test("ku+A+i+o+eHM", "\u0f40\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("ku+A", "\u0f40\u0f75");
ewts2uni_test("ku+A+i+o+e", "\u0f40\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("ku+A+i+o+eHM", "\u0f40\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("ku+A", "\u0f40\u0f74\u0f71");
ewts2uni_test("k", "\u0f40");
ewts2uni_test("ka", "\u0f40");
assert_EWTS_error("ka+r-i"); // TODO(DLC)[EWTS->Tibetan]: right?
assert_EWTS_error("ka+r-I");
assert_EWTS_error("ka+l-i");
assert_EWTS_error("ka+l-I");
assert_EWTS_error("ko+a");
assert_EWTS_error("ka+o");
}
/** Tests that our implementation of EWTS's wowels are correct,
* mostly by testing that the Unicode generated for a single
* wowel or set of wowels atop achung (U+0F60) is correct. */
public void test__EWTS__wowels_on_achung() {
ewts2uni_test("'a", "\u0f60");
ewts2uni_test("'A", "\u0f60\u0f71");
ewts2uni_test("'i", "\u0f60\u0f72");
ewts2uni_test("'I", "\u0f60\u0f73");
ewts2uni_test("'I", "\u0f60\u0f71\u0f72");
ewts2uni_test("'u", "\u0f60\u0f74");
ewts2uni_test("'U", "\u0f60\u0f75");
ewts2uni_test("'a+r-i", "\u0f60\u0f76");
ewts2uni_test("'a+r-I", "\u0f60\u0f77");
ewts2uni_test("'a+l-i", "\u0f60\u0f78");
ewts2uni_test("'a+l-I", "\u0f60\u0f79");
ewts2uni_test("'U", "\u0f60\u0f71\u0f74");
ewts2uni_test("'e", "\u0f60\u0f7a");
ewts2uni_test("'ai", "\u0f60\u0f7b");
ewts2uni_test("'o", "\u0f60\u0f7c");
@ -247,75 +366,81 @@ public class EWTSTest extends TestCase {
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
// the same as I and o+o is the same as au.
ewts2uni_test("'A+i", "\u0f60\u0f73");
ewts2uni_test("'o+o", "\u0f60\u0f7d");
ewts2uni_test("'e+e", "\u0f60\u0f7b");
ewts2uni_test("'e+e+e", "\u0f60\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'e+e+e+e", "\u0f60\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'e+e+e+e+e", "\u0f60\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'A+i", "\u0f60\u0f71\u0f72");
ewts2uni_test("'o+o", "\u0f60\u0f7c\u0f7c");
ewts2uni_test("'e+e", "\u0f60\u0f7a\u0f7a");
ewts2uni_test("'e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'e+e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'o+e", "\u0f60\u0f7c\u0f7a");
ewts2uni_test("'u+A+i+o+e", "\u0f60\u0f74\u0f72\u0f7c\u0f7a");
ewts2uni_test("'u+A+i+o+eHM", "\u0f60\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("'u+A+i+o+e", "\u0f60\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("'u+A+i+o+eHM", "\u0f60\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("'u+A", "\u0f60\u0f75");
ewts2uni_test("'u+A", "\u0f60\u0f74\u0f71");
ewts2uni_test("'", "\u0f60");
ewts2uni_test("'a", "\u0f60");
ewts2uni_test("'+r-i", "\u0f60\u0fb2\u0f80");
ewts2uni_test("'+r-I", "\u0f60\u0fb2\u0f81");
ewts2uni_test("'+l-i", "\u0f60\u0fb3\u0f80");
ewts2uni_test("'+l-I", "\u0f60\u0fb3\u0f81");
}
/** Tests that our implementation of EWTS's wowels are correct,
* mostly by testing that the Unicode generated for a single
* wowel or set of wowels atop k+Sh (U+0F69) is correct. */
public void test__EWTS__wowels_on_kSh() {
ewts2uni_test("k+ShA", "\u0f69\u0f71");
ewts2uni_test("k+Shi", "\u0f69\u0f72");
ewts2uni_test("k+ShI", "\u0f69\u0f73");
ewts2uni_test("k+Shu", "\u0f69\u0f74");
ewts2uni_test("k+ShU", "\u0f69\u0f75");
ewts2uni_test("k+Sha+r-i", "\u0f69\u0f76");
ewts2uni_test("k+Sha+r-I", "\u0f69\u0f77");
ewts2uni_test("k+Sha+l-i", "\u0f69\u0f78");
ewts2uni_test("k+Sha+l-I", "\u0f69\u0f79");
ewts2uni_test("k+She", "\u0f69\u0f7a");
ewts2uni_test("k+Shai", "\u0f69\u0f7b");
ewts2uni_test("k+Sho", "\u0f69\u0f7c");
ewts2uni_test("k+Shau", "\u0f69\u0f7d");
ewts2uni_test("k+ShaM", "\u0f69\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+ShaH", "\u0f69\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sh-i", "\u0f69\u0f80");
ewts2uni_test("k+Sh-I", "\u0f69\u0f81");
ewts2uni_test("k+Sha~M`", "\u0f69\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sha~M", "\u0f69\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sha?", "\u0f69\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sha\\u0f86", "\u0f69\u0f86");
ewts2uni_test("k+Sha\\U0f86", "\u0f69\u0f86");
ewts2uni_test("k+Sha\\U0F86", "\u0f69\u0f86");
ewts2uni_test("k+Sha\\u0F86", "\u0f69\u0f86");
ewts2uni_test("k+Sha\\u00000f86", "\u0f69\u0f86");
ewts2uni_test("k+Sha\\u00000f86", "\u0f69\u0f86");
ewts2uni_test("k+Sha\\u00000F86", "\u0f69\u0f86");
ewts2uni_test("k+Sha\\u00000F86", "\u0f69\u0f86");
ewts2uni_test("k+Sha\\u0f87", "\u0f69\u0f87");
ewts2uni_test("k+ShA", "\u0f40\u0fb5\u0f71");
ewts2uni_test("k+Shi", "\u0f40\u0fb5\u0f72");
ewts2uni_test("k+ShI", "\u0f40\u0fb5\u0f71\u0f72");
ewts2uni_test("k+Shu", "\u0f40\u0fb5\u0f74");
ewts2uni_test("k+ShU", "\u0f40\u0fb5\u0f71\u0f74");
ewts2uni_test("k+She", "\u0f40\u0fb5\u0f7a");
ewts2uni_test("k+Shai", "\u0f40\u0fb5\u0f7b");
ewts2uni_test("k+Sho", "\u0f40\u0fb5\u0f7c");
ewts2uni_test("k+Shau", "\u0f40\u0fb5\u0f7d");
ewts2uni_test("k+ShaM", "\u0f40\u0fb5\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+ShaH", "\u0f40\u0fb5\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sh-i", "\u0f40\u0fb5\u0f80");
ewts2uni_test("k+Sh-I", "\u0f40\u0fb5\u0f81");
ewts2uni_test("k+Sha~M`", "\u0f40\u0fb5\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sha~M", "\u0f40\u0fb5\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sha?", "\u0f40\u0fb5\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sha\\u0f86", "\u0f40\u0fb5\u0f86");
ewts2uni_test("k+Sha\\U0f86", "\u0f40\u0fb5\u0f86");
ewts2uni_test("k+Sha\\U0F86", "\u0f40\u0fb5\u0f86");
ewts2uni_test("k+Sha\\u0F86", "\u0f40\u0fb5\u0f86");
ewts2uni_test("k+Sha\\u00000f86", "\u0f40\u0fb5\u0f86");
ewts2uni_test("k+Sha\\u00000f86", "\u0f40\u0fb5\u0f86");
ewts2uni_test("k+Sha\\u00000F86", "\u0f40\u0fb5\u0f86");
ewts2uni_test("k+Sha\\u00000F86", "\u0f40\u0fb5\u0f86");
ewts2uni_test("k+Sha\\u0f87", "\u0f40\u0fb5\u0f87");
ewts2uni_test("k+ShaMH", "\u0f69\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+ShaHM", "\u0f69\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+ShaMH", "\u0f40\u0fb5\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+ShaHM", "\u0f40\u0fb5\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
// the same as I and o+o is the same as au.
ewts2uni_test("k+ShA+i", "\u0f69\u0f73");
ewts2uni_test("k+Sho+o", "\u0f69\u0f7d");
ewts2uni_test("k+She+e", "\u0f69\u0f7b");
ewts2uni_test("k+She+e+e", "\u0f69\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+She+e+e+e", "\u0f69\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+She+e+e+e+e", "\u0f69\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+Sho+e", "\u0f69\u0f7c\u0f7a");
ewts2uni_test("k+Shu+A+i+o+e", "\u0f69\u0f74\u0f72\u0f7c\u0f7a");
ewts2uni_test("k+Shu+A+i+o+eHM", "\u0f69\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("k+Shu+A", "\u0f69\u0f75");
ewts2uni_test("k+ShA+i", "\u0f40\u0fb5\u0f71\u0f72");
ewts2uni_test("k+Sho+o", "\u0f40\u0fb5\u0f7c\u0f7c");
ewts2uni_test("k+She+e", "\u0f40\u0fb5\u0f7a\u0f7a");
ewts2uni_test("k+She+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+She+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+She+e+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+Sho+e", "\u0f40\u0fb5\u0f7c\u0f7a");
ewts2uni_test("k+Shu+A+i+o+e", "\u0f40\u0fb5\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("k+Shu+A+i+o+eHM", "\u0f40\u0fb5\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("k+Shu+A", "\u0f40\u0fb5\u0f74\u0f71");
ewts2uni_test("k+Sh", "\u0f69");
ewts2uni_test("k+Sha", "\u0f69");
ewts2uni_test("k+Sh", "\u0f40\u0fb5");
ewts2uni_test("k+Sha", "\u0f40\u0fb5");
ewts2uni_test("k+Sh+r-i", "\u0f40\u0fb5\u0fb2\u0f80");
ewts2uni_test("k+Sh+r-I", "\u0f40\u0fb5\u0fb2\u0f81");
ewts2uni_test("k+Sh+l-i", "\u0f40\u0fb5\u0fb3\u0f80");
ewts2uni_test("k+Sh+l-I", "\u0f40\u0fb5\u0fb3\u0f81");
}
/** Tests that our implementation of EWTS's wowels are correct,
@ -325,25 +450,22 @@ public class EWTSTest extends TestCase {
public void test__EWTS__wowels_on_phyw() {
ewts2uni_test("phywA", "\u0f55\u0fb1\u0fad\u0f71");
ewts2uni_test("phywi", "\u0f55\u0fb1\u0fad\u0f72");
ewts2uni_test("phywI", "\u0f55\u0fb1\u0fad\u0f73");
ewts2uni_test("phywI", "\u0f55\u0fb1\u0fad\u0f71\u0f72");
ewts2uni_test("phywu", "\u0f55\u0fb1\u0fad\u0f74");
ewts2uni_test("phywU", "\u0f55\u0fb1\u0fad\u0f75");
ewts2uni_test("phywa+r-i", "\u0f55\u0fb1\u0fad\u0f76");
ewts2uni_test("phywa+r-I", "\u0f55\u0fb1\u0fad\u0f77");
ewts2uni_test("phywa+l-i", "\u0f55\u0fb1\u0fad\u0f78");
ewts2uni_test("phywa+l-I", "\u0f55\u0fb1\u0fad\u0f79");
ewts2uni_test("phywU", "\u0f55\u0fb1\u0fad\u0f71\u0f74");
ewts2uni_test("phywe", "\u0f55\u0fb1\u0fad\u0f7a");
ewts2uni_test("phywai", "\u0f55\u0fb1\u0fad\u0f7b");
ewts2uni_test("phywo", "\u0f55\u0fb1\u0fad\u0f7c");
ewts2uni_test("phywau", "\u0f55\u0fb1\u0fad\u0f7d");
ewts2uni_test("phywaM", "\u0f55\u0fb1\u0fad\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywaH", "\u0f55\u0fb1\u0fad\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phyw-i", "\u0f55\u0fb1\u0fad\u0f80");
ewts2uni_test("phyw-I", "\u0f55\u0fb1\u0fad\u0f81");
ewts2uni_test("phywa~M`", "\u0f55\u0fb1\u0fad\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywa~M", "\u0f55\u0fb1\u0fad\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywa?", "\u0f55\u0fb1\u0fad\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phyw\\u0f86", "\u0f55\u0fb1\u0fad\u0f86");
assertEquals(EWTSTraits.instance().getUnicodeForWowel("\u0f86+\u0f84"), "\u0f86\u0f84");
ewts2uni_test("phyw\\u0f84\\u0f86", "\u0f55\u0fb1\u0fad\u0f84\u0f86");
ewts2uni_test("phyw\\u0f84\u0f86", "\u0f55\u0fb1\u0fad\u0f84\u0f86");
ewts2uni_test("phywa\\u0f86", "\u0f55\u0fb1\u0fad\u0f86");
ewts2uni_test("phywa\\u0f86\u0f84", "\u0f55\u0fb1\u0fad\u0f86\u0f84");
ewts2uni_test("phywa\\U0f86", "\u0f55\u0fb1\u0fad\u0f86");
ewts2uni_test("phywa\\U0F86", "\u0f55\u0fb1\u0fad\u0f86");
ewts2uni_test("phywa\\u0F86", "\u0f55\u0fb1\u0fad\u0f86");
@ -353,25 +475,34 @@ public class EWTSTest extends TestCase {
ewts2uni_test("phywa\\u00000F86", "\u0f55\u0fb1\u0fad\u0f86");
ewts2uni_test("phywa\\u0f87", "\u0f55\u0fb1\u0fad\u0f87");
ewts2uni_test("phywaMH", "\u0f55\u0fb1\u0fad\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywaHM", "\u0f55\u0fb1\u0fad\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
// the same as I and o+o is the same as au.
ewts2uni_test("phywA+i", "\u0f55\u0fb1\u0fad\u0f73");
ewts2uni_test("phywo+o", "\u0f55\u0fb1\u0fad\u0f7d");
ewts2uni_test("phywe+e", "\u0f55\u0fb1\u0fad\u0f7b");
ewts2uni_test("phywe+e+e", "\u0f55\u0fb1\u0fad\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywe+e+e+e", "\u0f55\u0fb1\u0fad\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywe+e+e+e+e", "\u0f55\u0fb1\u0fad\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywA+i", "\u0f55\u0fb1\u0fad\u0f71\u0f72");
ewts2uni_test("phywo+o", "\u0f55\u0fb1\u0fad\u0f7c\u0f7c");
ewts2uni_test("phywe+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a");
ewts2uni_test("phywe+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywe+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywe+e+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywo+e", "\u0f55\u0fb1\u0fad\u0f7c\u0f7a");
ewts2uni_test("phywu+A+i+o+e", "\u0f55\u0fb1\u0fad\u0f74\u0f72\u0f7c\u0f7a");
ewts2uni_test("phywu+A+i+o+eHM", "\u0f55\u0fb1\u0fad\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("phywu+A", "\u0f55\u0fb1\u0fad\u0f75");
ewts2uni_test("phywu+A+i+o+e", "\u0f55\u0fb1\u0fad\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("phywu+A+i+o+eHM", "\u0f55\u0fb1\u0fad\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("phywu+A", "\u0f55\u0fb1\u0fad\u0f74\u0f71");
ewts2uni_test("phyw", "\u0f55\u0fb1\u0fad");
ewts2uni_test("phywa", "\u0f55\u0fb1\u0fad");
ewts2uni_test("phywaM", "\u0f55\u0fb1\u0fad\u0f7e"); /* TODO(DLC)[EWTS->Tibetan]: NOW: aM is not a wowel! */ // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywaH", "\u0f55\u0fb1\u0fad\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywa~M`", "\u0f55\u0fb1\u0fad\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywa~M", "\u0f55\u0fb1\u0fad\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywa?", "\u0f55\u0fb1\u0fad\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywaMH", "\u0f55\u0fb1\u0fad\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywaHM", "\u0f55\u0fb1\u0fad\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
assert_EWTS_error("phywr-i");
assert_EWTS_error("phyw+r-i");
assert_EWTS_error("phyw+l-i");
}
/** Tests that our implementation of EWTS's wowels are correct,
@ -382,13 +513,9 @@ public class EWTSTest extends TestCase {
public void test__EWTS__wowels_on_kjjkkj() {
ewts2uni_test("k+j+j+k+k+jA", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71");
ewts2uni_test("k+j+j+k+k+ji", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f72");
ewts2uni_test("k+j+j+k+k+jI", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f73");
ewts2uni_test("k+j+j+k+k+jI", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
ewts2uni_test("k+j+j+k+k+ju", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74");
ewts2uni_test("k+j+j+k+k+jU", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f75");
ewts2uni_test("k+j+j+k+k+ja+r-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f76");
ewts2uni_test("k+j+j+k+k+ja+r-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f77");
ewts2uni_test("k+j+j+k+k+ja+l-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f78");
ewts2uni_test("k+j+j+k+k+ja+l-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f79");
ewts2uni_test("k+j+j+k+k+jU", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74");
ewts2uni_test("k+j+j+k+k+je", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a");
ewts2uni_test("k+j+j+k+k+jai", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b");
ewts2uni_test("k+j+j+k+k+jo", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c");
@ -416,85 +543,52 @@ public class EWTSTest extends TestCase {
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
// the same as I and o+o is the same as au.
ewts2uni_test("k+j+j+k+k+jA+i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f73");
ewts2uni_test("k+j+j+k+k+jo+o", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7d");
ewts2uni_test("k+j+j+k+k+je+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b");
ewts2uni_test("k+j+j+k+k+je+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+je+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+je+e+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+jA+i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
ewts2uni_test("k+j+j+k+k+jo+o", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7c");
ewts2uni_test("k+j+j+k+k+je+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a");
ewts2uni_test("k+j+j+k+k+je+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+je+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+je+e+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+jo+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7a");
ewts2uni_test("k+j+j+k+k+ju+A+i+o+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f72\u0f7c\u0f7a");
ewts2uni_test("k+j+j+k+k+ju+A+i+o+eHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("k+j+j+k+k+ju+A", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f75");
ewts2uni_test("k+j+j+k+k+ju+A+i+o+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("k+j+j+k+k+ju+A+i+o+eHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("k+j+j+k+k+ju+A", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71");
ewts2uni_test("k+j+j+k+k+j", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
ewts2uni_test("k+j+j+k+k+ja", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
ewts2uni_test("k+j+j+k+k+j+r-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f80");
ewts2uni_test("k+j+j+k+k+j+r-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f81");
ewts2uni_test("k+j+j+k+k+j+l-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f80");
ewts2uni_test("k+j+j+k+k+j+l-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f81");
}
/** Tests that the EWTS that the spec says corresponds to each
* codepoint really does. */
public void test__EWTS__tags_each_unicode_value() {
ewts2uni_test("\\u0ef0", "\u0ef0");
for (char i = '\u0ef0'; i < '\u1010'; i++) {
// invalid codepoint like U+0F48? No problem! TODO(DLC)[EWTS->Tibetan]: NOTE: use a unicode "spell checker" to find such problems
String s = new String(new char[] { i });
ewts2uni_test(UnicodeUtils.unicodeStringToPrettyString(s), s);
ewts2uni_test("\\" + UnicodeUtils.unicodeStringToPrettyString(s), s);
}
ewts2uni_test("\\u0000", "\u0000");
ewts2uni_test("\\u0eff", "\u0eff");
ewts2uni_test("\\u0eff", "\u0eff");
ewts2uni_test("\\u0f00", "\u0f00");
ewts2uni_test("\\u0f40", "\u0f40");
ewts2uni_test("\\u0f70", "\u0f70");
ewts2uni_test("\\u0fff", "\u0fff");
assert_EWTS_error("\\u0f70"); // reserved codepoint
assert_EWTS_error("\\u0fff"); // reserved codepoint
ewts2uni_test("\\uf000", "\uf000");
ewts2uni_test("\\uf01f", "\uf01f");
ewts2uni_test("\\uefff", "\uefff");
ewts2uni_test("\\ucafe0000", "\ucafe0000");
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
ewts2uni_test("\\ucafe0f00", "\ucafe0f00");
ewts2uni_test("\\ucafe0f40", "\ucafe0f40");
ewts2uni_test("\\ucafe0f70", "\ucafe0f70");
ewts2uni_test("\\ucafe0fff", "\ucafe0fff");
ewts2uni_test("\\ucafef000", "\ucafef000");
ewts2uni_test("\\ucafef01f", "\ucafef01f");
ewts2uni_test("\\ucafeefff", "\ucafeefff");
ewts2uni_test("\\u00000000", "\u00000000");
ewts2uni_test("\\u00000eff", "\u00000eff");
ewts2uni_test("\\u00000eff", "\u00000eff");
ewts2uni_test("\\u00000f00", "\u00000f00");
ewts2uni_test("\\u00000f40", "\u00000f40");
ewts2uni_test("\\u00000f70", "\u00000f70");
ewts2uni_test("\\u00000fff", "\u00000fff");
ewts2uni_test("\\u0000f000", "\u0000f000");
ewts2uni_test("\\u0000f01f", "\u0000f01f");
ewts2uni_test("\\u0000efff", "\u0000efff");
ewts2uni_test("\\u00000000", "\u0000");
ewts2uni_test("\\u00000eff", "\u0eff");
ewts2uni_test("\\u00000eff", "\u0eff");
ewts2uni_test("\\u00000f00", "\u0f00");
ewts2uni_test("\\u00000f40", "\u0f40");
ewts2uni_test("\\u00000f70", "\u0f70");
ewts2uni_test("\\u00000fff", "\u0fff");
ewts2uni_test("\\u0000f000", "\uf000");
ewts2uni_test("\\u0000f01f", "\uf01f");
ewts2uni_test("\\u0000efff", "\uefff");
ewts2uni_test("\\UcaFe0000", "\ucaFe0000");
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
ewts2uni_test("\\UcaFe0f00", "\ucaFe0f00");
ewts2uni_test("\\UcaFe0f40", "\ucaFe0f40");
ewts2uni_test("\\UcaFe0f70", "\ucaFe0f70");
ewts2uni_test("\\UcaFe0fff", "\ucaFe0fff");
ewts2uni_test("\\UcaFef000", "\ucaFef000");
ewts2uni_test("\\UcaFef01f", "\ucaFef01f");
ewts2uni_test("\\UcaFeefff", "\ucaFeefff");
// Below was semiautomatically generated from the EWTS spec's
// 'ewts.xml' representation (early August 2004 edition):
ewts2uni_test("v", "\u0F56\u0F39");
ewts2uni_test("f", "\u0F55\u0F39");
ewts2uni_test("\u0f88+ka", "\u0f88\u0f90");
ewts2uni_test("\u0f88+kha", "\u0f88\u0f91");
ewts2uni_test("oM", "\u0F00");
ewts2uni_test("\\u0F01", "\u0F01");
ewts2uni_test("\\u0F02", "\u0F02");
@ -599,13 +693,13 @@ public class EWTSTest extends TestCase {
ewts2uni_test("s", "\u0F66");
ewts2uni_test("h", "\u0F67");
ewts2uni_test("a", "\u0F68");
ewts2uni_test("k+Sh", "\u0F69");
ewts2uni_test("k+Sh", "\u0f40\u0fb5"); // there is no way in EWTS to specify \u0f69 in particular without using \\u0f69
ewts2uni_test("R+", "\u0F6A"); // TODO(DLC)[EWTS->Tibetan]: move to illegal test
ewts2uni_test("A", "\u0F71");
ewts2uni_test("A", "\u0F71"); // TODO(DLC)[EWTS->Tibetan]: no?! see above
ewts2uni_test("i", "\u0F72");
ewts2uni_test("I", "\u0F73");
ewts2uni_test("I", "\u0F71\u0F72");
ewts2uni_test("u", "\u0F74");
ewts2uni_test("U", "\u0F75");
ewts2uni_test("U", "\u0F71\u0F74");
ewts2uni_test("r-i", "\u0F76");
ewts2uni_test("r-I", "\u0F77");
ewts2uni_test("l-i", "\u0F78");
@ -731,15 +825,75 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\uF041", "\uF041");
ewts2uni_test("\\uF042", "\uF042");
}
public void test__EWTS__long_wowels() {
ewts2uni_test("k-I~M`~X", "\u0f40\u0f81\u0f82\u0f35"); // TODO(DLC)[EWTS->Tibetan]: actually the 0f68 stuff could be true... ask
}
public void test__EWTS__32bit_unicode_escapes() {
assert_EWTS_error("\\u00010000"); // TODO(dchandler): make it work
assert_EWTS_error("\\uF0010000"); // TODO(dchandler): make it work
ewts2uni_test("\\ucafe0000",
"[#ERROR Sorry, we don't yet support Unicode escape sequences above 0x0000FFFF! File a bug.]");
// TODO(dchandler): make it "\ucafe0000");
if (false) {
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
ewts2uni_test("\\ucafe0f00", "\ucafe0f00");
ewts2uni_test("\\ucafe0f40", "\ucafe0f40");
ewts2uni_test("\\ucafe0f70", "\ucafe0f70");
ewts2uni_test("\\ucafe0fff", "\ucafe0fff");
ewts2uni_test("\\ucafef000", "\ucafef000");
ewts2uni_test("\\ucafef01f", "\ucafef01f");
ewts2uni_test("\\ucafeefff", "\ucafeefff");
ewts2uni_test("\\uffffffff", "\uffffffff");
ewts2uni_test("\\ueeeeeee2", "\ueeeeeee2");
}
ewts2uni_test("\\u00000000", "\u00000000");
ewts2uni_test("\\u00000eff", "\u00000eff");
ewts2uni_test("\\u00000eff", "\u00000eff");
ewts2uni_test("\\u00000f00", "\u00000f00");
ewts2uni_test("\\u00000f40", "\u00000f40");
ewts2uni_test("\\u00000f70", "\u00000f70");
ewts2uni_test("\\u00000fff", "\u00000fff");
ewts2uni_test("\\u0000f000", "\u0000f000");
ewts2uni_test("\\u0000f01f", "\u0000f01f");
ewts2uni_test("\\u0000efff", "\u0000efff");
ewts2uni_test("\\u00000000", "\u0000");
ewts2uni_test("\\u00000eff", "\u0eff");
ewts2uni_test("\\u00000eff", "\u0eff");
ewts2uni_test("\\u00000f00", "\u0f00");
ewts2uni_test("\\u00000f40", "\u0f40");
ewts2uni_test("\\u00000f70", "\u0f70");
ewts2uni_test("\\u00000fff", "\u0fff");
ewts2uni_test("\\u0000f000", "\uf000");
ewts2uni_test("\\u0000f01f", "\uf01f");
ewts2uni_test("\\u0000efff", "\uefff");
assert_EWTS_error("\\UcaFe0000");
if (false) { // TODO(dchandler): make these work
ewts2uni_test("\\UcaFe0000", "\ucaFe0000");
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
ewts2uni_test("\\UcaFe0f00", "\ucaFe0f00");
ewts2uni_test("\\UcaFe0f40", "\ucaFe0f40");
ewts2uni_test("\\UcaFe0f70", "\ucaFe0f70");
ewts2uni_test("\\UcaFe0fff", "\ucaFe0fff");
ewts2uni_test("\\UcaFef000", "\ucaFef000");
ewts2uni_test("\\UcaFef01f", "\ucaFef01f");
ewts2uni_test("\\UcaFeefff", "\ucaFeefff");
}
}
// TODO(DLC)[EWTS->Tibetan]: test that "\[JAVA_SOURCE_WILL_NOT_COMPILE_WITHOUT_ME]uxxxx " works out well
/** Tests that certain strings are not legal EWTS. */
public void test__EWTS__illegal_things() {
assert_EWTS_error("k\\u0f19"); // only numbers combine with f19,f18,f3e,f3f
assert_EWTS_error("k\\u0f18"); // only numbers combine with f19,f18,f3e,f3f
assert_EWTS_error("k\\u0f3e"); // only numbers combine with f19,f18,f3e,f3f
assert_EWTS_error("k\\u0f3f"); // only numbers combine with f19,f18,f3e,f3f
assert_EWTS_error("m+");
assert_EWTS_error("kSha"); // use "k+Sha" instead
@ -763,7 +917,27 @@ public class EWTSTest extends TestCase {
assert_EWTS_error("al-I");
assert_EWTS_error("g..ya"); // use "g.ya" instead
assert_EWTS_error("m..");
assert_EWTS_error("g"); // use "ga" instead TODO(DLC)[EWTS->Tibetan]:?
assert_EWTS_error("k\\u0f19"); // only numbers combine with f19,f18,f3e,f3f
assert_EWTS_error("k\\u0f18"); // only numbers combine with f19,f18,f3e,f3f
assert_EWTS_error("k\\u0f3e"); // only numbers combine with f19,f18,f3e,f3f
assert_EWTS_error("k\\u0f3f"); // only numbers combine with f19,f18,f3e,f3f
}
public void testDLCFailingNow() { // TODO(DLC)[EWTS->Tibetan]
assert_EWTS_error("\\u0f19");
assert_EWTS_error("\\u0f18");
assert_EWTS_error("\\u0f19\u0f20"); // wrong order...
{
ewts2uni_test("'a+r-i", "\u0f60\u0fb2\u0f80"); // TODO(DLC)[EWTS->Tibetan]: NOW: prefix rules should make this invalid!
ewts2uni_test("'a+r-I", "\u0f60\u0fb2\u0f81");
ewts2uni_test("'a+l-i", "\u0f60\u0fb3\u0f80");// TODO(DLC)[EWTS->Tibetan]: NOW error handling is CRAP
ewts2uni_test("'a+l-I", "\u0f60\u0fb3\u0f81");
}
}
}
@ -779,8 +953,6 @@ public class EWTSTest extends TestCase {
// \u0f40\u0f7a\u0f74 is illegal (thus \u0f40\u0f74\u0f7a is
// what you probably intended), have it find \u0f7a\u0f74.
//
// TODO(DLC)[EWTS->Tibetan]:: and have it find \u0f7a\u0f7a and suggest \u0f7b, etc.
//
// TODO(DLC)[EWTS->Tibetan]: and \u0f7f\u0f7e is probably illegal and should be switched?
// TODO(DLC)[EWTS->Tibetan]: flesh out \[JAVA_SOURCE_WILL_NOT_COMPILE_WITHOUT_ME]u rules in lexing, is it like Java (where in Java source code, escapes are done in a pre-lexing pass)? no, right, \u0060 causes \u0060 in the output... and \u0f40a is not like ka. escapes separate tsheg bars as far as lexing is concerned, yes? But we use them (and only them, i.e. there is no other transliteration available) for some Tibetan Unicode characters, and then ka\[JAVA_SOURCE_WILL_NOT_COMPILE_WITHOUT_ME]u0fXX may need to seem Java-ish, maybe?

View file

@ -16,10 +16,15 @@ All Rights Reserved.
Contributor(s): ______________________________________.
*/
// TODO(DLC)[EWTS->Tibetan]: TibetanMachineWeb has duplication of much of this!
package org.thdl.tib.text.ttt;
import java.util.ArrayList;
import org.thdl.tib.text.DuffCode;
import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.util.ThdlDebug;
/** A singleton class that should contain (but due to laziness and
* ignorance probably does not contain) all the traits that make EWTS
@ -46,41 +51,68 @@ public final class EWTSTraits implements TTraits {
/** Returns '.'. */
public char disambiguatorChar() { return '.'; }
// TODO(DLC)[EWTS->Tibetan]: isClearlyIllegal and hasSimpleError are different why?
public boolean hasSimpleError(TPair p) {
return ("a".equals(p.getLeft()) && null == p.getRight()); // TODO(DLC)[EWTS->Tibetan]: (a.e) is bad, one of (.a) or (a.) is bad
if (pairHasBadWowel(p)) return true;
return (("a".equals(p.getLeft()) && null == p.getRight())
|| ("a".equals(p.getLeft())
&& null != p.getRight()
&& TibetanMachineWeb.isWylieVowel(p.getRight()))); // TODO(DLC)[EWTS->Tibetan]: or Unicode wowels? test "a\u0f74" and "a\u0f7e"
// TODO(DLC)[EWTS->Tibetan]: (a.e) is bad, one of (.a) or (a.) is bad
}
/** {tsh}, the longest consonant, has 3 characters, so this is
* three. */
public int maxConsonantLength() { return 3; }
/** {-i~M`}, in a tie for the longest wowel, has 6 characters, so
* this is six. (No, 'l-i' and 'r-i' are not wowels (but '-i'
* is). */
public int maxWowelLength() { return 5; }
/** {-i~M`}, in a tie for the longest wowel, has 5 characters, so
* this is five. (No, 'l-i' and 'r-i' are not wowels (but '-i'
* is). (TODO(DLC)[EWTS->Tibetan]: this is crap! you can put arbitrary wowels
* together using plus signs or Unicode escapes) */
public int maxWowelLength() { return 3; /* a~M` (TODO(DLC)[EWTS->Tibetan]:! why the 'a'?) */}
public boolean isUnicodeConsonant(char ch) {
return ((ch != '\u0f48' && ch >= '\u0f40' && ch <= '\u0f6a')
|| (ch != '\u0f98' && ch >= '\u0f90' && ch <= '\u0fbc'));
}
public boolean isUnicodeWowel(char ch) {
// TODO(DLC)[EWTS->Tibetan]: what about combiners that combine only with digits? TEST
return ((ch >= '\u0f71' && ch <= '\u0f84')
|| isUnicodeWowelThatRequiresAChen(ch));
}
// TODO(DLC)[EWTS->Tibetan]: u,e,i,o? If not, document the special treatment in this function's comment
public boolean isConsonant(String s) {
if (s.length() == 1 && isUnicodeConsonant(s.charAt(0))) return true;
if (aVowel().equals(s)) return false; // In EWTS, "a" is both a consonant and a vowel, but we treat it as just a vowel and insert the implied a-chen if you have a TPair ( . a) (TODO(DLC)[EWTS->Tibetan]: right?)
// TODO(DLC)[EWTS->Tibetan]: numbers are consonants?
// TODO(DLC)[EWTS->Tibetan]: just g for now
return "g".equals(s);
return TibetanMachineWeb.isWylieChar(s);
}
public boolean isWowel(String s) {
return (getUnicodeForWowel(s) != null);
/* TODO(DLC)[EWTS->Tibetan]: test ko+m+e etc.
// TODO(DLC)[EWTS->Tibetan]: all non-consonant combiners? 0f71 0f87 etc.?
if (s.length() == 1 && isUnicodeWowel(s.charAt(0))) return true;
return ("a".equals(s)
|| "e".equals(s)
|| "i".equals(s)
|| "o".equals(s)
|| "u".equals(s)
|| "?".equals(s) // TODO(DLC)[EWTS->Tibetan]: 0f84 virama???
// TODO(DLC)[EWTS->Tibetan]: & ~M` ~M ???
|| "U".equals(s)
|| "I".equals(s)
|| "A".equals(s)
|| "-i".equals(s)
|| "-I".equals(s)
|| "H".equals(s)
|| "M".equals(s)); // TODO(DLC)[EWTS->Tibetan]:???
|| "au".equals(s)
|| "ai".equals(s)
|| isWowelThatRequiresAChen(s));
// TODO(DLC)[EWTS->Tibetan]:???
*/
}
public String aVowel() { return "a"; }
@ -125,5 +157,222 @@ public final class EWTSTraits implements TTraits {
throw new Error("TODO(DLC)[EWTS->Tibetan]");
}
public String getUnicodeFor(String l, boolean subscribed) { throw new Error("TODO(DLC)[EWTS->Tibetan]"); }
public String getUnicodeForWowel(String wowel) {
if ("a".equals(wowel))
return "";
return helpGetUnicodeForWowel(wowel);
}
private String helpGetUnicodeForWowel(String wowel) {
if ("a".equals(wowel))
return null; // ko+a+e is invalid, e.g.
if (wowel.length() == 1 && isUnicodeWowel(wowel.charAt(0)))
return wowel;
// handle o+u, etc.
int i;
if ((i = wowel.indexOf("+")) >= 0) {
// recurse.
// Chris Fynn says \u0f7c\u0f7c is different from \u0f7d.
// So o+o is not the same as au. e+e is not the same as
// ai.
String left = helpGetUnicodeForWowel(wowel.substring(0, i));
String right = helpGetUnicodeForWowel(wowel.substring(i + 1));
if (null != left && null != right)
return left + right;
else
return null;
} else {
// Handle vowels. (TODO(dchandler): tibwn.ini has this
// info, use that instead of duplicating it in this code.)
if ("i".equals(wowel)) return "\u0f72";
if ("u".equals(wowel)) return "\u0f74";
if ("A".equals(wowel)) return "\u0f71";
if ("U".equals(wowel)) return "\u0f71\u0f74"; // \u0f75 is discouraged
if ("e".equals(wowel)) return "\u0f7a";
if ("o".equals(wowel)) return "\u0f7c";
if ("-i".equals(wowel)) return "\u0f80";
if ("ai".equals(wowel)) return "\u0f7b";
if ("au".equals(wowel)) return "\u0f7d";
if ("-I".equals(wowel)) return "\u0f81";
if ("I".equals(wowel)) return "\u0f71\u0f72"; // \u0f73 is discouraged
// TODO(DLC)[EWTS->Tibetan]: fix me!
// DLC say ah if ("aM".equals(wowel)) return "\u0f7e";
if ("M".equals(wowel)) return "\u0f7e";
// DLC say ah if ("aH".equals(wowel)) return "\u0f7f";
if ("H".equals(wowel)) return "\u0f7f";
// DLC say ah if ("a?".equals(wowel)) return "\u0f84";
if ("?".equals(wowel)) return "\u0f84";
// DLC say ah if ("a~M".equals(wowel)) return "\u0f83";
if ("~M".equals(wowel)) return "\u0f83";
// DLC say ah if ("a~M`".equals(wowel)) return "\u0f82";
if ("~M`".equals(wowel)) return "\u0f82";
// DLC say ah if ("aX".equals(wowel)) return "\u0f37";
if ("X".equals(wowel)) return "\u0f37";
// DLC say ah if ("a~X".equals(wowel)) return "\u0f35";
if ("~X".equals(wowel)) return "\u0f35";
return null;
}
}
public String getUnicodeFor(String l, boolean subscribed) {
// First, handle "\u0f71\u0f84\u0f86", "", "\u0f74", etc.
{
boolean already_done = true;
for (int i = 0; i < l.length(); i++) {
if (!(l.charAt(0) >= '\u0f00' && l.charAt(0) <= '\u0fff')) {
already_done = false;
break;
}
}
if (already_done)
return l; // TODO(dchandler): \u0fff etc. are not valid code points, though. Do we handle that well?
}
// TODO(DLC)[EWTS->Tibetan]:: vowels !subscribed could mean (a . i)???? I doubt it but test "i"->"\u0f68\u0f72" etc.
if (subscribed) {
if ("R".equals(l)) return "\u0fbc";
if ("Y".equals(l)) return "\u0fbb";
if ("W".equals(l)) return "\u0fba";
// g+h etc. should not be inputs to this function, but for
// completeness they're here.
if ("k".equals(l)) return "\u0F90";
if ("kh".equals(l)) return "\u0F91";
if ("g".equals(l)) return "\u0F92";
if ("g+h".equals(l)) return "\u0F93";
if ("ng".equals(l)) return "\u0F94";
if ("c".equals(l)) return "\u0F95";
if ("ch".equals(l)) return "\u0F96";
if ("j".equals(l)) return "\u0F97";
if ("ny".equals(l)) return "\u0F99";
if ("T".equals(l)) return "\u0F9A";
if ("Th".equals(l)) return "\u0F9B";
if ("D".equals(l)) return "\u0F9C";
if ("D+h".equals(l)) return "\u0F9D";
if ("N".equals(l)) return "\u0F9E";
if ("t".equals(l)) return "\u0F9F";
if ("th".equals(l)) return "\u0FA0";
if ("d".equals(l)) return "\u0FA1";
if ("d+h".equals(l)) return "\u0FA2";
if ("n".equals(l)) return "\u0FA3";
if ("p".equals(l)) return "\u0FA4";
if ("ph".equals(l)) return "\u0FA5";
if ("b".equals(l)) return "\u0FA6";
if ("b+h".equals(l)) return "\u0FA7";
if ("m".equals(l)) return "\u0FA8";
if ("ts".equals(l)) return "\u0FA9";
if ("tsh".equals(l)) return "\u0FAA";
if ("dz".equals(l)) return "\u0FAB";
if ("dz+h".equals(l)) return "\u0FAC";
if ("w".equals(l)) return "\u0FAD"; // TODO(DLC)[EWTS->Tibetan]:: ???
if ("zh".equals(l)) return "\u0FAE";
if ("z".equals(l)) return "\u0FAF";
if ("'".equals(l)) return "\u0FB0";
if ("y".equals(l)) return "\u0FB1";
if ("r".equals(l)) return "\u0FB2";
if ("l".equals(l)) return "\u0FB3";
if ("sh".equals(l)) return "\u0FB4";
if ("Sh".equals(l)) return "\u0FB5";
if ("s".equals(l)) return "\u0FB6";
if ("h".equals(l)) return "\u0FB7";
if ("a".equals(l)) return "\u0FB8";
if ("k+Sh".equals(l)) return "\u0FB9";
if (false) throw new Error("TODO(DLC)[EWTS->Tibetan]:: subscribed for " + l);
return null;
} else {
if ("R".equals(l)) return "\u0f6a";
if ("Y".equals(l)) return "\u0f61";
if ("W".equals(l)) return "\u0f5d";
if (!TibetanMachineWeb.isKnownHashKey(l)) {
ThdlDebug.noteIffyCode();
return null;
}
String s = TibetanMachineWeb.getUnicodeForWylieForGlyph(l);
if (null == s)
ThdlDebug.noteIffyCode();
return s;
}
}
public String shortTranslitName() { return "EWTS"; }
private boolean pairHasBadWowel(TPair p) {
return (null != p.getRight()
&& !disambiguator().equals(p.getRight())
&& !"+".equals(p.getRight())
&& null == getUnicodeForWowel(p.getRight()));
}
public boolean isClearlyIllegal(TPair p) {
if (pairHasBadWowel(p)) return true;
if (p.getLeft() == null
&& (p.getRight() == null ||
(!disambiguator().equals(p.getRight())
&& !isWowel(p.getRight()))))
return true;
if ("+".equals(p.getLeft()))
return true;
if (p.getLeft() != null && isWowel(p.getLeft())
&& !aVowel().equals(p.getLeft())) // achen
return true;
return false;
}
public TPairList[] breakTshegBarIntoChunks(String tt, boolean sh) {
if (sh) throw new IllegalArgumentException("Don't do that, silly!");
try {
return TPairListFactory.breakEWTSIntoChunks(tt);
} catch (StackOverflowError e) {
throw new IllegalArgumentException("Input too large[1]: " + tt);
} catch (OutOfMemoryError e) {
throw new IllegalArgumentException("Input too large[2]: " + tt);
}
}
public boolean isACIP() { return false; }
public boolean vowelAloneImpliesAChen() { return true; }
public boolean vowelsMayStack() { return true; }
public boolean isWowelThatRequiresAChen(String s) {
// TODO(DLC)[EWTS->Tibetan]: fix me!
return ((s.length() == 1 && (isUnicodeWowelThatRequiresAChen(s.charAt(0))
|| "?MHX".indexOf(s.charAt(0)) >= 0))
// DLC say ah || "aM".equals(s) // DLC funny... (DLC NOW too funny! affects longest wowel length!)
// DLC say ah || "a?".equals(s) // DLC funny...
// DLC say ah || "aH".equals(s) // DLC funny...
// DLC say ah || "aX".equals(s) // DLC funny...
|| "~X".equals(s)
// DLC say ah || "a~X".equals(s) // DLC funny...
|| "~M".equals(s)
// DLC say ah || "a~M".equals(s) // DLC funny...
|| "~M`".equals(s)
// DLC say ah || "a~M`".equals(s) // DLC funny...
);
}
public boolean isUnicodeWowelThatRequiresAChen(char ch) {
// TODO(DLC)[EWTS->Tibetan]: ask if 18 19 3e 3f combine only with digits
return "\u0f35\u0f37\u0f18\u0f19\u0f3e\u0f3f\u0f86\u0f87\u0fc6".indexOf(ch) >= 0;
}
public boolean couldBeValidStack(TPairList pl) {
StringBuffer hashKey = new StringBuffer();
boolean allHavePlus = true;
for (int i = 0; i < pl.size(); i++) {
if (i + 1 < pl.size() && !"+".equals(pl.get(i).getRight()))
allHavePlus = false;
if (0 != hashKey.length())
hashKey.append('-');
hashKey.append(pl.get(i).getLeft());
}
return (allHavePlus
|| TibetanMachineWeb.hasGlyph(hashKey.toString())); // TODO(DLC)[EWTS->Tibetan]: test with smra and tsma and bdgya
}
}

View file

@ -18,6 +18,7 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.math.BigInteger;
import java.util.ArrayList;
/**
@ -31,16 +32,130 @@ import java.util.ArrayList;
*
* @author David Chandler */
class EWTSTshegBarScanner extends TTshegBarScanner {
/** Returns true iff ch can appear within an EWTS tsheg bar. */
protected static boolean isValidInsideTshegBar(char ch) {
// '\\' is absent, but should it be? TODO(DLC)[EWTS->Tibetan]
return ((ch >= '0' && ch <= '9')
|| (ch >= '\u0f71' && ch <= '\u0f84')
|| EWTSTraits.instance().isUnicodeConsonant(ch)
|| EWTSTraits.instance().isUnicodeWowel(ch)
|| (ch >= '\u0f20' && ch <= '\u0f33')
|| "khgncjytdpbmtstdzwzz'rlafvTDNSWYReuioIAUMHX?^\u0f39\u0f35\u0f37.+~'`-\u0f19\u0f18\u0f3f\u0f3e\u0f86\u0f87\u0f88".indexOf(ch) >= 0);
}
/** See the comment in TTshegBarScanner. This does not find
errors and warnings that you'd think of a parser finding (DLC
errors and warnings that you'd think of a parser finding (TODO(DLC)[EWTS->Tibetan]:
DOES IT?). */
public ArrayList scan(String s, StringBuffer errors, int maxErrors,
public ArrayList scan(String s, StringBuffer errors, int maxErrors, // TODO(DLC)[EWTS->Tibetan]: ignored
boolean shortMessages, String warningLevel) {
// the size depends on whether it's mostly Tibetan or mostly
// Latin and a number of other factors. This is meant to be
// an underestimate, but not too much of an underestimate.
ArrayList al = new ArrayList(s.length() / 10);
throw new Error("DLC unimplemented");
// TODO(DLC)[EWTS->Tibetan]: use jflex, javacc or something similar
// TODO(DLC)[EWTS->Tibetan]: what about Unicode escapes like \u0f20? When do you do that? Immediately like Java source files? I think so and then we can say that oddballs like \u0f19 are valid within tsheg bars.
StringBuffer sb = new StringBuffer(s);
ExpandEscapeSequences(sb);
int sl = sb.length();
for (int i = 0; i < sl; i++) {
if (isValidInsideTshegBar(sb.charAt(i))) {
StringBuffer tbsb = new StringBuffer();
for (; i < sl; i++) {
if (isValidInsideTshegBar(sb.charAt(i)))
tbsb.append(sb.charAt(i));
else {
--i;
break;
}
}
al.add(new TString("EWTS", tbsb.toString(),
TString.TIBETAN_NON_PUNCTUATION));
} else {
if (" /;|!:=_@#$%<>()\r\n\t".indexOf(sb.charAt(i)) >= 0)
al.add(new TString("EWTS", sb.substring(i, i+1),
TString.TIBETAN_PUNCTUATION));
else
al.add(new TString("EWTS", "ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: " + sb.substring(i, i+1),
TString.ERROR));
}
}
return al;
}
/** Modifies the EWTS in sb such that Unicode escape sequences are
* expanded. */
public static void ExpandEscapeSequences(StringBuffer sb) {
int sl;
for (int i = 0; i < (sl = sb.length()); i++) {
if (i + "\\u00000000".length() <= sl) {
if (sb.charAt(i) == '\\' && sb.charAt(i + 1) == 'u' || sb.charAt(i + 1) == 'U') {
boolean isEscape = true;
for (int j = 0; j < "00000000".length(); j++) {
char ch = sb.charAt(i + "\\u".length() + j);
if (!((ch <= '9' && ch >= '0')
|| (ch <= 'F' && ch >= 'A')
|| (ch <= 'f' && ch >= 'a'))) {
isEscape = false;
break;
}
}
if (isEscape) {
long x = -1;
try {
BigInteger bigx = new java.math.BigInteger(sb.substring(i+2, i+10), 16);
x = bigx.longValue();
if (!(bigx.compareTo(new BigInteger("0", 16)) >= 0
&& bigx.compareTo(new BigInteger("FFFFFFFF", 16)) <= 0))
x = -1;
} catch (NumberFormatException e) {
// leave x == -1
}
if (x >= 0 && x <= 0xFFFF) {
sb.replace(i, i + "\\uXXXXyyyy".length(), new String(new char[] { (char)x }));
continue;
} else if (x >= 0x00000000L
&& x <= 0xFFFFFFFFL) {
// TODO(DLC)[EWTS->Tibetan]: do nothing? test errors al.add(new TString("EWTS", "Sorry, we don't yet support Unicode escape sequences above 0x0000FFFF! File a bug.",
//TString.ERROR));
i += "uXXXXYYYY".length();
continue;
}
}
}
}
if (i + "\\u0000".length() <= sl) {
if (sb.charAt(i) == '\\' && sb.charAt(i + 1) == 'u' || sb.charAt(i + 1) == 'U') {
boolean isEscape = true;
for (int j = 0; j < "0000".length(); j++) {
char ch = sb.charAt(i + "\\u".length() + j);
if (!((ch <= '9' && ch >= '0')
|| (ch <= 'F' && ch >= 'A')
|| (ch <= 'f' && ch >= 'a'))) {
isEscape = false;
break;
}
}
if (isEscape) {
int x = -1;
try {
if (!((x = Integer.parseInt(sb.substring(i+2, i+6), 16)) >= 0x0000
&& x <= 0xFFFF))
x = -1;
} catch (NumberFormatException e) {
// leave x == -1
}
if (x >= 0) {
sb.replace(i, i + "\\uXXXX".length(), new String(new char[] { (char)x }));
continue;
}
}
}
}
}
}
/** non-public because this is a singleton */

View file

@ -18,12 +18,10 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlOptions;
import java.util.ArrayList;
import junit.framework.TestCase;
import org.thdl.util.ThdlOptions;
/** Tests this package's ability to understand EWTS and turn it into
* the appropriate TMW or Unicode by throwing a lot of
@ -67,6 +65,15 @@ public class EWTStibwniniTest extends TestCase {
EWTSTest.assert_EWTS_error(ewts);
}
/** Asserts that ewts is valid EWTS. Call this for those strings
that someone might intend a stack in TMW for, but that really
mean two or more stacks in EWTS thanks to prefix rules. g+ga,
for example, might be mistakenly input as gga. If so, it's
legal EWTS because ga takes a ga prefix. */
private static void special_case(String ewts) {
assertTrue(!EWTSTest.hasEwtsError(ewts));
}
/** Tests that all of the standard stacks are treated like
* standard stacks and that none of the non-standard stacks in
* the TMW font are treated like standard stacks. I generated
@ -393,7 +400,7 @@ public class EWTStibwniniTest extends TestCase {
ewts2uni_test("N", "\u0F4E");
ewts2uni_test("Sh", "\u0F65");
ewts2uni_test("k+Sh", "\u0F69");
ewts2uni_test("k+Sh", "\u0f40\u0fb5"); // TODO(DLC)[EWTS->Tibetan]: \u0F69 instead? Shouldn't matter by the unicode standard's terms, and a tiny, separate translator on unicode-to-unicode ought to be better. But maybe change tibwn.ini?
ewts2uni_test("k+k", "\u0f40\u0f90");
ewts2uni_test("k+kh", "\u0f40\u0f91");
ewts2uni_test("k+ng", "\u0f40\u0f94");
@ -437,16 +444,16 @@ public class EWTStibwniniTest extends TestCase {
ewts2uni_test("g+m", "\u0f42\u0fa8");
ewts2uni_test("g+m+y", "\u0f42\u0fa8\u0fb1");
ewts2uni_test("g+r+y", "\u0f42\u0fb2\u0fb1");
ewts2uni_test("g+h", "\u0F43");
ewts2uni_test("g+h+g+h", "\u0f43\u0f92\u0fb7");
ewts2uni_test("g+h+ny", "\u0f43\u0f99");
ewts2uni_test("g+h+n", "\u0f43\u0fa3");
ewts2uni_test("g+h+n+y", "\u0f43\u0fa3\u0fb1");
ewts2uni_test("g+h+m", "\u0f43\u0fa8");
ewts2uni_test("g+h+l", "\u0f43\u0fb3");
ewts2uni_test("g+h+y", "\u0f43\u0fb1");
ewts2uni_test("g+h+r", "\u0f43\u0fb2");
ewts2uni_test("g+h+w", "\u0f43\u0fad");
ewts2uni_test("g+h", "\u0f42\u0fb7"); // TODO(DLC)[EWTS->Tibetan]: \u0F43 instead? Shouldn't matter by the unicode standard's terms, and a tiny, separate translator on unicode-to-unicode ought to be better. But maybe change tibwn.ini? (Same goes for every occurrence of \u0f42\u0fb7 in this file.)
ewts2uni_test("g+h+g+h", "\u0f42\u0fb7\u0f92\u0fb7");
ewts2uni_test("g+h+ny", "\u0f42\u0fb7\u0f99");
ewts2uni_test("g+h+n", "\u0f42\u0fb7\u0fa3");
ewts2uni_test("g+h+n+y", "\u0f42\u0fb7\u0fa3\u0fb1");
ewts2uni_test("g+h+m", "\u0f42\u0fb7\u0fa8");
ewts2uni_test("g+h+l", "\u0f42\u0fb7\u0fb3");
ewts2uni_test("g+h+y", "\u0f42\u0fb7\u0fb1");
ewts2uni_test("g+h+r", "\u0f42\u0fb7\u0fb2");
ewts2uni_test("g+h+w", "\u0f42\u0fb7\u0fad");
ewts2uni_test("ng+k", "\u0f44\u0f90");
ewts2uni_test("ng+k+t", "\u0f44\u0f90\u0f9f");
ewts2uni_test("ng+k+t+y", "\u0f44\u0f90\u0f9f\u0fb1");
@ -499,11 +506,11 @@ public class EWTStibwniniTest extends TestCase {
ewts2uni_test("dz+y", "\u0f5b\u0fb1");
ewts2uni_test("dz+r", "\u0f5b\u0fb2");
ewts2uni_test("dz+w", "\u0f5b\u0fad");
ewts2uni_test("dz+h", "\u0F5C");
ewts2uni_test("dz+h+y", "\u0f5c\u0fb1");
ewts2uni_test("dz+h+r", "\u0f5c\u0fb2");
ewts2uni_test("dz+h+l", "\u0f5c\u0fb3");
ewts2uni_test("dz+h+w", "\u0f5c\u0fad");
ewts2uni_test("dz+h", "\u0F5B\u0FB7"); // TODO(DLC)[EWTS->Tibetan]: 0f5c is what tibwn.ini has
ewts2uni_test("dz+h+y", "\u0f5b\u0fb7\u0fb1"); // TODO(DLC)[EWTS->Tibetan]: 0f5c is what tibwn.ini has
ewts2uni_test("dz+h+r", "\u0f5b\u0fb7\u0fb2"); // TODO(DLC)[EWTS->Tibetan]: 0f5c is what tibwn.ini has
ewts2uni_test("dz+h+l", "\u0f5b\u0fb7\u0fb3"); // TODO(DLC)[EWTS->Tibetan]: 0f5c is what tibwn.ini has
ewts2uni_test("dz+h+w", "\u0f5b\u0fb7\u0fad"); // TODO(DLC)[EWTS->Tibetan]: 0f5c is what tibwn.ini has
ewts2uni_test("ny+ts", "\u0f49\u0fa9");
ewts2uni_test("ny+ts+m", "\u0f49\u0fa9\u0fa8");
ewts2uni_test("ny+ts+y", "\u0f49\u0fa9\u0fb1");
@ -541,12 +548,16 @@ public class EWTStibwniniTest extends TestCase {
ewts2uni_test("D+y", "\u0f4c\u0fb1");
ewts2uni_test("D+r", "\u0f4c\u0fb2");
ewts2uni_test("D+w", "\u0f4c\u0fad");
ewts2uni_test("D+h", "\u0F4D");
ewts2uni_test("D+h+D+h", "\u0f4d\u0f9d");
ewts2uni_test("D+h+m", "\u0f4d\u0fa8");
ewts2uni_test("D+h+y", "\u0f4d\u0fb1");
ewts2uni_test("D+h+r", "\u0f4d\u0fb2");
ewts2uni_test("D+h+w", "\u0f4d\u0fad");
ewts2uni_test("D+h", "\u0F4C\u0FB7"); // TODO(DLC)[EWTS->Tibetan]: 0f4d is what tibwn.ini has
{
// TODO(DLC)[EWTS->Tibetan]: 0f4d is what tibwn.ini has
ewts2uni_test("D+h+D+h", "\u0f4c\u0fb7\u0f9c\u0fb7");
// TODO(DLC)[EWTS->Tibetan]: 0f9d is what tibwn.ini has
}
ewts2uni_test("D+h+m", "\u0f4c\u0fb7\u0fa8"); // TODO(DLC)[EWTS->Tibetan]: 0f4d is what tibwn.ini has
ewts2uni_test("D+h+y", "\u0f4c\u0fb7\u0fb1"); // TODO(DLC)[EWTS->Tibetan]: 0f4d is what tibwn.ini has
ewts2uni_test("D+h+r", "\u0f4c\u0fb7\u0fb2"); // TODO(DLC)[EWTS->Tibetan]: 0f4d is what tibwn.ini has
ewts2uni_test("D+h+w", "\u0f4c\u0fb7\u0fad"); // TODO(DLC)[EWTS->Tibetan]: 0f4d is what tibwn.ini has
ewts2uni_test("N+T", "\u0f4e\u0f9a");
ewts2uni_test("N+Th", "\u0f4e\u0f9b");
ewts2uni_test("N+D", "\u0f4e\u0f9c");
@ -592,7 +603,8 @@ public class EWTStibwniniTest extends TestCase {
ewts2uni_test("t+s+w", "\u0f4f\u0fb6\u0fad");
ewts2uni_test("t+r+y", "\u0f4f\u0fb2\u0fb1");
ewts2uni_test("t+w+y", "\u0f4f\u0fad\u0fb1");
ewts2uni_test("t+k+Sh", "\u0f4f\u0fb9");
ewts2uni_test("t+k+Sh", "\u0f4f\u0f90\u0fb5"); // TODO(DLC)[EWTS->Tibetan]: 0fb9 is what tibwn.ini has
ewts2uni_test("th+y", "\u0f50\u0fb1");
ewts2uni_test("th+w", "\u0f50\u0fad");
ewts2uni_test("d+g", "\u0f51\u0f92");
@ -620,14 +632,14 @@ public class EWTStibwniniTest extends TestCase {
ewts2uni_test("d+y", "\u0f51\u0fb1");
ewts2uni_test("d+r+y", "\u0f51\u0fb2\u0fb1");
ewts2uni_test("d+w+y", "\u0f51\u0fad\u0fb1");
ewts2uni_test("d+h", "\u0F52");
ewts2uni_test("d+h+n", "\u0f52\u0fa3");
ewts2uni_test("d+h+n+y", "\u0f52\u0fa3\u0fb1");
ewts2uni_test("d+h+m", "\u0f52\u0fa8");
ewts2uni_test("d+h+y", "\u0f52\u0fb1");
ewts2uni_test("d+h+r", "\u0f52\u0fb2");
ewts2uni_test("d+h+r+y", "\u0f52\u0fb2\u0fb1");
ewts2uni_test("d+h+w", "\u0f52\u0fad");
ewts2uni_test("d+h", "\u0F51\u0fb7"); // TODO(DLC)[EWTS->Tibetan]: 0f52 is what tibwn.ini has
ewts2uni_test("d+h+n", "\u0f51\u0fb7\u0fa3"); // TODO(DLC)[EWTS->Tibetan]: 0f52 is what tibwn.ini has
ewts2uni_test("d+h+n+y", "\u0f51\u0fb7\u0fa3\u0fb1"); // TODO(DLC)[EWTS->Tibetan]: 0f52 is what tibwn.ini has
ewts2uni_test("d+h+m", "\u0f51\u0fb7\u0fa8"); // TODO(DLC)[EWTS->Tibetan]: 0f52 is what tibwn.ini has
ewts2uni_test("d+h+y", "\u0f51\u0fb7\u0fb1"); // TODO(DLC)[EWTS->Tibetan]: 0f52 is what tibwn.ini has
ewts2uni_test("d+h+r", "\u0f51\u0fb7\u0fb2"); // TODO(DLC)[EWTS->Tibetan]: 0f52 is what tibwn.ini has
ewts2uni_test("d+h+r+y", "\u0f51\u0fb7\u0fb2\u0fb1"); // TODO(DLC)[EWTS->Tibetan]: 0f52 is what tibwn.ini has
ewts2uni_test("d+h+w", "\u0f51\u0fb7\u0fad"); // TODO(DLC)[EWTS->Tibetan]: 0f52 is what tibwn.ini has
ewts2uni_test("n+k", "\u0f53\u0f90");
ewts2uni_test("n+k+t", "\u0f53\u0f90\u0f9f");
ewts2uni_test("n+g+h", "\u0f53\u0f92\u0fb7");
@ -651,7 +663,7 @@ public class EWTStibwniniTest extends TestCase {
ewts2uni_test("n+d+h+r", "\u0f53\u0fa1\u0fb7\u0fb2");
ewts2uni_test("n+d+h+y", "\u0f53\u0fa1\u0fb7\u0fb1");
ewts2uni_test("n+n", "\u0f53\u0fa3");
ewts2uni_test("n+n+y", "\u0f53\u0fa3\u0f61");
ewts2uni_test("n+n+y", "\u0f53\u0fa3\u0fb1");
ewts2uni_test("n+p", "\u0f53\u0fa4");
ewts2uni_test("n+p+r", "\u0f53\u0fa4\u0fb2");
ewts2uni_test("n+ph", "\u0f53\u0fa5");
@ -692,13 +704,13 @@ public class EWTStibwniniTest extends TestCase {
ewts2uni_test("b+b+h", "\u0f56\u0fa6\u0fb7");
ewts2uni_test("b+b+h+y", "\u0f56\u0fa6\u0fb7\u0fb1");
ewts2uni_test("b+m", "\u0f56\u0fa8");
ewts2uni_test("b+h", "\u0F57");
ewts2uni_test("b+h+N", "\u0f57\u0f9e");
ewts2uni_test("b+h+n", "\u0f57\u0fa3");
ewts2uni_test("b+h+m", "\u0f57\u0fa8");
ewts2uni_test("b+h+y", "\u0f57\u0fb1");
ewts2uni_test("b+h+r", "\u0f57\u0fb2");
ewts2uni_test("b+h+w", "\u0f57\u0fad");
ewts2uni_test("b+h", "\u0F56\u0fb7"); // TODO(DLC)[EWTS->Tibetan]: 0f57 is what tibwn.ini has
ewts2uni_test("b+h+N", "\u0f56\u0fb7\u0f9e"); // TODO(DLC)[EWTS->Tibetan]: 0f57 is what tibwn.ini has
ewts2uni_test("b+h+n", "\u0f56\u0fb7\u0fa3"); // TODO(DLC)[EWTS->Tibetan]: 0f57 is what tibwn.ini has
ewts2uni_test("b+h+m", "\u0f56\u0fb7\u0fa8"); // TODO(DLC)[EWTS->Tibetan]: 0f57 is what tibwn.ini has
ewts2uni_test("b+h+y", "\u0f56\u0fb7\u0fb1"); // TODO(DLC)[EWTS->Tibetan]: 0f57 is what tibwn.ini has
ewts2uni_test("b+h+r", "\u0f56\u0fb7\u0fb2"); // TODO(DLC)[EWTS->Tibetan]: 0f57 is what tibwn.ini has
ewts2uni_test("b+h+w", "\u0f56\u0fb7\u0fad"); // TODO(DLC)[EWTS->Tibetan]: 0f57 is what tibwn.ini has
ewts2uni_test("m+ny", "\u0f58\u0f99");
ewts2uni_test("m+N", "\u0f58\u0f9e");
ewts2uni_test("m+n", "\u0f58\u0fa3");
@ -736,13 +748,13 @@ public class EWTStibwniniTest extends TestCase {
ewts2uni_test("r+t+s+n+y", "\u0f62\u0f9f\u0fb6\u0fa3\u0fb1");
ewts2uni_test("r+th", "\u0f62\u0fa0");
ewts2uni_test("r+th+y", "\u0f62\u0fa0\u0fb1");
ewts2uni_test("r+d+d+h", "\u0f62\u0fa1\u0fa2");
ewts2uni_test("r+d+d+h+y", "\u0f62\u0fa1\u0fa2\u0fb1");
ewts2uni_test("r+d+d+h", "\u0f62\u0fa1\u0fa1\u0fb7"); // TODO(DLC)[EWTS->Tibetan]: 0fa2 is what tibwn.ini has
ewts2uni_test("r+d+d+h+y", "\u0f62\u0fa1\u0fa1\u0fb7\u0fb1"); // TODO(DLC)[EWTS->Tibetan]: 0fa2 is what tibwn.ini has
ewts2uni_test("r+d+y", "\u0f62\u0fa1\u0fb1");
ewts2uni_test("r+d+h", "\u0f62\u0fa1\u0fb7");
ewts2uni_test("r+d+h+m", "\u0f62\u0fa1\u0fb7\u0fa8");
ewts2uni_test("r+d+h+y", "\u0f62\u0fa2\u0fb1");
ewts2uni_test("r+d+h+r", "\u0f62\u0fa2\u0fb2");
ewts2uni_test("r+d+h", "\u0f62\u0fa1\u0fb7"); // TODO(DLC)[EWTS->Tibetan]: 0fa2 is what tibwn.ini has
ewts2uni_test("r+d+h+m", "\u0f62\u0fa1\u0fb7\u0fa8"); // TODO(DLC)[EWTS->Tibetan]: 0fa2 is what tibwn.ini has
ewts2uni_test("r+d+h+y", "\u0f62\u0fa1\u0fb7\u0fb1"); // TODO(DLC)[EWTS->Tibetan]: 0fa2 is what tibwn.ini has
ewts2uni_test("r+d+h+r", "\u0f62\u0fa1\u0fb7\u0fb2"); // TODO(DLC)[EWTS->Tibetan]: 0fa2 is what tibwn.ini has
ewts2uni_test("r+p", "\u0f62\u0fa4");
ewts2uni_test("r+b+p", "\u0f62\u0fa6\u0fa4");
ewts2uni_test("r+b+b", "\u0f62\u0fa6\u0fa6");
@ -780,22 +792,22 @@ public class EWTStibwniniTest extends TestCase {
assert_EWTS_error("khkha");
assert_EWTS_error("khna");
assert_EWTS_error("khla");
assert_EWTS_error("gga");
special_case("gga");
assert_EWTS_error("ggha");
assert_EWTS_error("gnya");
assert_EWTS_error("gda");
special_case("gnya");
special_case("gda");
assert_EWTS_error("gdha");
assert_EWTS_error("gdhya");
assert_EWTS_error("gdhwa");
assert_EWTS_error("gna");
assert_EWTS_error("gnya");
assert_EWTS_error("gpa");
special_case("gna");
special_case("gnya");
special_case("gpa");
assert_EWTS_error("gbha");
assert_EWTS_error("gbhya");
assert_EWTS_error("gma");
assert_EWTS_error("gmya");
special_case("gma");
special_case("gmya");
assert_EWTS_error("grya");
assert_EWTS_error("gha");
special_case("gha");
assert_EWTS_error("ghgha");
assert_EWTS_error("ghnya");
assert_EWTS_error("ghna");
@ -803,8 +815,8 @@ public class EWTStibwniniTest extends TestCase {
assert_EWTS_error("ghma");
assert_EWTS_error("ghla");
assert_EWTS_error("ghya");
assert_EWTS_error("ghra");
assert_EWTS_error("ghwa");
special_case("ghra");
special_case("ghwa");
assert_EWTS_error("ngka");
assert_EWTS_error("ngkta");
assert_EWTS_error("ngktya");
@ -939,7 +951,7 @@ public class EWTStibwniniTest extends TestCase {
assert_EWTS_error("tmya");
assert_EWTS_error("tya");
assert_EWTS_error("trna");
assert_EWTS_error("tsa");
special_case("tsa");
assert_EWTS_error("tstha");
assert_EWTS_error("tsna");
assert_EWTS_error("tsnya");
@ -947,45 +959,45 @@ public class EWTStibwniniTest extends TestCase {
assert_EWTS_error("tsmya");
assert_EWTS_error("tsya");
assert_EWTS_error("tsra");
assert_EWTS_error("tswa");
special_case("tswa");
assert_EWTS_error("trya");
assert_EWTS_error("twya");
assert_EWTS_error("tkSha");
assert_EWTS_error("thya");
assert_EWTS_error("thwa");
assert_EWTS_error("dga");
assert_EWTS_error("dgya");
assert_EWTS_error("dgra");
special_case("dga");
special_case("dgya");
special_case("dgra");
assert_EWTS_error("dgha");
assert_EWTS_error("dghra");
assert_EWTS_error("ddza");
assert_EWTS_error("dda");
special_case("ddza");
special_case("dda");
assert_EWTS_error("ddya");
assert_EWTS_error("ddra");
assert_EWTS_error("ddwa");
special_case("ddra");
special_case("ddwa");
assert_EWTS_error("ddha");
assert_EWTS_error("ddhna");
assert_EWTS_error("ddhya");
assert_EWTS_error("ddhra");
assert_EWTS_error("ddhwa");
assert_EWTS_error("dna");
assert_EWTS_error("dba");
assert_EWTS_error("dbra");
special_case("dna");
special_case("dba");
special_case("dbra");
assert_EWTS_error("dbha");
assert_EWTS_error("dbhya");
assert_EWTS_error("dbhra");
assert_EWTS_error("dma");
assert_EWTS_error("dya");
special_case("dma");
special_case("dya");
assert_EWTS_error("drya");
assert_EWTS_error("dwya");
assert_EWTS_error("dha");
special_case("dha");
assert_EWTS_error("dhna");
assert_EWTS_error("dhnya");
assert_EWTS_error("dhma");
assert_EWTS_error("dhya");
assert_EWTS_error("dhra");
special_case("dhra");
assert_EWTS_error("dhrya");
assert_EWTS_error("dhwa");
special_case("dhwa");
assert_EWTS_error("nka");
assert_EWTS_error("nkta");
assert_EWTS_error("ngha");
@ -1016,7 +1028,7 @@ public class EWTStibwniniTest extends TestCase {
assert_EWTS_error("nma");
assert_EWTS_error("nbhya");
assert_EWTS_error("ntsa");
assert_EWTS_error("nya");
special_case("nya");
assert_EWTS_error("nra");
assert_EWTS_error("nwa");
assert_EWTS_error("nwya");
@ -1039,39 +1051,39 @@ public class EWTStibwniniTest extends TestCase {
assert_EWTS_error("pswa");
assert_EWTS_error("psya");
assert_EWTS_error("bgha");
assert_EWTS_error("bdza");
assert_EWTS_error("bda");
special_case("bdza");
special_case("bda");
assert_EWTS_error("bddza");
assert_EWTS_error("bdha");
assert_EWTS_error("bdhwa");
assert_EWTS_error("bta");
assert_EWTS_error("bna");
assert_EWTS_error("bba");
special_case("bta");
special_case("bna");
special_case("bba");
assert_EWTS_error("bbha");
assert_EWTS_error("bbhya");
assert_EWTS_error("bma");
assert_EWTS_error("bha");
special_case("bma");
special_case("bha");
assert_EWTS_error("bhNa");
assert_EWTS_error("bhna");
assert_EWTS_error("bhma");
assert_EWTS_error("bhya");
assert_EWTS_error("bhra");
assert_EWTS_error("bhwa");
assert_EWTS_error("mnya");
assert_EWTS_error("mNa");
assert_EWTS_error("mna");
assert_EWTS_error("mnya");
assert_EWTS_error("mpa");
assert_EWTS_error("mpra");
assert_EWTS_error("mpha");
assert_EWTS_error("mba");
special_case("bhra");
special_case("bhwa");
special_case("mnya");
special_case("mNa"); // TODO(DLC)[EWTS->Tibetan]: do prefix rules really allow mNa? I think not.
special_case("mna");
special_case("mnya");
special_case("mpa");
special_case("mpra");
special_case("mpha");
special_case("mba");
assert_EWTS_error("mbha");
assert_EWTS_error("mbhya");
assert_EWTS_error("mma");
assert_EWTS_error("mla");
assert_EWTS_error("mwa");
assert_EWTS_error("msa");
assert_EWTS_error("mha");
special_case("mma");
special_case("mla");
special_case("mwa");
special_case("msa");
special_case("mha");
assert_EWTS_error("yYa");
assert_EWTS_error("yra");
assert_EWTS_error("ywa");
@ -1089,7 +1101,7 @@ public class EWTStibwniniTest extends TestCase {
assert_EWTS_error("rNa");
assert_EWTS_error("rtwa");
assert_EWTS_error("rtta");
assert_EWTS_error("rtsa");
special_case("rtsa");
assert_EWTS_error("rtsna");
assert_EWTS_error("rtsnya");
assert_EWTS_error("rtha");

View file

@ -18,11 +18,11 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.util.HashMap;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
import java.util.HashMap;
/** A noninstantiable class that knows about every user-visible error
* or warning message. Each has a unique integer key starting at 101
* for those messages that are errors and starting at 501 for those
@ -96,7 +96,8 @@ public class ErrorsAndWarnings {
messages that take more than one "parameter", if you will,
like message 501. */
static String getMessage(int code, boolean shortMessages,
String translit) {
String translit,
TTraits traits) {
// Let's make sure that no unknown code is used during
// development:
ThdlDebug.verify("unknown code " + code,
@ -123,27 +124,35 @@ public class ErrorsAndWarnings {
return "" + code + ": There's not even a unique, non-illegal parse for {" + translit + "}";
case 102:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found an open bracket, '" + translit + "', within a [#COMMENT]-style comment. Brackets may not appear in comments.";
case 103:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found a truly unmatched close bracket, '" + translit + "'.";
case 104: // See also 140
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found a closing bracket, '" + translit + "', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.";
case 105:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found a truly unmatched open bracket, '[' or '{', prior to this current illegal open bracket, '" + translit + "'.";
case 106: // see also 139
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found an illegal open bracket (in context, this is '" + translit + "'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?";
case 107:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found an illegal at sign, @ (in context, this is " + translit + "). This folio marker has a period, '.', at the end of it, which is illegal.";
case 108:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found an illegal at sign, @ (in context, this is " + translit + "). This folio marker is not followed by whitespace, as is expected.";
case 109:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found an illegal at sign, @ (in context, this is " + translit + "). @012B is an example of a legal folio marker.";
case 110:
@ -152,21 +161,26 @@ public class ErrorsAndWarnings {
/////NYA/. We warn about // for this reason. \\ causes
a tsheg-bar //error.
*/
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.";
case 111:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found an illegal open parenthesis, '('. Nesting of parentheses is not allowed.";
case 112:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Unexpected closing parenthesis, ')', found.";
case 113:
return "" + code + ": The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.";
ThdlDebug.verify(traits.isACIP());
return "" + code + ": The " + traits.shortTranslitName() + " {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the " + traits.shortTranslitName() + " {[?]} does.";
case 114:
return "" + code + ": Found an illegal, unprintable character.";
case 115:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.";
case 116:
@ -174,37 +188,44 @@ public class ErrorsAndWarnings {
return "" + code + ": Found an illegal character, '" + translit + "', with ordinal (in decimal) " + (int)translit.charAt(0) + ".";
case 117:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Unexpected end of input; truly unmatched open bracket found.";
case 118:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Unmatched open bracket found. A comment does not terminate.";
case 119:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Unmatched open bracket found. A correction does not terminate.";
case 120:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.";
case 121:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis, '('.";
case 122:
return "" + code + ": Warning, empty tsheg bar found while converting from ACIP!";
return "" + code + ": Warning, empty tsheg bar found while converting from " + traits.shortTranslitName() + "!";
case 123:
return "" + code + ": Cannot convert ACIP {" + translit + "} because it contains a number but also a non-number.";
return "" + code + ": Cannot convert " + traits.shortTranslitName() + " {" + translit + "} because it contains a number but also a non-number.";
case 124:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Cannot convert ACIP {" + translit + "} because {V}, wa-zur, appears without being subscribed to a consonant.";
case 125:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Cannot convert ACIP {" + translit + "} because we would be required to assume that {A} is a consonant, when it is not clear if it is a consonant or a vowel.";
case 126:
return "" + code + ": Cannot convert ACIP {" + translit + "} because it ends with a '+'.";
return "" + code + ": Cannot convert " + traits.shortTranslitName() + " {" + translit + "} because it ends with a '+'.";
case 127:
return "" + code + ": Cannot convert ACIP {" + translit + "} because it ends with a '-'.";
return "" + code + ": Cannot convert " + traits.shortTranslitName() + " {" + translit + "} because it ends with a disambiguator (i.e., '" + traits.disambiguator() + "').";
case 128: // fall through
case 129:
@ -214,13 +235,14 @@ public class ErrorsAndWarnings {
return "" + code + ": The tsheg bar (\"syllable\") {" + translit + "} is essentially nothing.";
case 131:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": The ACIP caret, {^}, must precede a tsheg bar.";
case 132:
return "" + code + ": The ACIP {" + translit + "} must be glued to the end of a tsheg bar, but this one was not.";
return "" + code + ": The " + traits.shortTranslitName() + " {" + translit + "} must be glued to the end of a tsheg bar, but this one was not.";
case 133:
return "" + code + ": Cannot convert the ACIP {" + translit + "} to Tibetan because it is unclear what the result should be. The correct output would likely require special mark-up.";
return "" + code + ": Cannot convert the " + traits.shortTranslitName() + " {" + translit + "} to Tibetan because it is unclear what the result should be. The correct output would likely require special mark-up.";
case 134:
return "" + code + ": The tsheg bar (\"syllable\") {" + translit + "} has no legal parses.";
@ -241,21 +263,26 @@ public class ErrorsAndWarnings {
// See also 106.
case 139:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Found an illegal open bracket (in context, this is '" + translit + "'). There is no matching closing bracket.";
case 140:
// see also 104
ThdlDebug.verify(traits.isACIP());
ThdlDebug.verify(translit.length() == 1);
return "" + code + ": Unmatched closing bracket, '" + translit + "', found. Pairs are expected, as in [#THIS] or [THAT]. Nesting is not allowed.";
case 141:
ThdlDebug.verify(traits.isACIP());
ThdlDebug.verify(translit.length() == 1);
return "" + code + ": While waiting for a closing bracket, an opening bracket, '" + translit + "', was found instead. Nesting of bracketed expressions is not permitted.";
case 142: // this number is referenced in error 143's message
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Because you requested conversion to a Unicode text file, there is no way to indicate that the font size is supposed to decrease starting here and continuing until error 143. That is, this is the beginning of a region in YIG CHUNG.";
case 143: // this number is referenced in error 142's message
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Because you requested conversion to a Unicode text file, there is no way to indicate that the font size is supposed to increase (go back to the size it was before the last error 142, that is) starting here. That is, this is the end of a region in YIG CHUNG.";
@ -270,27 +297,32 @@ public class ErrorsAndWarnings {
return "" + code + ": The last stack does not have a vowel in {" + translit + "}; this may indicate a typo, because Sanskrit, which this probably is (because it's not legal Tibetan), should have a vowel after each stack.";
case 503:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": Though {" + translit + "} is unambiguous, it would be more computer-friendly if '+' signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";
case 504:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": The ACIP {" + translit + "} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {" + translit + "}.";
case 505:
return "" + code + ": There is a useless disambiguator in {" + translit + "}.";
case 506:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": There is a stack of three or more consonants in {" + translit + "} that uses at least one '+' but does not use a '+' between each consonant.";
case 507:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": There is a chance that the ACIP {" + translit + "} was intended to represent more consonants than we parsed it as representing -- GHNYA, e.g., means GH+NYA, but you can imagine seeing GH+N+YA and typing GHNYA for it too."; // TMW has glyphs for both GH+N+YA (G+H+N+YA) and GH+NYA (G+H+NYA).
case 508: // see 509 also
return "" + code + ": The ACIP {" + translit + "} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.";
return "" + code + ": The " + traits.shortTranslitName() + " {" + translit + "} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.";
case 509: // see 508 also
return "" + code + ": The ACIP {" + translit + "} has an initial sequence that has been interpreted as two stacks, a prefix and a root stack, not one nonnative stack, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.";
return "" + code + ": The " + traits.shortTranslitName() + " {" + translit + "} has an initial sequence that has been interpreted as two stacks, a prefix and a root stack, not one nonnative stack, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.";
case 510:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": A non-breaking tsheg, '" + translit + "', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".";
@ -298,9 +330,10 @@ public class ErrorsAndWarnings {
// ERROR 137 and WARNING 511 are the same:
case 137: /* fall through */
case 511:
return "" + code + ": The ACIP {" + translit + "} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts. The TibetanMachineWeb font has only a limited number of ready-made, precomposed glyphs, and {" + translit + "} is not one of them.";
return "" + code + ": The " + traits.shortTranslitName() + " {" + translit + "} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts. The TibetanMachineWeb font has only a limited number of ready-made, precomposed glyphs, and {" + translit + "} is not one of them.";
case 512:
ThdlDebug.verify(traits.isACIP());
return "" + code + ": There is a chance that the ACIP {" + translit + "} was intended to represent more consonants than we parsed it as representing -- GHNYA, e.g., means GH+NYA, but you can imagine seeing GH+N+YA and typing GHNYA for it too. In fact, there are glyphs in the Tibetan Machine font for N+N+Y, N+G+H, G+N+Y, G+H+N+Y, T+N+Y, T+S+TH, T+S+N, T+S+N+Y, TS+NY, TS+N+Y, H+N+Y, M+N+Y, T+S+M, T+S+M+Y, T+S+Y, T+S+R, T+S+V, N+T+S, T+S, S+H, R+T+S, R+T+S+N, R+T+S+N+Y, and N+Y, indicating the importance of these easily mistyped stacks, so the possibility is very real.";
@ -391,11 +424,11 @@ public class ErrorsAndWarnings {
severityMap.put(new Integer(num), (null != opt) ? opt : defaultSeverities[num - 501]);
}
// DLC FIXME: make 506 an error? or a new, super-high priority class of warning?
// TODO(DLC)[EWTS->Tibetan] FIXME: make 506 an error? or a new, super-high priority class of warning?
}
/** Prints out the long forms of the error messages, which will
help a user to decipher the short forms. */
help a user to decipher the short forms. TODO(DLC)[EWTS->Tibetan]: ACIP only */
public static void printErrorAndWarningDescriptions(java.io.PrintStream out) {
final String translit = "X";
out.println("ACIP->Tibetan ERRORS are as follows, and appear in their short forms, embedded");
@ -407,7 +440,8 @@ public class ErrorsAndWarnings {
} else if (129 == num) {
out.println("129: Cannot convert ACIP {" + translit + "} because " + "+" + " is not an ACIP consonant.");
} else {
out.println(getMessage(num, false, translit));
out.println(getMessage(num, false, translit,
ACIPTraits.instance()));
}
out.println("");
}
@ -419,7 +453,8 @@ public class ErrorsAndWarnings {
if (501 == num) {
out.println("501: Using " + translit + ", but only because the tool's knowledge of prefix rules (see the documentation) says that " + "XX" + " is not a legal Tibetan tsheg bar (\"syllable\")");
} else {
out.println(getMessage(num, false, translit));
out.println(getMessage(num, false, translit,
ACIPTraits.instance()));
}
out.println("");
}

View file

@ -21,10 +21,10 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlOptions;
import junit.framework.TestCase;
import org.thdl.util.ThdlOptions;
/** Tests ACIP-to-Tibetan conversions using tsheg bars from real ACIP
* files. Lots of tsheg bars.

View file

@ -18,12 +18,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlOptions;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.StringTokenizer;
import org.thdl.util.ThdlOptions;
/** MidLexSubstitution is a hack that lets the end user clumsily fix
* the EWTS-to-Tibetan and ACIP-to-Tibetan converters without having
* to modify the source code.

View file

@ -21,12 +21,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlOptions;
import java.util.ArrayList;
import junit.framework.TestCase;
import org.thdl.util.ThdlOptions;
/** Tests this package, especially {@link #TPairListFactory} and
* {@link TPairList}. Tests use ACIP more than EWTS.
@ -275,7 +275,8 @@ public class PackageTest extends TestCase {
String[] expectedLegalParses,
String expectedBestParse,
int pairListToUse) {
TPairList[] la = TPairListFactory.breakACIPIntoChunks(acip, true);
TPairList[] la
= ACIPTraits.instance().breakTshegBarIntoChunks(acip, true);
TPairList l = la[(pairListToUse == -1) ? 0 : ((pairListToUse >= 1) ? 1 : pairListToUse)];
if (sdebug || debug)
System.out.println("ACIP=" + acip + " and l'=" + l);
@ -302,9 +303,9 @@ public class PackageTest extends TestCase {
return;
} else {
String s;
if ((s = pt.getWarning("Most", l, acip, false)) != null) {
if ((s = pt.getWarning("Most", l, acip, false, ACIPTraits.instance())) != null) {
System.out.println(s);
} else if ((s = pt.getWarning("All", l, acip, false)) != null)
} else if ((s = pt.getWarning("All", l, acip, false, ACIPTraits.instance())) != null)
if (sdebug || debug) System.out.println("Paranoiac warning is this: " + s);
}
int np = pt.numberOfParses();
@ -447,9 +448,9 @@ public class PackageTest extends TestCase {
tstHelper("9012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678");
}
/** Tests {@link TPairListFactory#breakACIPIntoChunks(String,
* boolean)}, {@link TPairList#getACIPError(String, boolean)}, and {@link
* TPairList#recoverACIP()}. */
/** Tests {@link ACIPTraits#breakTshegBarIntoChunks(String,
* boolean)}, {@link TPairList#getACIPError(String, boolean)},
* and {@link TPairList#recoverACIP()}. */
public void testBreakACIPIntoChunks() {
tstHelper("GASN"); // ambiguous with regard to prefix rules
tstHelper("BARMA"); // ambiguous with regard to prefix rules

View file

@ -18,9 +18,9 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.util.ArrayList;
import java.util.ListIterator;
import java.util.NoSuchElementException;
import java.util.ArrayList;
/** An object that can iterate over an {@link TParseTree}.
*

View file

@ -18,16 +18,19 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.io.*;
import java.util.ArrayList;
import java.util.Stack;
import java.awt.Color;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
import org.thdl.tib.text.DuffCode;
import org.thdl.tib.text.TibetanDocument;
import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.tib.text.DuffCode;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
// TODO(DLC)[EWTS->Tibetan]: THis class is broken for ewts. But kill this class unless it needs to exist.
/**
@ -338,9 +341,9 @@ public class TConverter {
if (smallFontSize >= regularFontSize)
smallFontSize = regularFontSize - 1;
if (colors)
tdoc.enableColors();
TibetanDocument.enableColors();
else
tdoc.disableColors();
TibetanDocument.disableColors();
}
int sz = scan.size();
@ -371,7 +374,8 @@ public class TConverter {
}
} else if (stype == TString.TSHEG_BAR_ADORNMENT) {
if (lastGuyWasNonPunct) {
String err = "[#ERROR " + ErrorsAndWarnings.getMessage(133, shortMessages, s.getText()) + "]";
String err = "[#ERROR " + ErrorsAndWarnings.getMessage(133, shortMessages, s.getText(),
ttraits) + "]";
if (null != writer) {
String uni = ttraits.getUnicodeFor(s.getText(), false);
if (null == uni) {
@ -434,7 +438,9 @@ public class TConverter {
Object[] duff = null;
if (stype == TString.TIBETAN_NON_PUNCTUATION) {
lastGuyWasNonPunct = true;
TPairList pls[] = TPairListFactory.breakACIPIntoChunks(s.getText(), false);
TPairList pls[]
= ttraits.breakTshegBarIntoChunks(s.getText(),
false);
String acipError;
if ((acipError = pls[0].getACIPError(s.getText(), shortMessages)) != null
@ -457,7 +463,8 @@ public class TConverter {
hasErrors = true;
String errorMessage
= ("[#ERROR "
+ ErrorsAndWarnings.getMessage(130, shortMessages, s.getText())
+ ErrorsAndWarnings.getMessage(130, shortMessages, s.getText(),
ttraits)
+ "]");
if (null != writer) writer.write(errorMessage);
if (null != tdoc) {
@ -478,7 +485,8 @@ public class TConverter {
"[#ERROR "
+ ErrorsAndWarnings.getMessage(134,
shortMessages,
s.getText())
s.getText(),
ttraits)
+ "]";
if (null != writer)
writer.write(errorMessage);
@ -516,7 +524,8 @@ public class TConverter {
warning = pt.getWarning(warningLevel,
pl,
s.getText(),
shortMessages);
shortMessages,
ttraits);
}
if (null != warning) {
if (writeWarningsToOut) {
@ -632,7 +641,7 @@ public class TConverter {
// one) and then a comma:
peekaheadFindsSpacesAndComma(scan, i+1))) {
if (null != writer) {
unicode = " "; // DLC NOW FIXME: allow for U+00A0 between two <i>shad</i>s (0F0D or 0F0E), and optionally insert a U+200B after the <i>shad</i> following the whitespace so that stupid software will break lines more nicely
unicode = " "; // TODO(DLC)[EWTS->Tibetan]: FIXME: allow for U+00A0 between two <i>shad</i>s (0F0D or 0F0E), and optionally insert a U+200B after the <i>shad</i> following the whitespace so that stupid software will break lines more nicely
done = true;
}
if (null != tdoc) {
@ -692,7 +701,8 @@ public class TConverter {
writer.write("[ERROR "
+ ErrorsAndWarnings.getMessage(142,
shortMessages,
"(" /* hard-coded ACIP value */) + "]");
"(" /* hard-coded ACIP value */,
ttraits) + "]");
if (null != tdoc) {
tdoc.setTibetanFontSize(smallFontSize);
}
@ -702,7 +712,8 @@ public class TConverter {
writer.write("[ERROR "
+ ErrorsAndWarnings.getMessage(143,
shortMessages,
")" /* hard-coded ACIP value */) + "]");
")" /* hard-coded ACIP value. TODO(DLC)[EWTS->Tibetan]: and above*/,
ttraits) + "]");
if (null != tdoc) {
tdoc.setTibetanFontSize(regularFontSize);
}
@ -717,7 +728,8 @@ public class TConverter {
"[#ERROR "
+ ErrorsAndWarnings.getMessage(135,
shortMessages,
"" + ch)
"" + ch,
ttraits)
+ "]";
writer.write(errorMessage);
if (null != errors)
@ -729,7 +741,8 @@ public class TConverter {
"[#ERROR "
+ ErrorsAndWarnings.getMessage(138,
shortMessages,
"" + ch)
"" + ch,
ttraits)
+ "]";
writer.write(errorMessage);
if (null != errors)
@ -746,7 +759,8 @@ public class TConverter {
"[#ERROR "
+ ErrorsAndWarnings.getMessage(136,
shortMessages,
s.getText())
s.getText(),
ttraits)
+ "]";
tdoc.appendRoman(tdocLocation[0],
errorMessage,

View file

@ -19,10 +19,6 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlDebug;
import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.tib.text.DuffCode;
import java.util.ArrayList;
/** An ordered pair used in ACIP/EWTS-to-TMW/Unicode conversion. The
* left side is the consonant or empty; the right side is either the
@ -182,8 +178,14 @@ class TPair {
/** Returns true if this pair contains a Tibetan number. */
boolean isNumeric() {
char ch;
return (l != null && l.length() == 1 && (ch = l.charAt(0)) >= '0' && ch <= '9');
if (l != null && l.length() == 1) {
char ch = l.charAt(0);
return ((ch >= '0' && ch <= '9')
|| (ch >= '\u0f18' && ch <= '\u0f33')
|| ch == '\u0f3e' || ch == '\u0f3f');
}
return false;
// TODO(DLC)[EWTS->Tibetan]: what about half-numbers?
}
String getWylie() {
@ -209,7 +211,7 @@ class TPair {
if (null == leftWylie) leftWylie = "";
if (justLeft) return leftWylie;
String rightWylie = null;
if ("-".equals(getRight()))
if (traits.disambiguator().equals(getRight()))
rightWylie = ".";
else if ("+".equals(getRight()))
rightWylie = "+";
@ -238,8 +240,9 @@ class TPair {
consonantSB.append(x);
}
if (null != getRight()
&& !("-".equals(getRight()) || "+".equals(getRight()) || "A".equals(getRight()))) {
String x = traits.getUnicodeFor(getRight(), subscribed);
&& !(traits.disambiguator().equals(getRight())
|| "+".equals(getRight()) || traits.aVowel().equals(getRight()))) {
String x = traits.getUnicodeForWowel(getRight());
if (null == x) throw new Error("TPair: " + getRight() + " has no Uni");
vowelSB.append(x);
}

View file

@ -20,13 +20,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.tib.text.DuffCode;
import org.thdl.tib.text.TGCPair;
import org.thdl.util.ThdlDebug;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.HashMap;
import org.thdl.tib.text.TGCPair;
import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.util.ThdlDebug;
/** A list of {@link TPair TPairs}, typically corresponding to
* one tsheg bar. <i>l</i>' in the design doc is a TPairList.
@ -101,6 +100,11 @@ class TPairList {
al.add(0, p);
}
/** Appends p to the current list of TPairs. */
public void append(TPair p) {
al.add(p);
}
/** Returns the number of TPairs in this list. */
public int size() { return al.size(); }
@ -145,12 +149,11 @@ class TPairList {
/** Returns true if this list contains ( . <vowel>) or (A . ),
* which are two simple errors you encounter if you interpret DAA
* or TAA or DAI or DAE the wrong way. TODO(DLC)[EWTS->Tibetan]: ACIP vs. EWTS */
boolean hasSimpleError(TTraits ttraits) {
boolean hasSimpleError() {
int sz = size();
for (int i = 0; i < sz; i++) {
TPair p = get(i);
if ((null == p.getLeft() && !ttraits.disambiguator().equals(p.getRight()))
|| ttraits.hasSimpleError(p))
if (traits.hasSimpleError(p))
return true;
}
return false;
@ -161,7 +164,7 @@ class TPairList {
* Returns an error message, or null if there is no error that
* you can find without the help of tsheg bar syntax rules. */
// FIXME: This is needlessly ACIP specific -- rename and change text of messages
String getACIPError(String originalACIP, boolean shortMessages) {
String getACIPError(String originalACIP, boolean shortMessages) { // TODO(DLC)[EWTS->Tibetan] misnomer.
// FIXME: this returns just the first error. List all errors
// at once.
int sz = size();
@ -169,46 +172,60 @@ class TPairList {
return ErrorsAndWarnings.getMessage(122, shortMessages,
((null != originalACIP)
? originalACIP
: ""));
: ""),
traits);
String translit
= (null != originalACIP) ? originalACIP : recoverACIP();
boolean mustBeEntirelyNumeric = get(0).isNumeric();
for (int i = 0; i < sz; i++) {
TPair p = get(i);
if (mustBeEntirelyNumeric != p.isNumeric())
return ErrorsAndWarnings.getMessage(123, shortMessages, translit);
return ErrorsAndWarnings.getMessage(123, shortMessages, translit, traits);
if ((i == 0 && "V".equals(p.getLeft()))
|| (i > 0 && "V".equals(p.getLeft())
&& (null != get(i - 1).getRight()
&& !"+".equals(get(i - 1).getRight())))) {
return ErrorsAndWarnings.getMessage(124, shortMessages, translit);
} else if ("A".equals(p.getLeft()) && (null == p.getRight() || "".equals(p.getRight()))) {
return ErrorsAndWarnings.getMessage(125, shortMessages, translit);
} else if ((null == p.getLeft() && !"-".equals(p.getRight()))
if (traits.isACIP()
&& ((i == 0 && "V".equals(p.getLeft()))
|| (i > 0 && "V".equals(p.getLeft())
&& (null != get(i - 1).getRight()
&& !"+".equals(get(i - 1).getRight()))))) {
return ErrorsAndWarnings.getMessage(124, shortMessages, translit, traits);
} else if (traits.aVowel().equals(p.getLeft())
&& (null == p.getRight()
|| "".equals(p.getRight()))) {
return ErrorsAndWarnings.getMessage(125, shortMessages, translit, traits);
} else if (null != p.getRight()
&& !"+".equals(p.getRight())
&& !traits.disambiguator().equals(p.getRight())
&& !traits.isWowel(p.getRight())
&& false /* TODO(DLC)[EWTS->Tibetan]: think about this harder. */) {
return "ErrorNumberDLC1: We don't yet support stacking vowels, convert {" + translit + "} manually.";
// TODO(DLC)[EWTS->Tibetan]: test, i think we do support it
} else if ((null == p.getLeft()
&& (!traits.disambiguator().equals(p.getRight())
&& (!traits.vowelAloneImpliesAChen()
|| !traits.aVowel().equals(p.getRight()))))
|| (null != p.getLeft()
&& !traits.isConsonant(p.getLeft())
&& (!traits.isConsonant(p.getLeft()) && (!traits.vowelAloneImpliesAChen() || !traits.aVowel().equals(p.getLeft())))
&& !p.isNumeric())) {
// FIXME: stop handling this outside of ErrorsAndWarnings:
if (null == p.getLeft()) {
if (shortMessages)
return "128: {" + translit + "}";
else
return "128: Cannot convert ACIP {" + translit + "} because " + p.getRight() + " is a \"vowel\" without an associated consonant.";
return "128: Cannot convert " + traits.shortTranslitName() + " {" + translit + "} because " + p.getRight() + " is a \"vowel\" without an associated consonant.";
} else {
if (shortMessages)
return "129: {" + translit + "}";
else
return "129: Cannot convert ACIP {" + translit + "} because " + p.getLeft() + " is not an ACIP consonant.";
return "129: Cannot convert " + traits.shortTranslitName() + " {" + translit + "} because " + p.getLeft() + " is not an " + traits.shortTranslitName() + " consonant.";
}
}
}
if ("+".equals(get(sz - 1).getRight())) {
return ErrorsAndWarnings.getMessage(126, shortMessages, translit);
return ErrorsAndWarnings.getMessage(126, shortMessages, translit, traits);
}
// FIXME: really this is a warning, not an error:
if ("-".equals(get(sz - 1).getRight())) {
return ErrorsAndWarnings.getMessage(127, shortMessages, translit);
if (traits.disambiguator().equals(get(sz - 1).getRight())) {
return ErrorsAndWarnings.getMessage(127, shortMessages, translit, traits);
}
return null;
}
@ -245,6 +262,9 @@ class TPairList {
* empty parse tree.
*/
public TParseTree getParseTree() {
// TODO(DLC)[EWTS->Tibetan]: EWTS NOTE: this is still useful for EWTS: In EWTS, bkra
// is b.k+ra, smra is s+m+ra, and tshmra is invalid.
// We treat [(B . ), (G . +), (K . ), (T . A)] as if it could
// be {B+G+K+T} or {B}{G+K+T}; we handle prefixes specially
// this way. [(T . ), (G . +), (K . ), (T . A)] is clearly
@ -254,22 +274,10 @@ class TPairList {
// master list of stacks.
int sz = size();
for (int i = 0; i < sz; i++) {
TPair p = get(i);
if (p.getLeft() == null && !"-".equals(p.getRight()))
return null; // clearly illegal.
if ("+".equals(p.getLeft()))
return null; // clearly illegal.
if (":".equals(p.getLeft()))
return null; // clearly illegal.
if ("m".equals(p.getLeft()))
return null; // clearly illegal.
if ("m:".equals(p.getLeft()))
return null; // clearly illegal.
}
for (int i = 0; i < sz; i++)
if (traits.isClearlyIllegal(get(i)))
return null;
TParseTree pt = new TParseTree();
if (sz < 1) return null;
// When we see a stretch of ACIP without a disambiguator or a
@ -387,7 +395,7 @@ class TPairList {
if ((breakLocations[1] >= 0 && breakLocations[1] <= breakLocations[0])
|| (breakLocations[2] >= 0 && breakLocations[2] <= breakLocations[1]))
throw new Error("breakLocations is monotonically increasing, ain't it?");
TParseTree pt = new TParseTree();
for (int i = 0; i < sz; i++) {
if (i+1 == sz || get(i).endsACIPStack()) {
TStackListList sll = new TStackListList(4); // maximum is 4.
@ -412,35 +420,54 @@ class TPairList {
// and only if b1 is one, etc.
for (int counter = 0; counter < (1<<numBreaks); counter++) {
TStackList sl = new TStackList();
boolean slIsInvalid = false;
TPairList currentStack = new TPairList(traits);
TPairList currentStackUnmodified = new TPairList(traits);
for (int k = startLoc; k <= i; k++) {
if (!get(k).isDisambiguator()) {
if (get(k).isNumeric()
|| (get(k).getLeft() != null
&& traits.isConsonant(get(k).getLeft())))
&& (traits.isConsonant(get(k).getLeft())
|| traits.vowelAloneImpliesAChen() && traits.aVowel().equals(get(k).getLeft())))) {
currentStack.add(get(k).insideStack());
else
currentStackUnmodified.add(get(k));
} else {
return null; // sA, for example, is illegal.
}
}
if (k == i || get(k).endsACIPStack()) {
if (!currentStack.isEmpty())
sl.add(currentStack.asStack());
if (!currentStack.isEmpty()) {
if (traits.couldBeValidStack(currentStackUnmodified)) {
sl.add(currentStack.asStack());
} else {
slIsInvalid = true;
break;
}
}
currentStack = new TPairList(traits);
currentStackUnmodified = new TPairList(traits);
} else {
if (numBreaks > 0) {
for (int j = 0; breakStart+j < 3; j++) {
if (k == breakLocations[breakStart+j]
&& 1 == ((counter >> j) & 1)) {
if (!currentStack.isEmpty())
sl.add(currentStack.asStack());
if (!currentStack.isEmpty()) {
if (traits.couldBeValidStack(currentStackUnmodified)) {
sl.add(currentStack.asStack());
} else {
slIsInvalid = true;
break;
}
}
currentStack = new TPairList(traits);
currentStackUnmodified = new TPairList(traits);
break; // shouldn't matter, but you never know
}
}
}
}
}
if (!sl.isEmpty()) {
if (!slIsInvalid && !sl.isEmpty()) {
sll.add(sl);
}
}
@ -467,7 +494,7 @@ class TPairList {
TPair lastPair = get(size() - 1);
if ("+".equals(lastPair.getRight()))
al.set(size() - 1, new TPair(traits, lastPair.getLeft(), null));
else if ("-".equals(lastPair.getRight()))
else if (traits.disambiguator().equals(lastPair.getRight()))
al.set(size() - 1, new TPair(traits, lastPair.getLeft(), null));
}
return this;
@ -507,14 +534,15 @@ class TPairList {
boolean add_U0F7F = false;
int where;
if (p.getRight() != null
&& (where = p.getRight().indexOf(':')) >= 0) {
&& (where = p.getRight().indexOf(':')) >= 0) { // TODO(DLC)[EWTS->Tibetan]
// this ':' guy is his own TGCPair.
add_U0F7F = true;
StringBuffer rr = new StringBuffer(p.getRight());
rr.deleteCharAt(where);
p = new TPair(traits, p.getLeft(), rr.toString());
}
boolean hasNonAVowel = (!"A".equals(p.getRight()) && null != p.getRight());
boolean hasNonAVowel = (!traits.aVowel().equals(p.getRight())
&& null != p.getRight());
String thislWylie = traits.getEwtsForConsonant(p.getLeft());
if (thislWylie == null) {
char ch;
@ -560,7 +588,7 @@ class TPairList {
pl.add(tp);
if (add_U0F7F) {
indexList.add(new Integer(index));
pl.add(new TGCPair("H", null, TGCPair.TYPE_OTHER));
pl.add(new TGCPair("H", null, TGCPair.TYPE_OTHER)); // TODO(DLC)[EWTS->Tibetan]
}
}
}
@ -618,7 +646,7 @@ class TPairList {
unicodeExceptionsMap.put("\u0f62\u0fb6", "\u0f6a\u0fb6"); // RS
}
String mapEntry = (String)unicodeExceptionsMap.get(nonVowelSB.toString());
if (null != mapEntry)
if (traits.isACIP() && null != mapEntry)
sb.append(mapEntry);
else
sb.append(nonVowelSB);
@ -696,11 +724,13 @@ class TPairList {
? 137
: 511,
shortMessages,
recoverACIP()));
recoverACIP(),
traits));
return;
}
}
if (lastPair.getRight() == null || lastPair.equals("-")) {
if (lastPair.getRight() == null
|| lastPair.equals(traits.disambiguator())) {
duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
} else {
traits.getDuffForWowel(duffsAndErrors,

View file

@ -23,88 +23,23 @@ package org.thdl.tib.text.ttt;
/** A factory for creating {@link TPairList TPairLists} from
* Strings of ACIP.
* @author David Chandler */
// TODO(DLC)[EWTS->Tibetan]: kill this class; put it all in TTraits.
class TPairListFactory {
/** This class is not instantiable. */
private TPairListFactory() { }
/** Returns one or two new TPairList instances. Breaks an ACIP
* tsheg bar (roughly a &quot;syllable&quot;) into chunks; this
* computes l' (for you design doc enthusiasts).
*
* <p>Here's a rough sketch of the algorithm: run along getting
* the current TPair as big as you can. If you get it very
* big, but there's something illegal afterward that wouldn't
* otherwise be illegal, undo as little as possible to correct.
* For example, G'A'I becomes [(G . 'A), (' . I)], and TAA
* becomes [(T . A)] in a first pass but then we see that the
* rest would be suboptimal, so we backtrack to [(T . )] and then
* finally become [(T . ), (A . A)]. We look for (A . ) and (
* . <vowel>) in the rest in order to say "the rest would be
* suboptimal", i.e. we use TPairList.hasSimpleError(TTraits).</p>
*
* <p>There is one case where we break things up into two pair
* lists if and only if specialHandlingForAppendages is true -- I
* thought the converter had a bug because I saw SNYAM'AM in
* KD0003I2.ACT. I asked Robert Chilton, though, and he said
* "SNYAM'AM " was likely a typo for "SNYAM 'AM", so leave
* specialHandlingForAppendages false.</p>
*
* <p>I found out about (OK, as it turns out, imagined) this case
* too late to do anything clean about it. SNYAM'AM, e.g.,
* breaks up into [(S . ), (NY . A), (M . 'A), (M . )], which is
* incorrect -- [(S . ), (NY . A), (M . ), (' . A), (M . )] is
* correct. But we don't know which is correct without parsing,
* so both are returned. The clean treatment would be to lex
* into a form that didn't insist 'A was either a vowel or a
* consonant. Then the parser would figure it out. But don't
* bother, because specialHandlingForAppendages should be false
* always.</p>
*
* @param acip a string of ACIP with no punctuation in it
* @param specialHandlingForAppendages true if and only if you
* want SNYAM'AM to ultimately parse as {S+NYA}{M}{'A}{M} instead
* of {S+NYA}{M'A}{M}
* @return an array of one or two pair lists, if the former, then
* the second element will be null, if the latter, the second
* element will have (* . ), (' . *) instead of (* . '*) which
* the former has
* @throws IllegalArgumentException if acip is too large for us
* to break into chunks (we're recursive, not iterative, so the
* boundary can be increased a lot if you care, but you don't) */
static TPairList[] breakACIPIntoChunks(String acip,
boolean specialHandlingForAppendages)
throws IllegalArgumentException
{
try {
TTraits ttraits = ACIPTraits.instance();
TPairList a = breakHelperACIP(acip, true, false, ttraits);
TPairList b = null;
if (specialHandlingForAppendages)
b = breakHelperACIP(acip, false, false, ttraits);
if (null != b && a.equals(b))
return new TPairList[] { a, null };
else
return new TPairList[] { a, b };
} catch (StackOverflowError e) {
throw new IllegalArgumentException("Input too large[1]: " + acip);
} catch (OutOfMemoryError e) {
throw new IllegalArgumentException("Input too large[2]: " + acip);
}
}
/** TODO(DLC)[EWTS->Tibetan]: doc */
static TPairList[] breakEWTSIntoChunks(String ewts)
throws IllegalArgumentException
{
try {
return new TPairList[] {
breakHelperEWTS(ewts, EWTSTraits.instance()), null
};
} catch (StackOverflowError e) {
throw new IllegalArgumentException("Input too large[1]: " + ewts);
} catch (OutOfMemoryError e) {
throw new IllegalArgumentException("Input too large[2]: " + ewts);
}
/** See {@link TTraits#breakTshegBarIntoChunks}. */
static TPairList[] breakACIPIntoChunks(String tt,
boolean specialHandlingForAppendages) {
TTraits ttraits = ACIPTraits.instance();
TPairList a = breakHelperACIP(tt, true, false, ttraits);
TPairList b = null;
if (specialHandlingForAppendages)
b = breakHelperACIP(tt, false, false, ttraits);
if (null != b && a.equals(b))
return new TPairList[] { a, null };
else
return new TPairList[] { a, b };
}
/** Helps {@link #breakACIPIntoChunks(String,boolean)}.
@ -149,7 +84,7 @@ class TPairListFactory {
|| (head.getRight() != null
&& !"+".equals(head.getRight())
&& !"-".equals(head.getRight())),
ttraits)).hasSimpleError(ttraits)) {
ttraits)).hasSimpleError()) {
for (int i = 1; i < howMuch; i++) {
// try giving i characters back if that leaves us with
// a legal head and makes the rest free of simple
@ -164,7 +99,7 @@ class TPairListFactory {
|| (newHead.getRight() != null
&& !"+".equals(newHead.getRight())
&& !"-".equals(newHead.getRight())),
ttraits)).hasSimpleError(ttraits)) {
ttraits)).hasSimpleError()) {
newTail.prepend(newHead);
return newTail;
}
@ -176,6 +111,136 @@ class TPairListFactory {
return tail;
}
/** See {@link TTraits#breakTshegBarIntoChunks}. */
static TPairList[] breakEWTSIntoChunks(String ewts)
throws IllegalArgumentException
{
EWTSTraits traits = EWTSTraits.instance();
TPairList pl = breakHelperEWTS(ewts, traits);
TPairList npl = pl;
// TODO(DLC)[EWTS->Tibetan]: this crap ain't workin' for kaHM. But kaeM and kaMe shouldn't work, right? Figure out what EWTS really says...
// TODO(DLC)[EWTS->Tibetan]: for "a\\0f86" e.g.:
if (pl.size() > 1) {
npl = new TPairList(traits, pl.size());
for (int i = pl.size() - 1; i >= 1; i--) {
TPair left = pl.get(i - 1);
TPair right = pl.get(i);
if (traits.aVowel().equals(left.getRight())
&& left.getLeft() == null
&& right.getLeft() == null
&& traits.isWowelThatRequiresAChen(right.getRight())) {
npl.prepend(new TPair(traits, traits.aVowel(), right.getRight()));
--i;
} else if (traits.aVowel().equals(left.getRight())
&& left.getLeft() != null
&& right.getLeft() == null
&& traits.isWowelThatRequiresAChen(right.getRight())
&& false /* TODO(DLC)[EWTS->Tibetan]: ewts kaM is bothersome now */) {
npl.prepend(new TPair(traits, left.getLeft(), right.getRight()));
--i;
} else {
npl.prepend(right);
if (i == 1)
npl.prepend(left);
}
}
}
TPairList nnpl;
if (true) {
// Collapse ( . wowel1) ( . wowel2) into (
// . wowel1+wowel2). Then collapse (* . a) ( . x) into (*
// . x). Also, if an a-chen (\u0f68) is implied, then
// insert it.
TPairList xnnpl = new TPairList(traits, pl.size());
for (int i = 0; i < npl.size(); ) {
TPair p = npl.get(i);
int set_i_to = i + 1;
if (p.getLeft() == null
&& p.getRight() != null
&& !traits.disambiguator().equals(p.getRight())
&& !"+".equals(p.getRight())) {
StringBuffer sb = new StringBuffer(p.getRight());
for (int j = i + 1; j < npl.size(); j++) {
TPair p2 = npl.get(j);
if (p2.getLeft() == null
&& p2.getRight() != null
&& !traits.disambiguator().equals(p2.getRight())
&& !"+".equals(p2.getRight()))
{
sb.append("+" + p2.getRight());
set_i_to = j + 1;
} else {
break;
}
}
p = new TPair(traits, traits.aVowel(), sb.toString());
}
// TODO(DLC)[EWTS->Tibetan]: Do we still have "ai" converting to the wrong thing. "ae"?
xnnpl.append(p);
i = set_i_to;
}
nnpl = new TPairList(traits, pl.size());
// (* . a ) ( . x) ... ( . y) -> (* . a+x+...+y)
for (int i = 0; i < xnnpl.size(); ) {
TPair p = xnnpl.get(i);
int set_i_to = i + 1;
if (traits.aVowel().equals(p.getRight())) {
StringBuffer sb = new StringBuffer(p.getRight());
for (int j = i + 1; j < xnnpl.size(); j++) {
TPair p2 = xnnpl.get(j);
if (p2.getLeft() == null
&& p2.getRight() != null
&& !traits.disambiguator().equals(p2.getRight())
&& !"+".equals(p2.getRight()))
{
// TODO(DLC)[EWTS->Tibetan] a+o+e is what we'll get.. maybe we want just o+e?
sb.append("+" + p2.getRight());
set_i_to = j + 1;
} else {
break;
}
}
p = new TPair(traits, p.getLeft(), sb.toString());
}
if (false) { // TODO(DLC)[EWTS->Tibetan]: bra is screwed up, do in it stacklist?
// EWTS does not think that kra is k+ra. Replace
// (consonant . ) with (consonant . DISAMBIGUATOR):
if (p.getRight() == null && p.getLeft() != null
&& i + 1 < xnnpl.size())
p = new TPair(traits, p.getLeft(), traits.disambiguator());
}
nnpl.append(p);
i = set_i_to;
}
} else {
// TODO(DLC)[EWTS->Tibetan]: this block is not executing. kill it after testing and thinking
nnpl = new TPairList(traits, pl.size());
for (int i = npl.size() - 1; i >= 0; i--) {
TPair p = npl.get(i);
if (p.getLeft() == null
&& p.getRight() != null
&& !traits.disambiguator().equals(p.getRight())
&& !"+".equals(p.getRight())) /* TODO(DLC)[EWTS->Tibetan] this should be equivalent to isWowel(p.getRight()) but o+o shows that's not true yet */
p = new TPair(traits, traits.aVowel(), p.getRight());
// TODO(DLC)[EWTS->Tibetan]: do you still have "ai" converting to the wrong thing? ("ae" also?)
nnpl.prepend(p);
}
}
// TODO(DLC)[EWTS->Tibetan]: this nnpl crap was before getFirstConsonantAndVowel got fixed. Try killing it!
return new TPairList[] {
nnpl, null
};
}
// TODO(DLC)[EWTS->Tibetan]: doc
private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) {
@ -190,7 +255,7 @@ class TPairListFactory {
TPairList tail;
if ((tail = breakHelperEWTS(ewtsBuf.substring(howMuch),
ttraits)).hasSimpleError(ttraits)) {
ttraits)).hasSimpleError()) {
for (int i = 1; i < howMuch; i++) {
// try giving i characters back if that leaves us with
// a legal head and makes the rest free of simple
@ -199,7 +264,7 @@ class TPairListFactory {
TPair newHead;
if ((newHead = head.minusNRightmostTransliterationCharacters(i)).isLegal()
&& !(newTail
= breakHelperEWTS(ewtsBuf.substring(howMuch - i), ttraits)).hasSimpleError(ttraits)) {
= breakHelperEWTS(ewtsBuf.substring(howMuch - i), ttraits)).hasSimpleError()) {
newTail.prepend(newHead);
return newTail;
}
@ -211,101 +276,193 @@ class TPairListFactory {
return tail;
}
/** Returns the largest TPair we can make from the acip starting
* from the left. This will return a size zero pair if and only
* if acip is the empty string; otherwise, it may return a pair
* with either the left or right component empty. This mutates
* acip when we run into {NA+YA}; it mutates acip into {N+YA}.
* For {NE+YA}, it does not mutate acip or behave intelligently.
* A later phase will need to turn that into {N+YE} or an error
* or whatever you like. howMuch[0] will be set to the number of
* characters of acip that this call has consumed. */
private static TPair getFirstConsonantAndVowel(StringBuffer acip, // TODO(DLC)[EWTS->Tibetan]: function name needs ACIP in it?
private static String GetInitialVowel(TTraits ttraits, String tx,
String startOfVowel) {
if (null == startOfVowel) startOfVowel = "";
boolean startsWithPlus = false;
if (!"".equals(startOfVowel)
&& (!ttraits.vowelsMayStack()
|| (tx.length() < 1 || !(startsWithPlus = tx.substring(0, 1).equals("+")))))
return ("".equals(startOfVowel) ? null : startOfVowel);
if (startsWithPlus)
tx = tx.substring(1);
for (int i = Math.min(ttraits.maxWowelLength(), tx.length()); i >= 1; i--) {
String t = tx.substring(0, i);
if (ttraits.isWowel(t)
|| (ttraits.isACIP()
// Or these, which we massage into "Am", "Am:", and
// "A:" because I didn't think {Pm} should be treated
// like {PAm} originally:
// TODO(DLC)[EWTS->Tibetan]: NOW NIGHTMARE
&& ("m".equals(t) || "m:".equals(t) || ":".equals(t)))) {
// If this is followed by +wowel[+wowel[+wowel... in EWTS then that's part of the vowel also:
return GetInitialVowel(ttraits,
tx.substring(i),
startOfVowel + (startsWithPlus ? "+" : "") + t);
}
}
return null;
}
/** Returns the largest TPair we can make from the transliteration
* starting from the left. This will return a size zero pair if
* and only if tx is the empty string; otherwise, it may return a
* pair with either the left or right component empty. [FOR
* ACIP:] This mutates tx when we run into {NA+YA}; it mutates tx
* into {N+YA}. For {NE+YA}, it does not mutate tx or behave
* intelligently. A later phase will need to turn that into
* {N+YE} or an error or whatever you like. howMuch[0] will be
* set to the number of characters of tx that this call has
* consumed. */
private static TPair getFirstConsonantAndVowel(StringBuffer tx, // TODO(DLC)[EWTS->Tibetan]: function name needs ACIP in it?
int howMuch[],
TTraits ttraits) {
// Note that it is *not* the case that if acip.substring(0, N)
// To handle EWTS "phywa\\u0f84\u0f86" [yes that's two slashes
// and then one slash], for example, we need to make the wowel
// (the getRight() field of the returned TPair) contain
// everything that it should.
//
// It can't hurt in ACIP, though I don't recall if ACIP's lexer
// allows Unicode characters.
TPair og = helpGetFirstConsonantAndVowel(tx, howMuch, ttraits);
int len = tx.length();
StringBuffer x = null;
while (howMuch[0] < len) {
if (isUnicodeWowelChar(tx.charAt(howMuch[0]))) {
if (null == x) x = new StringBuffer(); // rarely happens
if (x.length() > 0) x.append('+');
x.append(tx.charAt(howMuch[0]++));
} else {
break;
}
}
// In EWTS, deal with M, ~M`, etc. They're much like
// UnicodeWowelCharacters.
if (ttraits instanceof EWTSTraits) {
EWTSTraits tt = (EWTSTraits)ttraits;
while (howMuch[0] < len) {
int howMuchExtra[] = new int[] { 0 };
TPair p
= helpGetFirstConsonantAndVowel(new StringBuffer(tx.substring(howMuch[0])),
howMuchExtra,
ttraits);
if (p.getLeft() == null
&& p.getRight() != null
&& tt.isWowelThatRequiresAChen(p.getRight())) {
if (null == x) x = new StringBuffer(); // rarely happens
String extra;
if (x.length() > 0) x.append('+');
x.append(extra = tx.substring(howMuch[0], howMuch[0] + howMuchExtra[0]));
// System.out.println("extra is " + extra); TODO(DLC)[EWTS->Tibetan]
howMuch[0] += howMuchExtra[0];
} else {
break;
}
}
}
if (null != x)
return new TPair(ttraits, og.getLeft(),
(null == og.getRight() || ttraits.aVowel().equals(og.getRight()))
? x.toString()
: (og.getRight() + "+" + x.toString()));
else
return og;
}
private static TPair helpGetFirstConsonantAndVowel(StringBuffer tx, // TODO(DLC)[EWTS->Tibetan]: function name needs ACIP in it?
int howMuch[],
TTraits ttraits) {
// Note that it is *not* the case that if tx.substring(0, N)
// is legal (according to TPair.isLegal()), then
// acip.substring(0, N-1) is legal for all N. For example,
// tx.substring(0, N-1) is legal for all N. For example,
// think of ACIP's {shA} and {KshA}. However, 's' is the only
// tricky fellow, so it is true that acip.substring(0, N-1) is
// either legal or ends with 's' if acip.substring(0, N) is
// legal.
// tricky fellow in ACIP, so in ACIP it is true that
// tx.substring(0, N-1) is either legal or ends with 's' if
// tx.substring(0, N) is legal.
//
// We don't, however, use this approach. We just try to find
// a consonant of length 3, and then, failing that, of length
// 2, etc. Likewise with vowels. This avoids the issue.
int i, xl = acip.length();
int i, xl = tx.length();
// TODO(DLC)[EWTS->Tibetan]: nasty special case!
if (false && !ttraits.isACIP() /* TODO(DLC)[EWTS->Tibetan]: isEWTS! */
&& xl >= 2 && tx.charAt(0) == 'a' && (tx.charAt(1) == 'i' || tx.charAt(1) == 'u')) {
howMuch[0] = 2;
return new TPair(ttraits, null, tx.substring(0, 2));
// TODO(DLC)[EWTS->Tibetan]: test that "au" alone is \u0f68\u0f7d, "ai" alone is \u0f68\u0f7b in EWTS.
}
if (0 == xl) {
howMuch[0] = 0;
return new TPair(ttraits, null, null);
}
if (acip.charAt(0) == ttraits.disambiguatorChar()) {
if (tx.charAt(0) == ttraits.disambiguatorChar()) {
howMuch[0] = 1;
return new TPair(ttraits, null, ttraits.disambiguator());
}
char ch = acip.charAt(0);
char ch = tx.charAt(0);
// Numbers never appear in stacks, so if you see 1234, that's
// like seeing 1-2-3-4.
// like seeing 1-2-3-4. Though in EWTS you can have '0\u0f19'
if (ch >= '0' && ch <= '9') {
// TODO(DLC)[EWTS->Tibetan]: test case: 0e should have a-chen and 0\u0f74 should go through without errors.
if (xl > 1 && ttraits.isUnicodeWowel(tx.charAt(1))) {
howMuch[0] = 2;
return new TPair(ttraits, tx.substring(0, 1), tx.substring(1, 2));
}
howMuch[0] = 1; // not 2...
return new TPair(ttraits, acip.substring(0, 1), (xl == 1) ? null : ttraits.disambiguator());
return new TPair(ttraits, tx.substring(0, 1), (xl == 1) ? null : ttraits.disambiguator());
}
String l = null, r = null;
for (i = Math.min(ttraits.maxConsonantLength(), xl); i >= 1; i--) {
String t = null;
if (ttraits.isConsonant(t = acip.substring(0, i))) {
if (ttraits.isConsonant(t = tx.substring(0, i))
|| (ttraits.vowelAloneImpliesAChen() // handle EWTS {a+yo}
&& ttraits.aVowel().equals(tx.substring(0, i))
&& i < xl && tx.substring(i, i + i).equals("+"))) {
l = t;
break;
}
}
int ll = (null == l) ? 0 : l.length();
if (null != l && xl > ll && acip.charAt(ll) == ttraits.disambiguatorChar()) {
if (null != l && xl > ll && tx.charAt(ll) == ttraits.disambiguatorChar()) {
howMuch[0] = l.length() + 1;
return new TPair(ttraits, l, ttraits.disambiguator());
}
if (null != l && xl > ll && acip.charAt(ll) == '+') {
if (null != l && xl > ll && tx.charAt(ll) == '+') {
howMuch[0] = l.length() + 1;
return new TPair(ttraits, l, "+");
}
for (i = Math.min(ttraits.maxWowelLength(), xl - ll); i >= 1; i--) {
String t = null;
if (ttraits.isWowel(t = acip.substring(ll, ll + i))
// Or these, which we massage into "Am", "Am:", and
// "A:" because I didn't think {Pm} should be treated
// like {PAm} originally:
// TODO(DLC)[EWTS->Tibetan]: NOW NIGHTMARE
|| "m".equals(t) || "m:".equals(t) || ":".equals(t)) {
r = t;
break;
}
}
// Treat {BATA+SA'I} like {BAT+SA'I}:
int z;
if (null != l && /* TODO(DLC)[EWTS->Tibetan]: */"A".equals(r) && ((z = ll + /* TODO(DLC)[EWTS->Tibetan]: */"A".length()) < xl)
&& acip.charAt(z) == '+') {
acip.deleteCharAt(z-1);
howMuch[0] = l.length() + 1;
return new TPair(ttraits, l, "+");
}
// Allow Pm to mean PAm, P: to mean PA:, Pm: to mean PAm:. /* TODO(DLC)[EWTS->Tibetan]: */
int mod = 0;
if ("m".equals(r)) { r = "Am"; mod = -1; }
if (":".equals(r)) { r = "A:"; mod = -1; }
if ("m:".equals(r)) { r = "Am:"; mod = -1; }
if (":m".equals(r)) { r = "A:m"; mod = -1; } // not seen, though...
r = GetInitialVowel(ttraits, tx.substring(ll), null);
if (ttraits.isACIP()) {
// Treat {BATA+SA'I} like {BAT+SA'I}: // TODO(DLC)[EWTS->Tibetan]: in EWTS???
int z;
if (null != l
&& ttraits.aVowel().equals(r)
&& ((z = ll + ttraits.aVowel().length()) < xl)
&& tx.charAt(z) == '+') {
tx.deleteCharAt(z-1);
howMuch[0] = l.length() + 1;
return new TPair(ttraits, l, "+");
}
// Allow Pm to mean PAm, P: to mean PA:, Pm: to mean PAm:. /* TODO(DLC)[EWTS->Tibetan]: in EWTS? */
if ("m".equals(r)) { r = "Am"; mod = -1; }
if (":".equals(r)) { r = "A:"; mod = -1; }
if ("m:".equals(r)) { r = "Am:"; mod = -1; }
if (":m".equals(r)) { r = "A:m"; mod = -1; } // not seen, though...
}
// what if we see a character that's not part of any wowel or
// consonant? We return it.
if (null == l && null == r) {
howMuch[0] = 1; // not 2...
// add a disambiguator to avoid exponential running time:
return new TPair(ttraits, acip.substring(0, 1),
return new TPair(ttraits, tx.substring(0, 1),
(xl == 1) ? null : ttraits.disambiguator());
}
@ -314,6 +471,13 @@ class TPairListFactory {
+ mod);
return new TPair(ttraits, l, r);
} // TODO(DLC)[EWTS->Tibetan]:
private static boolean isUnicodeWowelChar(char ch) {
return ((ch >= '\u0f71' && ch <= '\u0f84')
|| "\u0f35\u0f37\u0f18\u0f19\u0f3e\u0f3f\u0f86\u0f87\u0fc6".indexOf(ch) >= 0);
// TODO(dchandler): should we really allow "phywa\\u0f18", or
// does \u0f18 only combine with digits?
}
}

View file

@ -18,8 +18,6 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlDebug;
import java.util.ArrayList;
/** A list of non-empty list of {@link TStackListList
@ -129,6 +127,10 @@ class TParseTree {
if (sz == 1) {
return up.get(0);
} else if (sz > 1) {
// TODO(DLC)[EWTS->Tibetan]: does this still happen? If so, when?
//
// System.out.println("SHO NUFF, >1 non-illegal parses still happens");
// {PADMA}, for example. Our technique is to go from the
// left and stack as much as we can. So {PA}{D}{MA} is
// inferior to {PA}{D+MA}, and {PA}{D+MA}{D}{MA} is
@ -279,7 +281,8 @@ class TParseTree {
public String getWarning(String warningLevel,
TPairList pl,
String originalACIP,
boolean shortMessages) {
boolean shortMessages,
TTraits traits) {
// ROOM_FOR_IMPROVEMENT: Allow one tsheg bar to have multiple
// warnings/errors associated with it. Make this a private
// subroutine, and have the public getWarning(..) call on this
@ -301,7 +304,7 @@ class TParseTree {
if (shortMessages)
return "501: Using " + bestParse + ", not " + noPrefixTestsUniqueParse.get(0);
else
return "501: Using " + bestParse + ((null != originalACIP) ? (" for the ACIP {" + originalACIP + "}") : "") + ", but only because the tool's knowledge of prefix rules (see the documentation) says that " + noPrefixTestsUniqueParse.get(0) + " is not a legal Tibetan tsheg bar (\"syllable\")";
return "501: Using " + bestParse + ((null != originalACIP) ? (" for the " + traits.shortTranslitName() + " {" + originalACIP + "}") : "") + ", but only because the tool's knowledge of prefix rules (see the documentation) says that " + noPrefixTestsUniqueParse.get(0) + " is not a legal Tibetan tsheg bar (\"syllable\")";
}
}
@ -321,27 +324,31 @@ class TParseTree {
// FIXME: The caller will prepend "WARNING " to this error!
if (ErrorsAndWarnings.isEnabled(101, warningLevel))
return ErrorsAndWarnings.getMessage(101, shortMessages,
translit);
translit,
traits);
} else {
if (bestParse.hasStackWithoutVowel(pl, isLastStack)) {
if (isLastStack[0]) {
if (ErrorsAndWarnings.isEnabled(502, warningLevel))
return ErrorsAndWarnings.getMessage(502, shortMessages,
translit);
translit,
traits);
} else {
throw new Error("Can't happen now that we stack greedily");
}
}
if (ErrorsAndWarnings.isEnabled(503, warningLevel))
return ErrorsAndWarnings.getMessage(503, shortMessages,
translit);
translit,
traits);
}
} else {
if (nip.get(0).hasStackWithoutVowel(pl, isLastStack)) {
if (isLastStack[0]) {
if (ErrorsAndWarnings.isEnabled(502, warningLevel))
return ErrorsAndWarnings.getMessage(502, shortMessages,
translit);
translit,
traits);
} else {
throw new Error("Can't happen now that we stack greedily [2]");
}
@ -362,7 +369,8 @@ class TParseTree {
++plnum;
if (ErrorsAndWarnings.isEnabled(505, warningLevel))
return ErrorsAndWarnings.getMessage(505, shortMessages,
translit);
translit,
traits);
}
plnum = 0;
for (int stackNum = 0; stackNum < bestParse.size(); stackNum++) {
@ -380,14 +388,16 @@ class TParseTree {
else if (type == 1)
if (ErrorsAndWarnings.isEnabled(506, warningLevel))
return ErrorsAndWarnings.getMessage(506, shortMessages,
translit);
translit,
traits);
} else {
if (type == 0)
type = 1;
else if (type == -1)
if (ErrorsAndWarnings.isEnabled(506, warningLevel))
return ErrorsAndWarnings.getMessage(506, shortMessages,
translit);
translit,
traits);
}
}
if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
@ -445,14 +455,16 @@ n+t+s
if (ErrorsAndWarnings.isEnabled(warningNum, warningLevel))
return ErrorsAndWarnings.getMessage(warningNum,
shortMessages,
translit);
translit,
traits);
}
while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
++plnum;
if (ErrorsAndWarnings.isEnabled(505, warningLevel))
return ErrorsAndWarnings.getMessage(505, shortMessages,
translit);
translit,
traits);
}
}
}
@ -472,11 +484,13 @@ n+t+s
if (pl.size() == 3) {
if (ErrorsAndWarnings.isEnabled(508, warningLevel))
return ErrorsAndWarnings.getMessage(508, shortMessages,
translit);
translit,
traits);
} else {
if (ErrorsAndWarnings.isEnabled(509, warningLevel))
return ErrorsAndWarnings.getMessage(509, shortMessages,
translit);
translit,
traits);
}
}
}
@ -497,11 +511,13 @@ n+t+s
if (pl.size() == 2) {
if (ErrorsAndWarnings.isEnabled(508, warningLevel))
return ErrorsAndWarnings.getMessage(508, shortMessages,
translit);
translit,
traits);
} else {
if (ErrorsAndWarnings.isEnabled(509, warningLevel))
return ErrorsAndWarnings.getMessage(509, shortMessages,
translit);
translit,
traits);
}
}
}
@ -513,7 +529,7 @@ n+t+s
/** Returns something akin to the ACIP input (okay, maybe 1-2-3-4
* instead of 1234, and maybe AUTPA instead of AUT-PA)
* corresponding to this parse tree. */
public String recoverACIP() {
public String recoverACIP() { // TODO(DLC)[EWTS->Tibetan]: acip-specific
ParseIterator pi = getParseIterator();
if (pi.hasNext()) {
return pi.next().recoverACIP();

View file

@ -18,14 +18,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import org.thdl.tib.text.TibTextUtils;
import org.thdl.tib.text.TGCList;
import org.thdl.tib.text.DuffCode;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.ListIterator;
import org.thdl.tib.text.TGCList;
import org.thdl.tib.text.TibTextUtils;
/** A list of {@link TPairList TPairLists}, each of which is for
* a stack (a grapheme cluster), typically corresponding to one tsheg
* bar.
@ -165,7 +163,7 @@ class TStackList {
TPairList pl = get(pairListIndex);
TPair p = pl.get(pl.size() - 1);
isLegalAndHasAVowelOnRoot
= (p.getRight() != null && p.getRight().startsWith("A")); // could be {A:}, e.g.
= (p.getRight() != null && p.getRight().startsWith("A")); // could be {A:}, e.g. TODO(DLC)[EWTS->Tibetan]: ???
if (isLegalAndHasAVowelOnRoot)
break;
}

View file

@ -18,12 +18,11 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlOptions;
import org.thdl.util.ThdlDebug;
import org.thdl.tib.text.tshegbar.UnicodeUtils;
import java.util.HashSet;
import java.io.*;
import org.thdl.tib.text.tshegbar.UnicodeUtils;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
/**
* An TString is some Latin text and a type, the type stating whether

View file

@ -18,11 +18,11 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.util.ArrayList;
import org.thdl.tib.text.TGCList;
import org.thdl.tib.text.TGCPair;
import java.util.ArrayList;
/** A list of grapheme clusters.
*
* @author David Chandler */

View file

@ -19,6 +19,7 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.util.ArrayList;
import org.thdl.tib.text.DuffCode;
/** A TTraits object encapsulates all the things that make a
@ -65,6 +66,11 @@ interface TTraits {
* any wowel) */
boolean isConsonant(String s);
/** Returns true if and only if this transliteration scheme supports
* Tibetan Unicode characters and if ch is such a character and is a
* wowel. */
boolean isUnicodeWowel(char ch);
/** Returns true if and only if <em>s</em> is a stretch of
* transliteration corresponding to a Tibetan wowel (without any
* [achen or other] consonant) */
@ -120,6 +126,10 @@ interface TTraits {
* null if l is unknown. */
String getUnicodeFor(String l, boolean subscribed);
/** Returns the unicode for a wowel. Returns null if l is
* unknown. */
String getUnicodeForWowel(String wowel);
/** Returns a scanner that can break up a string of
transliteration. */
TTshegBarScanner scanner();
@ -127,4 +137,78 @@ interface TTraits {
/** Gets the duffcodes for wowel, such that they look good with
* the preceding glyph, and appends them to duff. */
void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel);
/** Human-readable name of this transliteration for short error
strings. */
String shortTranslitName();
/** Returns true if and only pair is clearly not valid
transliteration. */
boolean isClearlyIllegal(TPair pair);
/** Returns one or two new TPairList instances. Breaks a
* transliterated tsheg bar (roughly a &quot;syllable&quot;) into
* chunks; this computes l' (for you design doc enthusiasts).
*
* <p>Here's a rough sketch of the algorithm: run along getting
* the current TPair as big as you can. If you get it very big,
* but there's something illegal afterward that wouldn't
* otherwise be illegal, undo as little as possible to correct.
* For example, ACIP {G'A'I} becomes [(G . 'A), (' . I)], and
* ACIP {TAA} becomes [(T . A)] in a first pass but then we see
* that the rest would be suboptimal, so we backtrack to [(T . )]
* and then finally become [(T . ), (A . A)]. We look for (A . )
* and ( . <vowel>) in the rest in order to say "the rest would
* be suboptimal", i.e. we use {@link
* TPairList.hasSimpleError()}.</p>
*
* <p>There is one case where we break things up into two pair
* lists if and only if specialHandlingForAppendages is true -- I
* thought the converter had a bug because I saw ACIP {SNYAM'AM}
* in KD0003I2.ACT. I asked Robert Chilton, though, and he said
* "SNYAM'AM " was likely a typo for "SNYAM 'AM", so leave
* specialHandlingForAppendages false.</p>
*
* <p>I found out about (OK, as it turns out, imagined) this case
* too late to do anything clean about it. ACIP {SNYAM'AM},
* e.g., breaks up into [(S . ), (NY . A), (M . 'A), (M . )],
* which is incorrect -- [(S . ), (NY . A), (M . ), (' . A), (M
* . )] is correct. But we don't know which is correct without
* parsing, so both are returned. The clean treatment would be
* to lex into a form that didn't insist ACIP {'A} was either a
* vowel or a consonant. Then the parser would figure it out.
* But don't bother, because specialHandlingForAppendages should
* be false always.</p>
*
* @param tt a string of transliteration corresponding to a tsheg
* bar (i.e., it has no punctuation in it)
* @param specialHandlingForAppendages true if and only if you
* want ACIP {SNYAM'AM} to ultimately parse as {S+NYA}{M}{'A}{M}
* instead of {S+NYA}{M'A}{M}
* @return an array of length two consisting of one or two pair
* lists. If the former, then the second element will be null,
* if the latter, the second element will have (* . ), (' . *)
* instead of (* . '*) which the former has. */
TPairList[] breakTshegBarIntoChunks(String tt,
boolean specialHandlingForAppendages);
/** Returns true if and only if these are ACIP transliteration's
traits. TODO(dchandler): get rid of this function. Any
caller is employing a hack. */
boolean isACIP();
/** Returns true if and only if a vowel all by its lonesome has an
* implied a-chen (U+0F68) with it. (ACIP requires "AI" to
* represent a-chen with gigu, but EWTS requires "i".)*/
boolean vowelAloneImpliesAChen();
/** Returns true if and only if multiple vowels (TODO(dchandler):
* wowels?) may appear on a single consonant stack via the
* stacking operator, '+'. */
boolean vowelsMayStack();
/** Returns true if and only if pl could represent one TPairList
in a tsheg bar. (EWTS's list of standard stacks comes into
play; ACIP always returns true.) */
boolean couldBeValidStack(TPairList pl);
}

View file

@ -18,16 +18,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.io.IOException;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.InputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Stack;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
/**
* A TTshegBarScanner is able to break up Strings of transliterated

View file

@ -21,8 +21,9 @@ package org.thdl.util;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import javax.swing.JScrollPane;
import javax.swing.JEditorPane;
import javax.swing.JScrollPane;
/** An HTMLPane is a JScrollPane displaying the contents of an HTML
* file. DLC FIXME: at present, neither internal nor external

View file

@ -17,7 +17,6 @@ Contributor(s): ______________________________________.
*/
package org.thdl.util;
import java.io.*;
/** Used by {@link SimplifiedLinkedList} to provide the implementation of a
simple dynamic link list.

View file

@ -18,13 +18,11 @@ Contributor(s): ______________________________________.
package org.thdl.util;
import org.thdl.util.ThdlDebug;
import java.util.ArrayList;
import java.io.IOException;
import java.io.FilterInputStream;
import java.io.BufferedInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
/** Provides an input stream that fixes another RTF input stream so

View file

@ -18,11 +18,12 @@ Contributor(s): ______________________________________.
package org.thdl.util;
import junit.framework.TestCase;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import junit.framework.TestCase;
/**
* @author David Chandler
*

View file

@ -21,11 +21,12 @@ package org.thdl.util;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import javax.swing.JScrollPane;
import javax.swing.JTextPane;
import javax.swing.text.BadLocationException;
import javax.swing.text.DefaultStyledDocument;
import javax.swing.text.rtf.RTFEditorKit;
import javax.swing.text.BadLocationException;
/** An RTFPane is a JScrollPane displaying the contents of a rich text
file (an RTF file). */

View file

@ -18,13 +18,12 @@ Contributor(s): ______________________________________.
package org.thdl.util;
import javax.swing.JFrame;
import java.awt.Container;
import java.awt.Component;
import java.awt.Container;
import java.awt.event.ComponentAdapter;
import java.awt.event.ComponentEvent;
import org.thdl.util.RTFPane;
import javax.swing.JFrame;
/** An SimpleFrame is a top-level window displaying a JScrollPane. */
public class SimpleFrame extends JFrame {

View file

@ -18,7 +18,7 @@ Contributor(s): ______________________________________.
package org.thdl.util;
import java.io.*;
import java.io.PrintWriter;
/** Implementation of a simple dynamic link list. Be careful with word order!
Why not just use java.util.LinkedList? It is not supported for the

View file

@ -17,7 +17,7 @@ Contributor(s): ______________________________________.
*/
package org.thdl.util;
import java.util.*;
import java.util.LinkedList;
/** Used by {@link LinkedList} to provide the implementation of a
simple dynamic link list.

View file

@ -18,11 +18,13 @@ Contributor(s): ______________________________________.
package org.thdl.util;
import java.awt.*;
import java.awt.event.*;
import javax.swing.*;
import java.util.Stack;
import javax.swing.BoxLayout;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.SwingConstants;
/** A StatusBar can be added to a component, typically to the bottom
of it, in order to show the user the status of the program. There
are methods to change the status, and there are actually a LIFO

View file

@ -18,11 +18,10 @@ Contributor(s): ______________________________________.
package org.thdl.util;
import javax.swing.AbstractAction;
import javax.swing.Icon;
import java.awt.event.ActionEvent;
import org.thdl.util.ThdlDebug;
import javax.swing.AbstractAction;
import javax.swing.Icon;
/**
* This ActionListener is like any other except in the way that it

View file

@ -18,10 +18,8 @@ Contributor(s): ______________________________________.
package org.thdl.util;
import java.awt.event.ActionListener;
import java.awt.event.ActionEvent;
import org.thdl.util.ThdlDebug;
import java.awt.event.ActionListener;
/**
* This ActionListener is like any other except in the way that it

View file

@ -18,12 +18,9 @@ Contributor(s): ______________________________________.
package org.thdl.util;
import java.io.PrintStream;
import java.io.FileOutputStream;
import java.io.File;
import org.thdl.util.TeeStream;
import org.thdl.util.ThdlOptions;
import java.io.FileOutputStream;
import java.io.PrintStream;
/**
* This uninstantiable class provides assertions and the like in a

View file

@ -2,6 +2,7 @@ package org.thdl.util;
import java.util.Locale;
import java.util.ResourceBundle;
import javax.swing.JComponent;
public class ThdlI18n {

View file

@ -18,9 +18,9 @@ Contributor(s): ______________________________________.
package org.thdl.util;
import junit.framework.TestCase;
import java.io.IOException;
import java.io.IOException; /* a checked exception */
import junit.framework.TestCase;
/**
* @author David Chandler

View file

@ -18,17 +18,14 @@ Contributor(s): ______________________________________.
package org.thdl.util;
import java.io.InputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.util.Properties;
import org.thdl.util.ThdlLazyException;
import org.thdl.util.OperatingSystemUtils;
/**
* Provides a clean interface to the multi-tiered system of user
* preferences (also known as options).

View file

@ -81,7 +81,6 @@ Contributor(s): ______________________________________.
package org.thdl.util;
import org.thdl.util.ThdlDebug;
/**
* A digital search trie for 7-bit ASCII text. The API is a subset of