Robert Chilton's experience inspired me to make the handling of errors and
warnings in ACIP->Tibetan conversion much more configurable.  You can
now choose from short or long error messages, for one thing.  You can change
the severity of almost all warnings.  Each error and warning has an error code.
Errors and warnings are better tested.

The converter GUI has a new checkbox for short messages; the converter
CLI has a new mandatory option
(--acip-to-tibetan-warning-and-error-messages short|long) for choosing
between short and long messages.

I also fixed a bug whereby certain errors were not being appended to the
'errors' StringBuffer.
This commit is contained in:
dchandler 2004-04-24 17:49:16 +00:00
parent cc5d096918
commit e2d42f36eb
22 changed files with 1106 additions and 506 deletions

View file

@ -213,7 +213,7 @@ Contributor(s): ______________________________________.
inheritAll="false" inheritRefs="false"> inheritAll="false" inheritRefs="false">
<reference refid="entire.class.path"/> <reference refid="entire.class.path"/>
<property name="junitbin" value="${junitbin}"/> <property name="junitbin" value="${junitbin}"/>
</ant> </ant>
<ant antfile="junitbuild.xml" target="run-headless-junit-tests" <ant antfile="junitbuild.xml" target="run-headless-junit-tests"
inheritAll="false" inheritRefs="false"> inheritAll="false" inheritRefs="false">
<reference refid="entire.class.path"/> <reference refid="entire.class.path"/>

View file

@ -144,3 +144,21 @@ thdl.log.line.breaking.algorithm = false
# disappears from the input. We turn these guys into Unicode escapes # disappears from the input. We turn these guys into Unicode escapes
# when this is false. We leave it buggy when this is true. # when this is false. We leave it buggy when this is true.
thdl.do.not.fix.rtf.hex.escapes = false thdl.do.not.fix.rtf.hex.escapes = false
# ACIP->Tibetan conversions have numerous warnings. If you want to
# see warning 501 even at the "Some" level, just change the option
# thdl.acip.to.tibetan.warning.severity.501 to Some. You cannot make
# a warning into an error, and you cannot make an error into a
# warning. 504 and 510 cannot be downgraded; they are always
# "Some"-level.
thdl.acip.to.tibetan.warning.severity.501 = Most
thdl.acip.to.tibetan.warning.severity.502 = All
thdl.acip.to.tibetan.warning.severity.503 = All
thdl.acip.to.tibetan.warning.severity.504 = Some
thdl.acip.to.tibetan.warning.severity.505 = Some
thdl.acip.to.tibetan.warning.severity.506 = Some
thdl.acip.to.tibetan.warning.severity.507 = Most
thdl.acip.to.tibetan.warning.severity.508 = Some
thdl.acip.to.tibetan.warning.severity.509 = Most
thdl.acip.to.tibetan.warning.severity.510 = Some
thdl.acip.to.tibetan.warning.severity.511 = Some

View file

@ -40,6 +40,9 @@ class ConvertDialog extends JDialog
private JCheckBox colors; private JCheckBox colors;
private static final String colorDesc = "Color-coding (ACIP to RTF only)"; private static final String colorDesc = "Color-coding (ACIP to RTF only)";
private JCheckBox shortMessages;
private static final String shortMessagesDesc = "Short warning and error messages (ACIP to Tibetan only)";
// Attributes // Attributes
private FontConversion controller; private FontConversion controller;
@ -99,11 +102,17 @@ class ConvertDialog extends JDialog
updateWarningLevels(); updateWarningLevels();
temp.add(warningLevels); temp.add(warningLevels);
content.add(temp);
temp = new JPanel(new FlowLayout(FlowLayout.CENTER,5,5));
this.colors = new JCheckBox(colorDesc, false); this.colors = new JCheckBox(colorDesc, false);
this.colors.addActionListener(tal); this.colors.addActionListener(tal);
this.shortMessages = new JCheckBox(shortMessagesDesc, false);
this.shortMessages.addActionListener(tal);
updateWarningLevels(); updateWarningLevels();
temp.add(colors); temp.add(colors);
temp.add(shortMessages);
content.add(temp); content.add(temp);
temp = new JPanel(new FlowLayout(FlowLayout.CENTER,5,5)); temp = new JPanel(new FlowLayout(FlowLayout.CENTER,5,5));
@ -160,7 +169,7 @@ class ConvertDialog extends JDialog
content.add(buttonBox); content.add(buttonBox);
setContentPane(content); setContentPane(content);
pack(); pack();
setSize(new Dimension(640,235)); setSize(new Dimension(600,240));
} }
private void setChoices(String[] choices) private void setChoices(String[] choices)
@ -301,6 +310,7 @@ class ConvertDialog extends JDialog
convertedFile, convertedFile,
(String)choices.getSelectedItem(), (String)choices.getSelectedItem(),
(String)warningLevels.getSelectedItem(), (String)warningLevels.getSelectedItem(),
shortMessages.isSelected(),
colors.isSelected()); colors.isSelected());
} catch (OutOfMemoryError e) { } catch (OutOfMemoryError e) {
JOptionPane.showMessageDialog(this, JOptionPane.showMessageDialog(this,

View file

@ -49,7 +49,7 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
public boolean doConversion(ConvertDialog cd, File oldFile, File newFile, public boolean doConversion(ConvertDialog cd, File oldFile, File newFile,
String whichConversion, String warningLevel, String whichConversion, String warningLevel,
boolean colors) { boolean shortMessages, boolean colors) {
PrintStream ps; PrintStream ps;
try { try {
if (whichConversion == ACIP_TO_UNI_TEXT) { if (whichConversion == ACIP_TO_UNI_TEXT) {
@ -64,6 +64,7 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
false), false),
whichConversion, whichConversion,
warningLevel, warningLevel,
shortMessages,
colors); colors);
ps.close(); ps.close();
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {

View file

@ -36,9 +36,12 @@ interface FontConversion
conversion performed is specified by the interned String conversion performed is specified by the interned String
whichConversion, which must be one of the known conversions. whichConversion, which must be one of the known conversions.
If you want colors to be used in the output (which is only If you want colors to be used in the output (which is only
supported by a few conversions), then colors must be true. supported by a few conversions), then colors must be true. If
you want short error and warning messages for ACIP to Tibetan
conversions, then shortMessages must be true.
@return true on success, false otherwise */ @return true on success, false otherwise */
boolean doConversion(ConvertDialog cd, File oldFile, boolean doConversion(ConvertDialog cd, File oldFile,
File newFile, String whichConversion, File newFile, String whichConversion,
String warningLevel, boolean colors); String warningLevel, boolean shortMessages,
boolean colors);
} }

View file

@ -63,6 +63,6 @@ interface FontConverterConstants
final String suggested_TO_TM_prefix = "TM_"; final String suggested_TO_TM_prefix = "TM_";
// String Constants // String Constants
public final String PROGRAM_TITLE = "THDL Font Conversion (with Jskad Technology)"; public final String PROGRAM_TITLE = "THDL Tibetan Converters -- featuring Jskad Technology";
} }

View file

@ -268,7 +268,7 @@ public class Jskad extends JPanel implements DocumentListener {
fileMenu = new JMenu("File"); fileMenu = new JMenu("File");
JMenuItem newItem = new JMenuItem("New"); JMenuItem newItem = new JMenuItem("New...");
// newItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_N,java.awt.Event.CTRL_MASK)); //Ctrl-n // newItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_N,java.awt.Event.CTRL_MASK)); //Ctrl-n
newItem.addActionListener(new ThdlActionListener() { newItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) { public void theRealActionPerformed(ActionEvent e) {
@ -652,7 +652,7 @@ public class Jskad extends JPanel implements DocumentListener {
JMenu helpMenu = new JMenu("Help"); JMenu helpMenu = new JMenu("Help");
{ {
JMenuItem helpItem = new JMenuItem("Help"); JMenuItem helpItem = new JMenuItem("Help...");
helpItem.addActionListener(new ThdlActionListener() { helpItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) { public void theRealActionPerformed(ActionEvent e) {
CalHTMLPane helpPane = new CalHTMLPane(); CalHTMLPane helpPane = new CalHTMLPane();
@ -672,7 +672,7 @@ public class Jskad extends JPanel implements DocumentListener {
} }
{ {
JMenuItem helpItem = new JMenuItem("Jskad on the Web"); JMenuItem helpItem = new JMenuItem("Jskad on the Web...");
helpItem.addActionListener(new ThdlActionListener() { helpItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) { public void theRealActionPerformed(ActionEvent e) {
CalHTMLPane onlineHelpPane = new CalHTMLPane(); CalHTMLPane onlineHelpPane = new CalHTMLPane();
@ -692,7 +692,7 @@ public class Jskad extends JPanel implements DocumentListener {
for (int i = 0; i < keybdMgr.size(); i++) { for (int i = 0; i < keybdMgr.size(); i++) {
final JskadKeyboard kbd = keybdMgr.elementAt(i); final JskadKeyboard kbd = keybdMgr.elementAt(i);
if (kbd.hasQuickRefFile()) { if (kbd.hasQuickRefFile()) {
JMenuItem keybdItem = new JMenuItem(kbd.getIdentifyingString()); JMenuItem keybdItem = new JMenuItem(kbd.getIdentifyingString() + "...");
keybdItem.addActionListener(new ThdlActionListener() { keybdItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) { public void theRealActionPerformed(ActionEvent e) {
new SimpleFrame(kbd.getIdentifyingString(), new SimpleFrame(kbd.getIdentifyingString(),
@ -712,7 +712,7 @@ public class Jskad extends JPanel implements DocumentListener {
helpMenu.addSeparator(); helpMenu.addSeparator();
{ {
JMenuItem aboutItem = new JMenuItem("About"); JMenuItem aboutItem = new JMenuItem("About...");
aboutItem.addActionListener(new ThdlActionListener() { aboutItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) { public void theRealActionPerformed(ActionEvent e) {
JOptionPane.showMessageDialog(Jskad.this, JOptionPane.showMessageDialog(Jskad.this,

View file

@ -106,6 +106,8 @@ public class TMW_RTF_TO_THDL_WYLIETest extends TestCase {
"no", "no",
"--warning-level", "--warning-level",
"All", "All",
"--acip-to-tibetan-warning-and-error-messages",
"long",
mode, mode,
getTestFileName(testName) getTestFileName(testName)
}; };
@ -130,6 +132,8 @@ public class TMW_RTF_TO_THDL_WYLIETest extends TestCase {
+ "thdl" + File.separator + "thdl" + File.separator
+ "tib" + File.separator + "tib" + File.separator
+ "input" + File.separator + "input" + File.separator
// FIXME: one of the files named '.rtf' is really a text
// file:
+ "TMW_RTF_TO_THDL_WYLIE" + testName + ".rtf"; + "TMW_RTF_TO_THDL_WYLIE" + testName + ".rtf";
} }

View file

@ -30,14 +30,13 @@ import org.thdl.tib.text.ttt.ACIPConverter;
import org.thdl.tib.text.ttt.ACIPTshegBarScanner; import org.thdl.tib.text.ttt.ACIPTshegBarScanner;
import java.util.ArrayList; import java.util.ArrayList;
/** TibetanConverter is a command-line utility for converting to /** TibetanConverter is a command-line utility for converting to and
* and from Tibetan Machine Web (TMW). It converts TMW to Wylie, to * from Tibetan Machine Web (TMW). It converts TMW to Wylie, ACIP,
* Unicode, or to Tibetan Machine (TM). It also converts TM to TMW. * Unicode, or to Tibetan Machine (TM). It also converts to TMW from
* It is a TibetanMachineWeb-in-RichTextFormat to your choice of * TM or ACIP. Some conversions use RTF (rich text format); some use
* TibetanMachine-in-RichTextFormat, THDL Extended * text. Invoke it with no parameters for usage information. Full
* Wylie-in-RichTextFormat, or Unicode-in-RichTextFormat converter, * documentation is available at {@link
* more specifically, as well as converting from TM to TMW. Invoke * http://thdltools.sourceforge.net/TMW_RTF_TO_THDL_WYLIE.html}.
* it with no parameters for usage information.
* @author David Chandler */ * @author David Chandler */
public class TibetanConverter implements FontConverterConstants { public class TibetanConverter implements FontConverterConstants {
private static final boolean debug = false; private static final boolean debug = false;
@ -50,7 +49,7 @@ public class TibetanConverter implements FontConverterConstants {
/** /**
* Runs the converter. */ * Runs the converter. */
public static void main(String[] args) { public static void main(String[] args) {
// No need for the TM or TMW fonts. // No need for the TM or TMW fonts.
System.setProperty("thdl.rely.on.system.tmw.fonts", "true"); System.setProperty("thdl.rely.on.system.tmw.fonts", "true");
System.setProperty("thdl.rely.on.system.tm.fonts", "true"); System.setProperty("thdl.rely.on.system.tm.fonts", "true");
@ -80,22 +79,28 @@ public class TibetanConverter implements FontConverterConstants {
boolean findAllNonTMMode = false; boolean findAllNonTMMode = false;
boolean colors = false; boolean colors = false;
boolean shortMessages = false;
String warningLevel = null;
// Process arguments: // Process arguments:
final int numArgs = 6; final int numArgs = 8;
if ((args.length != 1 && args.length != numArgs) if ((args.length != 1 && args.length != numArgs)
|| (args.length == 1 || (args.length == 1
&& !(args[0].equals("-v") && !(args[0].equals("-v")
|| args[0].equals("--version"))) || args[0].equals("--version")))
|| (args.length == numArgs || (args.length == numArgs
&& (!(args[numArgs - 6].equals("--colors")) && (!(args[numArgs - 8].equals("--colors"))
|| !((colors = args[numArgs - 5].equals("yes")) || !((colors = args[numArgs - 7].equals("yes"))
|| args[numArgs - 5].equals("no")) || args[numArgs - 7].equals("no"))
|| !(args[numArgs - 4].equals("--warning-level")) || !(args[numArgs - 6].equals("--warning-level"))
|| !(args[numArgs - 3].equals("Most") || !((warningLevel = args[numArgs - 5]).equals("Most")
|| args[numArgs - 3].equals("Some") || warningLevel.equals("Some")
|| args[numArgs - 3].equals("All") || warningLevel.equals("All")
|| args[numArgs - 3].equals("None")) || warningLevel.equals("None"))
|| !(args[numArgs - 4].equals("--acip-to-tibetan-warning-and-error-messages"))
|| !((shortMessages = args[numArgs - 3].equals("short"))
|| args[numArgs - 3].equals("long"))
|| !((findAllNonTMWMode || !((findAllNonTMWMode
= args[numArgs - 2].equals("--find-all-non-tmw")) = args[numArgs - 2].equals("--find-all-non-tmw"))
|| (convertToTMMode || (convertToTMMode
@ -123,8 +128,15 @@ public class TibetanConverter implements FontConverterConstants {
|| (findAllNonTMMode || (findAllNonTMMode
= args[numArgs - 2].equals("--find-all-non-tm")) = args[numArgs - 2].equals("--find-all-non-tm"))
)))) { )))) {
if (args.length != numArgs) {
out.println("");
out.println("Wrong number of arguments; needs " + numArgs + " arguments.");
out.println("");
}
out.println("TibetanConverter --colors yes|no"); out.println("TibetanConverter --colors yes|no");
out.println(" --warning-level None|Some|Most|All"); out.println(" --warning-level None|Some|Most|All");
out.println(" --acip-to-tibetan-warning-and-error-messages short|long");
out.println(" --find-all-non-tmw | --find-some-non-tmw"); out.println(" --find-all-non-tmw | --find-some-non-tmw");
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web"); out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
out.println(" | --to-unicode | --to-wylie | --to-acip"); out.println(" | --to-unicode | --to-wylie | --to-acip");
@ -170,29 +182,36 @@ public class TibetanConverter implements FontConverterConstants {
out.println(" not in Tibetan Machine fonts, exit zero if and only if none found"); out.println(" not in Tibetan Machine fonts, exit zero if and only if none found");
out.println(""); out.println("");
out.println(""); out.println("");
out.println(" In --to... and --acip-to... modes, needs one argument, the name of the"); out.println("In --to... and --acip-to... modes, needs one argument, the name of the");
out.println(" TibetanMachineWeb RTF file (for --to-wylie, --to-wylie-text, --to-acip-text,"); out.println("TibetanMachineWeb RTF file (for --to-wylie, --to-wylie-text, --to-acip-text,");
out.println(" --to-acip, --to-unicode, and --to-tibetan-machine) or the name of"); out.println("--to-acip, --to-unicode, and --to-tibetan-machine) or the name of");
out.println(" the TibetanMachine RTF file (for --to-tibetan-machine-web) or the name of the"); out.println("the TibetanMachine RTF file (for --to-tibetan-machine-web) or the name of the");
out.println(" ACIP text file (for --acip-to-unicode or --acip-to-tmw). Writes the"); out.println("ACIP text file (for --acip-to-unicode or --acip-to-tmw). Writes the");
out.println(" result to standard output (after dealing with the curly brace problem if"); out.println("result to standard output (after dealing with the curly brace problem if");
out.println(" the input is TibetanMachineWeb). Exit code is zero on success, 42 if some"); out.println("the input is TibetanMachineWeb). Exit code is zero on success, 42 if some");
out.println(" glyphs couldn't be converted (in which case the output is just those glyphs),"); out.println("glyphs couldn't be converted (in which case the output is just those glyphs),");
out.println(" 44 if a TMW->Wylie conversion ran into some glyphs that couldn't be"); out.println("44 if a TMW->Wylie conversion ran into some glyphs that couldn't be");
out.println(" converted, in which case ugly error messages like"); out.println("converted, in which case ugly error messages like");
out.println(" \"<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode...\""); out.println(" \"<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode...\"");
out.println(" are in your document waiting for your personal attention,"); out.println("are in your document waiting for your personal attention,");
out.println(" 43 if not even one glyph found was eligible for this conversion, which means"); out.println("43 if not even one glyph found was eligible for this conversion, which means");
out.println(" that you probably selected the wrong conversion or the wrong document, or "); out.println("that you probably selected the wrong conversion or the wrong document, or ");
out.println(" nonzero otherwise."); out.println("nonzero otherwise.");
out.println(""); out.println("");
out.println(" You may find it helpful to use `--find-some-non-tmw' mode (or"); out.println("You may find it helpful to use `--find-some-non-tmw' mode (or");
out.println(" `--find-some-non-tm' mode for Tibetan Machine input) before doing a"); out.println("`--find-some-non-tm' mode for Tibetan Machine input) before doing a");
out.println(" conversion so that you have confidence in the conversion's correctness."); out.println("conversion so that you have confidence in the conversion's correctness.");
out.println("");
out.println("When using short error and warning messages for ACIP->Tibetan conversions,");
out.println("i.e. when '--acip-to-tibetan-warning-and-error-messages short' is given,");
out.println("the output will contain error and warning numbers. The following are the");
out.println("long forms of each warning and error:");
out.println("");
org.thdl.tib.text.ttt.ErrorsAndWarnings.printErrorAndWarningDescriptions(out);
return 77; return 77;
} }
if (args[0].equals("--version") || args[0].equals("-v")) { if (args[0].equals("--version") || args[0].equals("-v")) {
out.println("TibetanConverter version 0.83"); out.println("TibetanConverter version 0.84");
out.println("Compiled at " out.println("Compiled at "
+ ThdlVersion.getTimeOfCompilation()); + ThdlVersion.getTimeOfCompilation());
return 77; return 77;
@ -237,7 +256,8 @@ public class TibetanConverter implements FontConverterConstants {
} }
} }
return reallyConvert(in, out, conversionTag, return reallyConvert(in, out, conversionTag,
args[numArgs - 3].intern(), colors); warningLevel.intern(), shortMessages,
colors);
} catch (ThdlLazyException e) { } catch (ThdlLazyException e) {
out.println("TibetanConverter has a BUG:"); out.println("TibetanConverter has a BUG:");
e.getRealException().printStackTrace(out); e.getRealException().printStackTrace(out);
@ -253,22 +273,26 @@ public class TibetanConverter implements FontConverterConstants {
e.printStackTrace(System.err); e.printStackTrace(System.err);
throw e; throw e;
} }
} }
/** Reads from in, closes in, converts (or finds some/all /** Reads from in, closes in, converts (or finds some/all
non-TM/TMW), writes the result to out, does not close out. non-TM/TMW), writes the result to out, does not close out.
The action taken depends on ct, which must be one of a set The action taken depends on ct, which must be one of a set
number of strings -- see the code. Returns an appropriate number of strings -- see the code. Uses short error and
return code so that TibetanConverter's usage message is warning messages if shortMessages is true; gives no warnings
honored. */ or many warnings depending on warningLevel. Returns an
appropriate return code so that TibetanConverter's usage
message is honored. */
static int reallyConvert(InputStream in, PrintStream out, String ct, static int reallyConvert(InputStream in, PrintStream out, String ct,
String warningLevel, boolean colors) { String warningLevel, boolean shortMessages,
boolean colors) {
if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct) { if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct) {
try { try {
ArrayList al = ACIPTshegBarScanner.scanStream(in, null, ArrayList al
ThdlOptions.getIntegerOption("thdl.most.errors.a.tibetan.acip.document.can.have", = ACIPTshegBarScanner.scanStream(in, null,
1000 - 1) ThdlOptions.getIntegerOption("thdl.most.errors.a.tibetan.acip.document.can.have",
); 1000 - 1),
shortMessages);
if (null == al) if (null == al)
return 47; return 47;
boolean embeddedWarnings = (warningLevel != "None"); boolean embeddedWarnings = (warningLevel != "None");
@ -277,14 +301,16 @@ public class TibetanConverter implements FontConverterConstants {
if (!ACIPConverter.convertToUnicodeText(al, out, null, if (!ACIPConverter.convertToUnicodeText(al, out, null,
null, hasWarnings, null, hasWarnings,
embeddedWarnings, embeddedWarnings,
warningLevel)) warningLevel,
shortMessages))
return 46; return 46;
} else { } else {
if (ct != ACIP_TO_TMW) throw new Error("badness"); if (ct != ACIP_TO_TMW) throw new Error("badness");
if (!ACIPConverter.convertToTMW(al, out, null, null, if (!ACIPConverter.convertToTMW(al, out, null, null,
hasWarnings, hasWarnings,
embeddedWarnings, embeddedWarnings,
warningLevel, colors)) warningLevel, shortMessages,
colors))
return 46; return 46;
} }
if (embeddedWarnings && hasWarnings[0]) if (embeddedWarnings && hasWarnings[0])

View file

@ -333,7 +333,7 @@ public class TibTextUtils implements THDLWylieConstants {
throws InvalidACIPException throws InvalidACIPException
{ {
StringBuffer errors = new StringBuffer(); StringBuffer errors = new StringBuffer();
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500); ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500, false);
if (null == al || errors.length() > 0) { if (null == al || errors.length() > 0) {
if (errors.length() > 0) if (errors.length() > 0)
throw new InvalidACIPException(errors.toString()); throw new InvalidACIPException(errors.toString());
@ -350,7 +350,7 @@ public class TibTextUtils implements THDLWylieConstants {
int tloc[] = new int[] { loc }; int tloc[] = new int[] { loc };
ACIPConverter.convertToTMW(al, tdoc, null, null, null, ACIPConverter.convertToTMW(al, tdoc, null, null, null,
putWarningsInOutput, warningLevel, putWarningsInOutput, warningLevel,
colors, tloc); false, colors, tloc);
return tloc[0] - loc; return tloc[0] - loc;
} catch (IOException e) { } catch (IOException e) {
throw new Error("Can't happen: " + e); throw new Error("Can't happen: " + e);

View file

@ -14,10 +14,11 @@
// sure to run 'ant clean check' after your change. // sure to run 'ant clean check' after your change.
// //
// Note that some glyphs have EWTS \uF021-\uF0FF inclusive. These do // Note that some glyphs have EWTS \uF021-\uF0FF inclusive. These do
// not have anything in the Unicode column, though, because this is just // not have anything in the Unicode column, though, because this is
// the EWTS -- if someone wants to convert TMSkt3.183->Unicode and get // just the EWTS -- if someone wants to convert TMSkt3.183->Unicode
// \u0F21, let them do that, but our *->Unicode converters shouldn't // and get \u0F21, let them do that, but our *->Unicode converters
// output codes in the PUA without explicit user authorization. // shouldn't output codes in the private-use area (PUA) without
// explicit user authorization.
// //
// Note that 0F00, 0F02, 0F03, and 0F0E are made by using multiple // Note that 0F00, 0F02, 0F03, and 0F0E are made by using multiple
// glyphs from TMW. 0F6A is not listed here (DLC FIXME: should it be? // glyphs from TMW. 0F6A is not listed here (DLC FIXME: should it be?

View file

@ -390,5 +390,18 @@ public class UnicodeUtils implements UnicodeConstants {
&& '\u0F48' != cp && '\u0F48' != cp
&& '\u0F98' != cp); && '\u0F98' != cp);
} }
/** Tells whether cp lies inside the Tibetan range of Unicode 4.0
    and yet is a reserved code there, i.e. one that has not been
    assigned to any character.
    @param cp the character to examine
    @return true if and only if cp is one of the reserved codes */
public static boolean isReservedTibetanCode(char cp) {
    // Isolated reserved code points: U+0F48, U+0F98, and U+0FBD.
    if (cp == '\u0F48' || cp == '\u0F98' || cp == '\u0FBD') {
        return true;
    }
    // Reserved run U+0F6B through U+0F70.
    if (cp >= '\u0F6B' && cp <= '\u0F70') {
        return true;
    }
    // Reserved run U+0F8C through U+0F8F.
    if (cp >= '\u0F8C' && cp <= '\u0F8F') {
        return true;
    }
    // Reserved pair U+0FCD and U+0FCE.
    if (cp >= '\u0FCD' && cp <= '\u0FCE') {
        return true;
    }
    // Everything from U+0FD0 to the end of the range is reserved.
    return cp >= '\u0FD0' && cp <= '\u0FFF';
}
} }

View file

@ -10,7 +10,7 @@ License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is the Tibetan and Himalayan Digital The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2003 THDL. Library (THDL). Portions created by the THDL are Copyright 2003-2004 THDL.
All Rights Reserved. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
@ -37,11 +37,14 @@ import org.thdl.tib.text.DuffCode;
*/ */
public class ACIPConverter { public class ACIPConverter {
/** Command-line converter. Gives error messages on standard /** Command-line converter for testing only -- use
* output about why we can't convert the document perfectly and * org.thdl.tib.input.TibetanConverter for production work.
* exits with non-zero return code, or is silent otherwise and * Gives error messages on standard output about why we can't
* exits with code zero. <p>FIXME: not so efficient; copies the * convert the document perfectly and exits with non-zero return
* whole file into memory first. */ * code, or is silent otherwise and exits with code zero.
*
* <p>FIXME: not so efficient; copies the whole file into memory
* first. */
public static void main(String[] args) public static void main(String[] args)
throws IOException throws IOException
{ {
@ -50,13 +53,20 @@ public class ACIPConverter {
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true); ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
ThdlOptions.setUserPreference("thdl.debug", true); ThdlOptions.setUserPreference("thdl.debug", true);
// Only developers should use this.
if (!ThdlOptions.getBooleanOption("thdl.debug")) {
System.err.println("Use org.thdl.tib.input.TibetanConverter for production work, not ACIPConverter.");
System.exit(1);
}
boolean verbose = true; boolean verbose = true;
if (args.length != 1) { if (args.length != 1) {
System.out.println("Bad args! Need just the name of the ACIP text file."); System.out.println("Bad args! Need just the name of the ACIP text file.");
} }
StringBuffer errors = new StringBuffer(); StringBuffer errors = new StringBuffer();
int maxErrors = 1000; // DLC NOW PER CAPITA int maxErrors = 1000; // FIXME: make this PER CAPITA or else large ACIP Tibetan files are not converted for fear that they are English
ArrayList al = ACIPTshegBarScanner.scanFile(args[0], errors, maxErrors - 1); boolean shortMessages = false;
ArrayList al = ACIPTshegBarScanner.scanFile(args[0], errors, maxErrors - 1, shortMessages);
if (null == al) { if (null == al) {
System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this"); System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this");
@ -89,7 +99,7 @@ public class ACIPConverter {
putWarningsInOutput = true; putWarningsInOutput = true;
} }
convertToTMW(al, System.out, errors, warnings, null, convertToTMW(al, System.out, errors, warnings, null,
putWarningsInOutput, warningLevel, colors); putWarningsInOutput, warningLevel, shortMessages, colors);
int retCode = 0; int retCode = 0;
if (errors.length() > 0) { if (errors.length() > 0) {
System.err.println("Errors converting ACIP input file: "); System.err.println("Errors converting ACIP input file: ");
@ -131,13 +141,15 @@ public class ACIPConverter {
boolean[] hasWarnings, boolean[] hasWarnings,
boolean writeWarningsToResult, boolean writeWarningsToResult,
String warningLevel, String warningLevel,
boolean shortMessages,
boolean colors) boolean colors)
throws IOException throws IOException
{ {
TibetanDocument tdoc = new TibetanDocument(); TibetanDocument tdoc = new TibetanDocument();
boolean rv boolean rv
= convertToTMW(scan, tdoc, errors, warnings, hasWarnings, = convertToTMW(scan, tdoc, errors, warnings, hasWarnings,
writeWarningsToResult, warningLevel, colors, writeWarningsToResult, warningLevel,
shortMessages, colors,
new int[] { tdoc.getLength() }); new int[] { tdoc.getLength() });
tdoc.writeRTFOutputStream(out); tdoc.writeRTFOutputStream(out);
return rv; return rv;
@ -159,13 +171,15 @@ public class ACIPConverter {
boolean[] hasWarnings, boolean[] hasWarnings,
boolean writeWarningsToResult, boolean writeWarningsToResult,
String warningLevel, String warningLevel,
boolean shortMessages,
boolean colors, boolean colors,
int[] loc) int[] loc)
throws IOException throws IOException
{ {
return convertTo(false, true, scan, null, tdoc, errors, warnings, return convertTo(false, true, scan, null, tdoc, errors, warnings,
hasWarnings, writeWarningsToResult, warningLevel, hasWarnings, writeWarningsToResult, warningLevel,
colors, loc, loc[0] == tdoc.getLength()); shortMessages, colors, loc,
loc[0] == tdoc.getLength());
} }
/** Returns UTF-8 encoded Unicode. A bit indirect, so use this /** Returns UTF-8 encoded Unicode. A bit indirect, so use this
@ -175,22 +189,23 @@ public class ACIPConverter {
* written to the result. If warnings occur in scanning the ACIP * written to the result. If warnings occur in scanning the ACIP
* or in converting a tsheg bar, then they are appended to * or in converting a tsheg bar, then they are appended to
* warnings if warnings is non-null, and they are written to the * warnings if warnings is non-null, and they are written to the
* result if writeWarningsToResult is true. Returns the * result if writeWarningsToResult is true. Error and warning
* conversion upon perfect success or if there were merely * messages are long and self-contained unless shortMessages is
* warnings, null if errors occurred. * true. Returns the conversion upon perfect success or if there
*/ * were merely warnings, null if errors occurred. */
public static String convertToUnicodeText(String acip, public static String convertToUnicodeText(String acip,
StringBuffer errors, StringBuffer errors,
StringBuffer warnings, StringBuffer warnings,
boolean writeWarningsToResult, boolean writeWarningsToResult,
String warningLevel) { String warningLevel,
boolean shortMessages) {
ByteArrayOutputStream sw = new ByteArrayOutputStream(); ByteArrayOutputStream sw = new ByteArrayOutputStream();
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1); ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1, shortMessages);
try { try {
if (null != al) { if (null != al) {
convertToUnicodeText(al, sw, errors, convertToUnicodeText(al, sw, errors,
warnings, null, writeWarningsToResult, warnings, null, writeWarningsToResult,
warningLevel); warningLevel, shortMessages);
return sw.toString("UTF-8"); return sw.toString("UTF-8");
} else { } else {
return null; return null;
@ -227,12 +242,13 @@ public class ACIPConverter {
StringBuffer warnings, StringBuffer warnings,
boolean[] hasWarnings, boolean[] hasWarnings,
boolean writeWarningsToOut, boolean writeWarningsToOut,
String warningLevel) String warningLevel,
boolean shortMessages)
throws IOException throws IOException
{ {
return convertTo(true, false, scan, out, null, errors, warnings, return convertTo(true, false, scan, out, null, errors, warnings,
hasWarnings, writeWarningsToOut, warningLevel, false, hasWarnings, writeWarningsToOut, warningLevel,
new int[] { -1 } , true); shortMessages, false, new int[] { -1 } , true);
} }
private static boolean peekaheadFindsSpacesAndComma(ArrayList /* of TString */ scan, private static boolean peekaheadFindsSpacesAndComma(ArrayList /* of TString */ scan,
@ -263,6 +279,7 @@ public class ACIPConverter {
boolean[] hasWarnings, boolean[] hasWarnings,
boolean writeWarningsToOut, boolean writeWarningsToOut,
String warningLevel, String warningLevel,
boolean shortMessages,
boolean colors, boolean colors,
// tdocLocation[0] is an // tdocLocation[0] is an
// input-output parameter. It's // input-output parameter. It's
@ -284,6 +301,10 @@ public class ACIPConverter {
throw new IllegalArgumentException("ACIP->Uni.rtf requires a TibetanDocument"); throw new IllegalArgumentException("ACIP->Uni.rtf requires a TibetanDocument");
if (null != out && !(toUnicode && !toRTF)) if (null != out && !(toUnicode && !toRTF))
throw new IllegalArgumentException("That stream is only used in ACIP->Uni.txt mode"); throw new IllegalArgumentException("That stream is only used in ACIP->Uni.txt mode");
if (null != out && null != tdoc)
throw new IllegalArgumentException("Errors are not treated properly yet; do one conversion and then the other. Is performance important enough to risk improper output for you?");
if (null == out && null == tdoc)
throw new IllegalArgumentException("Why would you?");
int smallFontSize = -1; int smallFontSize = -1;
int regularFontSize = -1; int regularFontSize = -1;
if (null != tdoc) { if (null != tdoc) {
@ -325,7 +346,7 @@ public class ACIPConverter {
if (stype == TString.ERROR) { if (stype == TString.ERROR) {
// leave lastGuyWasNonPunct and lastGuy alone; WARNINGs and ERRORs are invisible. // leave lastGuyWasNonPunct and lastGuy alone; WARNINGs and ERRORs are invisible.
hasErrors = true; hasErrors = true;
String text = "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: " + s.getText() + "]"; String text = "[#ERROR " + s.getText() + "]";
if (null != writer) writer.write(text); if (null != writer) writer.write(text);
if (null != tdoc) { if (null != tdoc) {
tdoc.appendRoman(tdocLocation[0], text, Color.RED); tdoc.appendRoman(tdocLocation[0], text, Color.RED);
@ -333,7 +354,7 @@ public class ACIPConverter {
} }
} else if (stype == TString.TSHEG_BAR_ADORNMENT) { } else if (stype == TString.TSHEG_BAR_ADORNMENT) {
if (lastGuyWasNonPunct) { if (lastGuyWasNonPunct) {
String err = "[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot convert the ACIP {" + s.getText() + "} to Tibetan because it is unclear what the result should be.]"; String err = "[#ERROR " + ErrorsAndWarnings.getMessage(133, shortMessages, s.getText()) + "]";
if (null != writer) { if (null != writer) {
String uni = ACIPRules.getUnicodeFor(s.getText(), false); String uni = ACIPRules.getUnicodeFor(s.getText(), false);
if (null == uni) { if (null == uni) {
@ -363,7 +384,7 @@ public class ACIPConverter {
} else if (stype == TString.WARNING) { } else if (stype == TString.WARNING) {
// leave lastGuyWasNonPunct and lastGuy alone; WARNINGs and ERRORs are invisible. // leave lastGuyWasNonPunct and lastGuy alone; WARNINGs and ERRORs are invisible.
if (writeWarningsToOut) { if (writeWarningsToOut) {
String text = "[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: " + s.getText() + "]"; String text = "[#WARNING " + s.getText() + "]";
if (null != writer) writer.write(text); if (null != writer) writer.write(text);
if (null != tdoc) { if (null != tdoc) {
tdoc.appendRoman(tdocLocation[0], text, Color.RED); tdoc.appendRoman(tdocLocation[0], text, Color.RED);
@ -372,7 +393,7 @@ public class ACIPConverter {
} }
if (null != hasWarnings) hasWarnings[0] = true; if (null != hasWarnings) hasWarnings[0] = true;
if (null != warnings) { if (null != warnings) {
warnings.append("Warning: Lexical warning: "); warnings.append("Warning: ");
warnings.append(s.getText()); warnings.append(s.getText());
warnings.append('\n'); warnings.append('\n');
} }
@ -399,10 +420,10 @@ public class ACIPConverter {
TPairList pls[] = TPairListFactory.breakACIPIntoChunks(s.getText(), false); TPairList pls[] = TPairListFactory.breakACIPIntoChunks(s.getText(), false);
String acipError; String acipError;
if ((acipError = pls[0].getACIPError()) != null if ((acipError = pls[0].getACIPError(s.getText(), shortMessages)) != null
&& (null == pls[1] || pls[1].getACIPError() != null)) { && (null == pls[1] || pls[1].getACIPError(s.getText(), shortMessages) != null)) {
hasErrors = true; hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " has these errors: " + acipError + "]"; String errorMessage = "[#ERROR " + acipError + "]";
if (null != writer) writer.write(errorMessage); if (null != writer) writer.write(errorMessage);
if (null != tdoc) { if (null != tdoc) {
tdoc.appendRoman(tdocLocation[0], errorMessage, tdoc.appendRoman(tdocLocation[0], errorMessage,
@ -417,7 +438,10 @@ public class ACIPConverter {
? null : pls[1].getParseTree()); ? null : pls[1].getParseTree());
if (null == pt0 && null == pt1) { if (null == pt0 && null == pt1) {
hasErrors = true; hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " is essentially nothing.]"; String errorMessage
= ("[#ERROR "
+ ErrorsAndWarnings.getMessage(130, shortMessages, s.getText())
+ "]");
if (null != writer) writer.write(errorMessage); if (null != writer) writer.write(errorMessage);
if (null != tdoc) { if (null != tdoc) {
tdoc.appendRoman(tdocLocation[0], errorMessage, tdoc.appendRoman(tdocLocation[0], errorMessage,
@ -431,16 +455,16 @@ public class ACIPConverter {
TStackList sl1 = ((null == pt1) TStackList sl1 = ((null == pt1)
? null : pt1.getBestParse()); ? null : pt1.getBestParse());
if (null == sl0 && null == sl1) { if (null == sl0 && null == sl1) {
// I don't think this can happen // {A-DZU} causes this, for example.
// nowadays; early in the
// converter's life, parsing of
// tsheg bars was handled
// differently, but now, I think
// this is impossible.
ThdlDebug.noteIffyCode();
hasErrors = true; hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " has no legal parses.]"; String errorMessage =
if (null != writer) writer.write(errorMessage); "[#ERROR "
+ ErrorsAndWarnings.getMessage(134,
shortMessages,
s.getText())
+ "]";
if (null != writer)
writer.write(errorMessage);
if (null != tdoc) { if (null != tdoc) {
tdoc.appendRoman(tdocLocation[0], tdoc.appendRoman(tdocLocation[0],
errorMessage, errorMessage,
@ -474,12 +498,13 @@ public class ACIPConverter {
if ("None" != warningLevel) { if ("None" != warningLevel) {
warning = pt.getWarning(warningLevel, warning = pt.getWarning(warningLevel,
pl, pl,
s.getText()); s.getText(),
shortMessages);
} }
if (null != warning) { if (null != warning) {
if (writeWarningsToOut) { if (writeWarningsToOut) {
String text String text
= ("[#WARNING CONVERTING ACIP DOCUMENT: " = ("[#WARNING "
+ warning + "]"); + warning + "]");
if (null != writer) writer.write(text); if (null != writer) writer.write(text);
if (null != tdoc) { if (null != tdoc) {
@ -504,12 +529,16 @@ public class ACIPConverter {
// in TMW. That means there // in TMW. That means there
// was probably a typo in the // was probably a typo in the
// input. // input.
if ("None" != warningLevel) { if (ErrorsAndWarnings.isEnabled(511, warningLevel)) {
Object[] trialDuff = sl.getDuff(); Object[] trialDuff
= sl.getDuff(shortMessages,
false);
for (int ii = 0; ii < trialDuff.length; ii++) { for (int ii = 0; ii < trialDuff.length; ii++) {
if (trialDuff[ii] instanceof String) { if (trialDuff[ii] instanceof String) {
if (!((String)trialDuff[ii]).startsWith("511"))
throw new Error("I thought 511 was the only beast like this; FIXME: make this an assertion 324xd3");
String bwarning String bwarning
= "[#WARNING CONVERTING ACIP DOCUMENT: " = "[#WARNING "
+ (String)trialDuff[ii] + "]"; + (String)trialDuff[ii] + "]";
unicode = bwarning + unicode; unicode = bwarning + unicode;
if (null != hasWarnings) hasWarnings[0] = true; if (null != hasWarnings) hasWarnings[0] = true;
@ -522,7 +551,7 @@ public class ACIPConverter {
} }
} }
if (null != tdoc) { if (null != tdoc) {
duff = sl.getDuff(); duff = sl.getDuff(shortMessages, true);
BoolTriple bt; BoolTriple bt;
if (colors && sl.isLegalTshegBar(true).isLegal && !sl.isLegalTshegBar(false).isLegal) { if (colors && sl.isLegalTshegBar(true).isLegal && !sl.isLegalTshegBar(false).isLegal) {
color = Color.YELLOW; color = Color.YELLOW;
@ -657,7 +686,24 @@ public class ACIPConverter {
char ch = s.getText().charAt(0); char ch = s.getText().charAt(0);
if (ch >= '\uF021' && ch <= '\uF0FF') { if (ch >= '\uF021' && ch <= '\uF0FF') {
hasErrors = true; hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The Unicode escape '" + ch + "' with ordinal " + (int)ch + " is in the private-use area (PUA) of Unicode and will thus not be written out into the output lest you think other tools will be able to understand this non-standard construction.]"; String errorMessage =
"[#ERROR "
+ ErrorsAndWarnings.getMessage(135,
shortMessages,
"" + ch)
+ "]";
writer.write(errorMessage);
if (null != errors)
errors.append(errorMessage + "\n");
continue; // FIXME: dropping output if null != tdoc
} else if (org.thdl.tib.text.tshegbar.UnicodeUtils.isReservedTibetanCode(ch)) {
hasErrors = true;
String errorMessage =
"[#ERROR "
+ ErrorsAndWarnings.getMessage(138,
shortMessages,
"" + ch)
+ "]";
writer.write(errorMessage); writer.write(errorMessage);
if (null != errors) if (null != errors)
errors.append(errorMessage + "\n"); errors.append(errorMessage + "\n");
@ -669,7 +715,12 @@ public class ACIPConverter {
duff = TibetanMachineWeb.mapUnicodeToTMW(s.getText().charAt(0)); duff = TibetanMachineWeb.mapUnicodeToTMW(s.getText().charAt(0));
if (null == duff) { if (null == duff) {
hasErrors = true; hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The Unicode escape with ordinal " + (int)s.getText().charAt(0) + " does not match up with any TibetanMachineWeb glyph.]"; String errorMessage =
"[#ERROR "
+ ErrorsAndWarnings.getMessage(136,
shortMessages,
s.getText())
+ "]";
tdoc.appendRoman(tdocLocation[0], tdoc.appendRoman(tdocLocation[0],
errorMessage, errorMessage,
Color.RED); Color.RED);
@ -700,7 +751,7 @@ public class ACIPConverter {
else { else {
hasErrors = true; hasErrors = true;
String emsg String emsg
= "[ERROR: " + (String)duff[j] + "]"; = "[ERROR " + (String)duff[j] + "]";
if (null != errors) if (null != errors)
errors.append(emsg + "\n"); errors.append(emsg + "\n");
tdoc.appendRoman(tdocLocation[0], tdoc.appendRoman(tdocLocation[0],

View file

@ -50,7 +50,8 @@ public class ACIPTshegBarScanner {
} }
StringBuffer errors = new StringBuffer(); StringBuffer errors = new StringBuffer();
int maxErrors = 1000; int maxErrors = 1000;
ArrayList al = scanFile(args[0], errors, maxErrors - 1); ArrayList al = scanFile(args[0], errors, maxErrors - 1,
"true".equals(System.getProperty("org.thdl.tib.text.ttt.ACIPTshegBarScanner.shortMessages")));
if (null == al) { if (null == al) {
System.out.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this"); System.out.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this");
@ -74,25 +75,36 @@ public class ACIPTshegBarScanner {
/** Scans an ACIP file with path fname into tsheg bars. If errors /** Scans an ACIP file with path fname into tsheg bars. If errors
* is non-null, error messages will be appended to it. Returns a * is non-null, error messages will be appended to it. Returns a
* list of TStrings that is the scan. <p>FIXME: not so * list of TStrings that is the scan. Warning and error messages
* efficient; copies the whole file into memory first. * in the result will be long and self-contained unless
* @throws IOException if we cannot read in the ACIP input file */ * shortMessages is true.
public static ArrayList scanFile(String fname, StringBuffer errors, int maxErrors) *
* <p>FIXME: not so efficient; copies the whole file into memory
* first.
*
* @throws IOException if we cannot read in the ACIP input file
* */
public static ArrayList scanFile(String fname, StringBuffer errors,
int maxErrors, boolean shortMessages)
throws IOException throws IOException
{ {
return scanStream(new FileInputStream(fname), return scanStream(new FileInputStream(fname),
errors, maxErrors); errors, maxErrors, shortMessages);
} }
/** Scans a stream of ACIP into tsheg bars. If errors is /** Scans a stream of ACIP into tsheg bars. If errors is
* non-null, error messages will be appended to it. You can * non-null, error messages will be appended to it. You can
* recover both errors and warnings (modulo offset information) * recover both errors and warnings (modulo offset information)
* from the result, though. Returns a list of TStrings that * from the result, though. They will be short messages iff
* is the scan, or null if more than maxErrors occur. <p>FIXME: * shortMessages is true. Returns a list of TStrings that is the
* not so efficient; copies the whole file into memory first. * scan, or null if more than maxErrors occur.
*
* <p>FIXME: not so efficient; copies the whole file into memory
* first.
*
* @throws IOException if we cannot read the whole ACIP stream */ * @throws IOException if we cannot read the whole ACIP stream */
public static ArrayList scanStream(InputStream stream, StringBuffer errors, public static ArrayList scanStream(InputStream stream, StringBuffer errors,
int maxErrors) int maxErrors, boolean shortMessages)
throws IOException throws IOException
{ {
StringBuffer s = new StringBuffer(); StringBuffer s = new StringBuffer();
@ -105,7 +117,7 @@ public class ACIPTshegBarScanner {
s.append(ch, 0, amt); s.append(ch, 0, amt);
} }
in.close(); in.close();
return scan(s.toString(), errors, maxErrors); return scan(s.toString(), errors, maxErrors, shortMessages);
} }
/** Helper. Here because ACIP {MTHAR%\nKHA} should be treated the /** Helper. Here because ACIP {MTHAR%\nKHA} should be treated the
@ -121,6 +133,40 @@ public class ACIPTshegBarScanner {
|| ((TString)al.get(i)).getType() == TString.TSHEG_BAR_ADORNMENT)); || ((TString)al.get(i)).getType() == TString.TSHEG_BAR_ADORNMENT));
} }
/** Helper that records one scanner error.  An ERROR TString whose
    text is the result of ErrorsAndWarnings.getMessage(code,
    shortMessages, translit) is appended to al; if errors is nonnull,
    a line of the form "Offset N: ERROR message" is appended to it
    too.  The offset is printed as "END" when i is negative, and an
    " or maybe M" alternative (M being i minus numNewlines) is
    included whenever numNewlines is nonzero.  When maxErrors is
    nonnegative, numErrorsArray[0] is incremented by one; when
    maxErrors is negative, the count is left untouched.  (Nothing
    else is mutated.)
    @param code the error code handed to ErrorsAndWarnings.getMessage
    @param translit the text handed to ErrorsAndWarnings.getMessage
    @param shortMessages handed to ErrorsAndWarnings.getMessage
    @param i the offset to report, or a negative number for "END"
    @param numNewlines the newline count used for the alternative
    offset
    @param maxErrors the error limit, or a negative number for no
    limit
    @param al the list that receives the new ERROR TString
    @param errors receives the offset-prefixed message if nonnull
    @param numErrorsArray one-element running error count
    @return true if and only if the error count has gone too high
    and caller should abort scanning */
private static boolean queueError(int code,
                                  String translit,
                                  boolean shortMessages,
                                  int i,
                                  int numNewlines,
                                  int maxErrors,
                                  ArrayList al,
                                  StringBuffer errors,
                                  int numErrorsArray[]) {
    String message
        = ErrorsAndWarnings.getMessage(code, shortMessages, translit);
    al.add(new TString("ACIP", message, TString.ERROR));

    if (errors != null) {
        String where = "END";
        if (i >= 0) {
            where = "" + i;
        }
        String alternative = "";
        if (numNewlines != 0) {
            alternative = " or maybe " + (i - numNewlines);
        }
        errors.append("Offset " + where + alternative
                      + ": ERROR "
                      + message + "\n");
    }

    // With no limit in force the count is not even incremented.
    if (maxErrors < 0) {
        return false;
    }
    numErrorsArray[0] = numErrorsArray[0] + 1;
    return numErrorsArray[0] >= maxErrors;
}
// DLC FIXME "H:\n\n" becomes "H: \n\n", wrongly I think. See // DLC FIXME "H:\n\n" becomes "H: \n\n", wrongly I think. See
// Tibetan! 5.1 section on formatting Tibetan texts. // Tibetan! 5.1 section on formatting Tibetan texts.
@ -145,17 +191,21 @@ public class ACIPTshegBarScanner {
* @param maxErrors if nonnegative, then scanning will stop when * @param maxErrors if nonnegative, then scanning will stop when
* more than maxErrors errors occur. In this event, null is * more than maxErrors errors occur. In this event, null is
* returned. * returned.
* @param shortMessages true iff you want short error and warning
* messages instead of long, self-contained error messages
* @return null if more than maxErrors errors occur, or the scan * @return null if more than maxErrors errors occur, or the scan
* otherwise * otherwise */
*/ public static ArrayList scan(String s, StringBuffer errors, int maxErrors,
public static ArrayList scan(String s, StringBuffer errors, int maxErrors) { boolean shortMessages) {
// FIXME: Use less memory and time by not adding in the
// warnings that are below threshold.
// the size depends on whether it's mostly Tibetan or mostly // the size depends on whether it's mostly Tibetan or mostly
// Latin and a number of other factors. This is meant to be // Latin and a number of other factors. This is meant to be
// an underestimate, but not too much of an underestimate. // an underestimate, but not too much of an underestimate.
int numErrors = 0;
ArrayList al = new ArrayList(s.length() / 10); ArrayList al = new ArrayList(s.length() / 10);
int numErrorsArray[] = new int[] { 0 };
boolean waitingForMatchingIllegalClose = false; boolean waitingForMatchingIllegalClose = false;
int sl = s.length(); int sl = s.length();
int currentType = TString.ERROR; int currentType = TString.ERROR;
@ -171,13 +221,9 @@ public class ACIPTshegBarScanner {
if (ch == '\n') ++numNewlines; if (ch == '\n') ++numNewlines;
if (TString.COMMENT == currentType && ch != ']') { if (TString.COMMENT == currentType && ch != ']') {
if ('[' == ch) { if ('[' == ch) {
String errMsg; if (queueError(102, "" + ch,
al.add(new TString("ACIP", errMsg = "Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.", shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
TString.ERROR)); return null;
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ errMsg + "\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} }
continue; continue;
} }
@ -191,22 +237,14 @@ public class ACIPTshegBarScanner {
currentType)); currentType));
} }
if (!waitingForMatchingIllegalClose) { if (!waitingForMatchingIllegalClose) {
String errMsg; if (queueError(103, "" + ch,
al.add(new TString("ACIP", errMsg = ("Found a truly unmatched close bracket, '" + s.substring(i, i+1)) + "'.", shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
TString.ERROR)); return null;
if (null != errors) {
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ errMsg + "\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} }
waitingForMatchingIllegalClose = false; waitingForMatchingIllegalClose = false;
al.add(new TString("ACIP", "Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.", if (queueError(104, "" + ch,
TString.ERROR)); shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) return null;
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
startOfString = i+1; startOfString = i+1;
currentType = TString.ERROR; currentType = TString.ERROR;
} else { } else {
@ -459,16 +497,12 @@ public class ACIPTshegBarScanner {
// WITHOUT # MARKS]. Though "... [" could cause // WITHOUT # MARKS]. Though "... [" could cause
// this too. // this too.
if (waitingForMatchingIllegalClose) { if (waitingForMatchingIllegalClose) {
al.add(new TString("ACIP", "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.", if (queueError(105, "" + ch,
TString.ERROR)); shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) { return null;
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} }
waitingForMatchingIllegalClose = true; waitingForMatchingIllegalClose = true;
if (null != errors) { {
String inContext = s.substring(i, i+Math.min(sl-i, 10)); String inContext = s.substring(i, i+Math.min(sl-i, 10));
if (inContext.indexOf("\r") >= 0) { if (inContext.indexOf("\r") >= 0) {
inContext = inContext.substring(0, inContext.indexOf("\r")); inContext = inContext.substring(0, inContext.indexOf("\r"));
@ -479,11 +513,9 @@ public class ACIPTshegBarScanner {
inContext = inContext + "..."; inContext = inContext + "...";
} }
} }
al.add(new TString("ACIP", "Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?", if (queueError(106, inContext,
TString.ERROR)); shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": " return null;
+ "Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} }
startOfString = i + 1; startOfString = i + 1;
currentType = TString.ERROR; currentType = TString.ERROR;
@ -533,18 +565,21 @@ public class ACIPTshegBarScanner {
inContext = inContext + "..."; inContext = inContext + "...";
} }
} }
al.add(new TString("ACIP", "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.", if (queueError(107, inContext,
TString.ERROR)); shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) return null;
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
startOfString = i+numdigits+3; startOfString = i+numdigits+3;
i = startOfString - 1; i = startOfString - 1;
currentType = TString.ERROR; currentType = TString.ERROR;
break; break;
} }
if (i+numdigits+4 < sl && (s.charAt(i+numdigits+4) == '.' || s.charAt(i+numdigits+4) == 'A' || s.charAt(i+numdigits+4) == 'B' || s.charAt(i+numdigits+4) == 'a' || s.charAt(i+numdigits+4) == 'b' || isNumeric(s.charAt(i+numdigits+4)))) { if (i+numdigits+4 < sl
&& (s.charAt(i+numdigits+4) == '.'
|| s.charAt(i+numdigits+4) == 'A'
|| s.charAt(i+numdigits+4) == 'B'
|| s.charAt(i+numdigits+4) == 'a'
|| s.charAt(i+numdigits+4) == 'b'
|| isNumeric(s.charAt(i+numdigits+4)))) {
String inContext = s.substring(i, i+Math.min(sl-i, 10)); String inContext = s.substring(i, i+Math.min(sl-i, 10));
if (inContext.indexOf("\r") >= 0) { if (inContext.indexOf("\r") >= 0) {
inContext = inContext.substring(0, inContext.indexOf("\r")); inContext = inContext.substring(0, inContext.indexOf("\r"));
@ -555,12 +590,9 @@ public class ACIPTshegBarScanner {
inContext = inContext + "..."; inContext = inContext + "...";
} }
} }
al.add(new TString("ACIP", "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.", if (queueError(108, inContext,
TString.ERROR)); shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) return null;
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
startOfString = i+1; // FIXME: skip over more? test this code. startOfString = i+1; // FIXME: skip over more? test this code.
currentType = TString.ERROR; currentType = TString.ERROR;
break; break;
@ -651,12 +683,9 @@ public class ACIPTshegBarScanner {
inContext = inContext + "..."; inContext = inContext + "...";
} }
} }
al.add(new TString("ACIP", "Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.", if (queueError(109, inContext,
TString.ERROR)); shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) return null;
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
startOfString = i+1; startOfString = i+1;
currentType = TString.ERROR; currentType = TString.ERROR;
} }
@ -673,16 +702,9 @@ public class ACIPTshegBarScanner {
if (startSlashIndex >= 0) { if (startSlashIndex >= 0) {
if (startSlashIndex + 1 == i) { if (startSlashIndex + 1 == i) {
/* //NYA\\ appears in ACIP input, and I think if (queueError(110, "" + ch,
* it means /NYA/. We warn about // for this shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
* reason. \\ causes a tsheg-bar error. */ return null;
al.add(new TString("ACIP", "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.",
TString.ERROR));
if (errors != null) {
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} }
al.add(new TString("ACIP", s.substring(i, i+1), al.add(new TString("ACIP", s.substring(i, i+1),
TString.END_SLASH)); TString.END_SLASH));
@ -712,12 +734,9 @@ public class ACIPTshegBarScanner {
if (startParenIndex >= 0) { if (startParenIndex >= 0) {
if (ch == '(') { if (ch == '(') {
al.add(new TString("ACIP", "Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.", if (queueError(111, "" + ch,
TString.ERROR)); shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) return null;
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} else { } else {
al.add(new TString("ACIP", s.substring(i, i+1), TString.END_PAREN)); al.add(new TString("ACIP", s.substring(i, i+1), TString.END_PAREN));
startParenIndex = -1; startParenIndex = -1;
@ -726,12 +745,9 @@ public class ACIPTshegBarScanner {
currentType = TString.ERROR; currentType = TString.ERROR;
} else { } else {
if (ch == ')') { if (ch == ')') {
al.add(new TString("ACIP", "Unexpected closing parenthesis, ), found.", if (queueError(112, "" + ch,
TString.ERROR)); shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) return null;
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Unexpected closing parenthesis, ), found.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} else { } else {
startParenIndex = i; startParenIndex = i;
al.add(new TString("ACIP", s.substring(i, i+1), TString.START_PAREN)); al.add(new TString("ACIP", s.substring(i, i+1), TString.START_PAREN));
@ -749,13 +765,9 @@ public class ACIPTshegBarScanner {
al.add(new TString("ACIP", s.substring(startOfString, i), al.add(new TString("ACIP", s.substring(startOfString, i),
currentType)); currentType));
} }
String errMsg = "The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does."; if (queueError(113, "" + ch,
al.add(new TString("ACIP", errMsg, shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
TString.ERROR)); return null;
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ errMsg + "\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
startOfString = i+1; startOfString = i+1;
currentType = TString.ERROR; currentType = TString.ERROR;
} // else this is [*TR'A ?] or the like. } // else this is [*TR'A ?] or the like.
@ -780,7 +792,10 @@ public class ACIPTshegBarScanner {
|| (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n') || (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n')
|| (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z') || (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z')
|| (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z')))) { || (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z')))) {
al.add(new TString("ACIP", "A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".", al.add(new TString("ACIP",
ErrorsAndWarnings.getMessage(510,
shortMessages,
"" + ch),
TString.WARNING)); TString.WARNING));
} }
startOfString = i+1; startOfString = i+1;
@ -858,8 +873,11 @@ public class ACIPTshegBarScanner {
} }
if (!bad) if (!bad)
al.add(new TString("ACIP", "^", TString.TIBETAN_PUNCTUATION)); al.add(new TString("ACIP", "^", TString.TIBETAN_PUNCTUATION));
else else {
al.add(new TString("ACIP", "The ACIP {^} must precede a tsheg bar.", TString.ERROR)); if (queueError(131, "^",
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
} else { } else {
// Don't add in a "\r\n" or "\n" unless there's a // Don't add in a "\r\n" or "\n" unless there's a
// blank line. // blank line.
@ -871,8 +889,9 @@ public class ACIPTshegBarScanner {
|| ('\n' == ch && i >= 1 && s.charAt(i-1) == '\n')))) { || ('\n' == ch && i >= 1 && s.charAt(i-1) == '\n')))) {
for (int h = 0; h < (realNewline ? 2 : 1); h++) { for (int h = 0; h < (realNewline ? 2 : 1); h++) {
if (isTshegBarAdornment(ch) && !legalTshegBarAdornment) { if (isTshegBarAdornment(ch) && !legalTshegBarAdornment) {
al.add(new TString("ACIP", "The ACIP " + ch + " must be glued to the end of a tsheg bar, but this one was not", if (queueError(132, "" + ch,
TString.ERROR)); shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
} else { } else {
al.add(new TString("ACIP", rn ? s.substring(i - 1, i+1) : s.substring(i, i+1), al.add(new TString("ACIP", rn ? s.substring(i - 1, i+1) : s.substring(i, i+1),
(legalTshegBarAdornment (legalTshegBarAdornment
@ -882,7 +901,10 @@ public class ACIPTshegBarScanner {
} }
} }
if ('%' == ch) { if ('%' == ch) {
al.add(new TString("ACIP", "The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.", al.add(new TString("ACIP",
ErrorsAndWarnings.getMessage(504,
shortMessages,
"" + ch),
TString.WARNING)); TString.WARNING));
} }
} }
@ -909,11 +931,9 @@ public class ACIPTshegBarScanner {
currentType)); currentType));
} }
if ((int)ch == 65533) { if ((int)ch == 65533) {
al.add(new TString("ACIP", "Found an illegal, unprintable character.", if (queueError(114, "unknown character",
TString.ERROR)); shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) return null;
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal, unprintable character.\n");
} else if ('\\' == ch) { } else if ('\\' == ch) {
int x = -1; int x = -1;
if (!ThdlOptions.getBooleanOption("thdl.tib.text.disallow.unicode.character.escapes.in.acip") if (!ThdlOptions.getBooleanOption("thdl.tib.text.disallow.unicode.character.escapes.in.acip")
@ -934,22 +954,15 @@ public class ACIPTshegBarScanner {
startOfString = i+1; startOfString = i+1;
break; break;
} else { } else {
final String msg if (queueError(115, "\\",
= "Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}."; shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
al.add(new TString("ACIP", msg, return null;
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ msg + "\n");
} }
} else { } else {
al.add(new TString("ACIP", "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".", if (queueError(116, "" + ch,
TString.ERROR)); shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) return null;
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".\n");
} }
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
startOfString = i+1; startOfString = i+1;
currentType = TString.ERROR; currentType = TString.ERROR;
} else { } else {
@ -965,38 +978,25 @@ public class ACIPTshegBarScanner {
currentType)); currentType));
} }
if (waitingForMatchingIllegalClose) { if (waitingForMatchingIllegalClose) {
al.add(new TString("ACIP", "UNEXPECTED END OF INPUT", if (queueError(117, "-*-END OF FILE-*-",
TString.ERROR)); shortMessages, -1, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) { return null;
errors.append("Offset END: "
+ "Truly unmatched open bracket found.\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} }
if (!bracketTypeStack.empty()) { if (!bracketTypeStack.empty()) {
al.add(new TString("ACIP", "Unmatched open bracket found. A " + ((TString.COMMENT == currentType) ? "comment" : "correction") + " does not terminate.", if (queueError(((TString.COMMENT == currentType) ? 118 : 119),
TString.ERROR)); "-*-END OF FILE-*-",
if (null != errors) { shortMessages, -1, numNewlines, maxErrors, al, errors, numErrorsArray))
errors.append("Offset END: " return null;
+ "Unmatched open bracket found. A " + ((TString.COMMENT == currentType) ? "comment" : "correction") + " does not terminate.\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} }
if (startSlashIndex >= 0) { if (startSlashIndex >= 0) {
al.add(new TString("ACIP", "Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.", if (queueError(120, "-*-END OF FILE-*-",
TString.ERROR)); shortMessages, -1, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) return null;
errors.append("Offset END: "
+ "Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} }
if (startParenIndex >= 0) { if (startParenIndex >= 0) {
al.add(new TString("ACIP", "Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis.", if (queueError(121, "-*-END OF FILE-*-",
TString.ERROR)); shortMessages, -1, numNewlines, maxErrors, al, errors, numErrorsArray))
if (null != errors) return null;
errors.append("Offset END: "
+ "Unmatched open parenthesis, (, found.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} }
return al; return al;
} }

View file

@ -0,0 +1,392 @@
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
import java.util.HashMap;
/** A noninstantiable class that knows about every user-visible error
* or warning message. Each has a unique integer key starting at 101
* for those messages that are errors and starting at 501 for those
* messages that are warnings. This class knows which messages are
* enabled for a given warning level (which is customizable via user
* preferences), whether a message is a warning or an error (which
* could be made configurable at runtime -- easily if you just want
* to upgrade a warning to an error -- FIXME), and how to produce
* both a short and a long error message.
*
* @author David Chandler */
public class ErrorsAndWarnings {
/** Don't instantiate this class.  All of its members are static, so
    the only constructor is private and does nothing. */
private ErrorsAndWarnings() { }
/** Maps Integer code -> severity String.  The severity is "ERROR"
    for errors; "Some", "Most", or "All" for warnings that are
    enabled; or "DISABLED" for disabled warnings.  Every value put
    into this map is an interned String (a string literal or the
    result of String.intern()).  The reference is final because the
    map is created exactly once and only its contents change. */
private static final HashMap severityMap = new HashMap();
static {
    // Fill in the map once, when this class is initialized.
    setupSeverityMapFromBuiltinDefaults();
}
/** Returns higher numbers for higher severity.  "ERROR" ranks
    highest, followed by "Some", then "Most", then "All".  Any other
    value (null and "DISABLED" included) yields 0.
    @param sev a severity string or warning level; it may be null and
    it need not be interned
    @return the numeric rank of sev */
private static int severityStringToInteger(String sev) {
    // Compare by value rather than by reference: a caller that
    // forgets to intern its string must not silently get rank 0.
    if ("ERROR".equals(sev)) return Integer.MAX_VALUE;
    if ("Some".equals(sev)) return Integer.MAX_VALUE - 1;
    if ("Most".equals(sev)) return Integer.MAX_VALUE - 2;
    if ("All".equals(sev)) return Integer.MAX_VALUE - 3;
    return 0;
}
/** Tells whether sev1 is at least as severe as sev2.  The ordering,
    from most to least severe, is "ERROR" (an error), then the
    warning levels "Some", "Most", and "All"; every other value
    ranks below all of these.
    @param sev1 an interned String or null
    @param sev2 an interned String or null
    @return true if and only if sev1 ranks no lower than sev2 */
private static boolean severityGreaterThanOrEquals(String sev1,
                                                   String sev2) {
    final int leftRank = severityStringToInteger(sev1);
    final int rightRank = severityStringToInteger(sev2);
    return !(leftRank < rightRank);
}
/** Tells whether the warning or error numbered code is enabled at
    the given warningLevel.  Errors are enabled regardless of
    warningLevel.
    @param code the number of an error or warning
    @param warningLevel the level to test against; it is compared
    using the same ranking as the stored severities
    @return true if and only if the stored severity of code is at
    least as severe as warningLevel */
static boolean isEnabled(int code, String warningLevel) {
    final Object severity = severityMap.get(new Integer(code));

    // unknown codes appear to be disabled, but let's make sure
    // that no unknown code is used during development:
    ThdlDebug.verify("Unknown error/warning code " + code,
                     null != severity);

    return severityGreaterThanOrEquals((String)severity, warningLevel);
}
/** Returns true if and only if code is an error and not a warning
    at the moment.  A code that has no entry in the severity map is
    not an error.
    @param code the number of an error or warning
    @return true if and only if the stored severity is "ERROR" */
static boolean isError(int code) {
    // equals() instead of ==: the answer must not depend on the
    // stored severity happening to be the interned "ERROR" instance.
    // A missing entry (null) still yields false.
    return "ERROR".equals(severityMap.get(new Integer(code)));
}
/** Builds the user-visible text of the error or warning numbered
    code, as it applies to translit, a snippet of ACIP or EWTS
    transliteration.  Every message starts with the code and a
    colon.

    <p>When shortMessages is true the result is very short, like
    "101: {NNYA}"; a translit that is a single bracket-like
    character -- one of ( ) { } [ ] &lt; &gt; -- is wrapped in
    apostrophes instead of braces.  When shortMessages is false the
    result is a longer, self-contained sentence.

    <p>The long forms of messages 128, 129, and 501 are not produced
    here, because they take more than one "parameter", if you will;
    asking for them throws an Error.
    @param code the number of the error or warning
    @param shortMessages true for the short form, false for the long
    form
    @param translit the offending snippet; the long forms of 116,
    135, 136, and 138 read its first character
    @return the message */
static String getMessage(int code, boolean shortMessages,
                         String translit) {
    // Let's make sure that no unknown code is used during
    // development:
    ThdlDebug.verify("unknown code " + code,
                     null != severityMap.get(new Integer(code)));

    if (shortMessages) {
        // Braces around a lone bracket would be hard to read, so
        // bracket-like snippets get apostrophes instead.
        final boolean isLoneBracket
            = translit != null
            && translit.length() == 1
            && "(){}[]<>".indexOf(translit.charAt(0)) >= 0;
        final String opener = isLoneBracket ? "'" : "{";
        final String closer = isLoneBracket ? "'" : "}";
        return code + ": " + opener + translit + closer;
    }

    // Long form.  Every long message starts the same way:
    final String prefix = code + ": ";

    switch (code) {

    // ERRORS:

    case 101:
        return prefix + "There's not even a unique, non-illegal parse for {" + translit + "}";

    case 102:
        return prefix + "Found an open bracket, '" + translit + "', within a [#COMMENT]-style comment. Brackets may not appear in comments.";

    case 103:
        return prefix + "Found a truly unmatched close bracket, '" + translit + "'.";

    case 104:
        return prefix + "Found a closing bracket, '" + translit + "', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.";

    case 105:
        return prefix + "Found a truly unmatched open bracket, '[' or '{', prior to this current illegal open bracket, '" + translit + "'.";

    case 106:
        return prefix + "Found an illegal open bracket (in context, this is '" + translit + "'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?";

    case 107:
        return prefix + "Found an illegal at sign, @ (in context, this is " + translit + "). This folio marker has a period, '.', at the end of it, which is illegal.";

    case 108:
        return prefix + "Found an illegal at sign, @ (in context, this is " + translit + "). This folio marker is not followed by whitespace, as is expected.";

    case 109:
        return prefix + "Found an illegal at sign, @ (in context, this is " + translit + "). @012B is an example of a legal folio marker.";

    case 110:
        // Doubled slashes wrapped around a tsheg bar and closed with
        // a doubled backslash do appear in ACIP input, which is why
        // a doubled slash is flagged here.  The doubled backslash
        // causes a tsheg-bar error of its own.
        return prefix + "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.";

    case 111:
        return prefix + "Found an illegal open parenthesis, '('. Nesting of parentheses is not allowed.";

    case 112:
        return prefix + "Unexpected closing parenthesis, ')', found.";

    case 113:
        return prefix + "The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.";

    case 114:
        return prefix + "Found an illegal, unprintable character.";

    case 115:
        return prefix + "Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.";

    case 116:
        // The message reports the ordinal of the one offending
        // character.
        ThdlDebug.verify(translit.length() == 1);
        return prefix + "Found an illegal character, '" + translit + "', with ordinal (in decimal) " + (int)translit.charAt(0) + ".";

    case 117:
        return prefix + "Unexpected end of input; truly unmatched open bracket found.";

    case 118:
        return prefix + "Unmatched open bracket found. A comment does not terminate.";

    case 119:
        return prefix + "Unmatched open bracket found. A correction does not terminate.";

    case 120:
        return prefix + "Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.";

    case 121:
        return prefix + "Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis, '('.";

    case 122:
        return prefix + "Warning, empty tsheg bar found while converting from ACIP!";

    case 123:
        return prefix + "Cannot convert ACIP {" + translit + "} because it contains a number but also a non-number.";

    case 124:
        return prefix + "Cannot convert ACIP {" + translit + "} because {V}, wa-zur, appears without being subscribed to a consonant.";

    case 125:
        return prefix + "Cannot convert ACIP {" + translit + "} because we would be required to assume that {A} is a consonant, when it is not clear if it is a consonant or a vowel.";

    case 126:
        return prefix + "Cannot convert ACIP {" + translit + "} because it ends with a '+'.";

    case 127:
        return prefix + "Cannot convert ACIP {" + translit + "} because it ends with a '-'.";

    case 128: // fall through
    case 129:
        throw new Error("No; error messages 128 and 129 are handled elsewhere.");

    case 130:
        return prefix + "The tsheg bar (\"syllable\") {" + translit + "} is essentially nothing.";

    case 131:
        return prefix + "The ACIP caret, {^}, must precede a tsheg bar.";

    case 132:
        return prefix + "The ACIP {" + translit + "} must be glued to the end of a tsheg bar, but this one was not.";

    case 133:
        return prefix + "Cannot convert the ACIP {" + translit + "} to Tibetan because it is unclear what the result should be.";

    case 134:
        return prefix + "The tsheg bar (\"syllable\") {" + translit + "} has no legal parses.";

    case 135:
        ThdlDebug.verify(translit.length() == 1);
        return prefix + "The Unicode escape '" + translit + "' with ordinal (in decimal) " + (int)translit.charAt(0) + " is specified by the Extended Wylie Transliteration Scheme (EWTS), but is in the private-use area (PUA) of Unicode and will thus not be written out into the output lest you think other tools will be able to understand this non-standard construction.";

    case 136:
        ThdlDebug.verify(translit.length() == 1);
        return prefix + "The Unicode escape with ordinal (in decimal) " + (int)translit.charAt(0) + " does not match up with any TibetanMachineWeb glyph.";

    // Error 137 shares its text with warning 511; see below.

    case 138:
        ThdlDebug.verify(translit.length() == 1);
        return prefix + "The Unicode escape '" + translit + "' with ordinal (in decimal) " + (int)translit.charAt(0) + " is in the Tibetan range of Unicode (i.e., [U+0F00, U+0FFF]), but is a reserved code in that area.";

    // WARNINGS (by default):

    case 501:
        throw new Error("Nah -- we handle this one in the code because the message is complicated for 501");

    case 502:
        return prefix + "The last stack does not have a vowel in {" + translit + "}; this may indicate a typo, because Sanskrit, which this probably is (because it's not legal Tibetan), should have a vowel after each stack.";

    case 503:
        return prefix + "Though {" + translit + "} is unambiguous, it would be more computer-friendly if '+' signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";

    case 504:
        return prefix + "The ACIP {" + translit + "} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {" + translit + "}.";

    case 505:
        return prefix + "There is a useless disambiguator in {" + translit + "}.";

    case 506:
        return prefix + "There is a stack of three or more consonants in {" + translit + "} that uses at least one '+' but does not use a '+' between each consonant.";

    case 507:
        return prefix + "There is a chance that the ACIP {" + translit + "} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.";

    case 508: // see 509 also
        return prefix + "The ACIP {" + translit + "} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.";

    case 509: // see 508 also
        return prefix + "The ACIP {" + translit + "} has an initial sequence that has been interpreted as two stacks, a prefix and a root stack, not one nonnative stack, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.";

    case 510:
        return prefix + "A non-breaking tsheg, '" + translit + "', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".";

    // ERROR 137 and WARNING 511 are the same:
    case 137: /* fall through */
    case 511:
        return prefix + "The ACIP {" + translit + "} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.";

    // NEVER HAPPENS:
    default:
        ThdlDebug.verify("switch statement is missing a case",
                         false);
        return "unknown error or warning with number " + code;
    }
}
/** Tells whether warningLevel names one of the three warning
    levels.  The comparison is by reference, so the answer is true
    iff warningLevel is one of "All".intern(), "Most".intern(), or
    "Some".intern(); null and non-interned strings yield false.
    @param warningLevel an interned String or null
    @return true iff warningLevel is a known warning level */
static boolean warningLevelIsKnown(String warningLevel) {
    if (warningLevel == "All") return true;
    if (warningLevel == "Most") return true;
    return warningLevel == "Some";
}
// Bounds of the error codes and of the warning codes.  The loops in
// setupSeverityMapFromBuiltinDefaults() and
// printErrorAndWarningDescriptions(java.io.PrintStream) walk every
// code from each minimum through each maximum.
private static final int MIN_ERROR = 101; // inclusive
private static final int MAX_ERROR = 138; // inclusive
private static final int MIN_WARNING = 501; // inclusive
private static final int MAX_WARNING = 511; // inclusive
/** Fills in the severity map.  Every code from MIN_ERROR through
    MAX_ERROR is given the severity "ERROR".  Every code from
    MIN_WARNING through MAX_WARNING is given the severity named by
    the option thdl.acip.to.tibetan.warning.severity.NNN, where NNN
    is the code: "None" or "DISABLED" disables the warning, and
    "Most", "All", or "Some" is used as is.  If the option is
    missing or has any other value, the built-in default for that
    warning is used.  A missing option is additionally reported via
    ThdlDebug.verify unless the boolean option
    thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults
    is true. */
private static void setupSeverityMapFromBuiltinDefaults() {
    // errors:
    for (int i = MIN_ERROR; i <= MAX_ERROR; i++) {
        severityMap.put(new Integer(i), "ERROR");
    }

    // warnings; entry k is the default for code MIN_WARNING + k:
    String[] defaultSeverities = new String[] {
        // 501:
        "Most",
        // 502:
        "All",
        // 503:
        "All",
        // 504:
        "Some",
        // 505:
        "Some",
        // 506:
        "Some",
        // 507:
        "Most",
        // 508:
        "Some",
        // 509:
        "Most",
        // 510:
        "Some",
        // 511:
        "Some",
    };

    // Catch, during development, a warning that was added without a
    // default severity (or vice versa) before it turns into an
    // out-of-bounds index below.
    ThdlDebug.verify("defaultSeverities must have one entry per warning code",
                     defaultSeverities.length == MAX_WARNING - MIN_WARNING + 1);

    for (int num = MIN_WARNING; num <= MAX_WARNING; num++) {
        String opt = ThdlOptions.getStringOption("thdl.acip.to.tibetan.warning.severity." + num);
        if (null != opt) {
            // Intern so that the stored severity can be compared
            // against string literals.
            opt = opt.intern();
            if ("None" == opt || "DISABLED" == opt)
                opt = "DISABLED";
            else if (!(opt == "Most"
                       || opt == "All"
                       || opt == "Some"))
                opt = null; // unrecognized value; use the default
        } else {
            if (!ThdlOptions.getBooleanOption("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults"))
                ThdlDebug.verify("options.txt is gone?", false);
        }
        ThdlDebug.verify((null == opt) || opt.intern() == opt);
        // Index relative to MIN_WARNING rather than a hard-coded 501
        // so that the table stays in step with the constant.
        severityMap.put(new Integer(num), (null != opt) ? opt : defaultSeverities[num - MIN_WARNING]);
    }

    // DLC FIXME: make 506 an error? or a new, super-high priority class of warning?
    // DLC FIXME: you can't turn 504 or 510 down (e.g., to an "All"-level warning)
}
/** Writes to out the long form of every error message and then of
    every warning message, each followed by a blank line, using "X"
    as the stand-in snippet of transliteration.  This helps a user
    to decipher the short forms.  The descriptions of 128, 129, and
    501 are spelled out here rather than fetched from getMessage.
    @param out where the descriptions are printed */
public static void printErrorAndWarningDescriptions(java.io.PrintStream out) {
    final String translit = "X";

    out.println("ACIP->Tibetan ERRORS are as follows, and appear in their short forms, embedded");
    out.println("in the output, like [#ERROR 130: {X}]:");
    out.println("");
    for (int num = MIN_ERROR; num <= MAX_ERROR; num++) {
        String description;
        switch (num) {
        case 128:
            description = "128: Cannot convert ACIP {" + translit + "} because " + "A:" + " is a \"vowel\" without an associated consonant.";
            break;
        case 129:
            description = "129: Cannot convert ACIP {" + translit + "} because " + "+" + " is not an ACIP consonant.";
            break;
        default:
            description = getMessage(num, false, translit);
            break;
        }
        out.println(description);
        out.println("");
    }

    out.println("ACIP->Tibetan WARNINGS are as follows, and appear in their short forms, embedded");
    out.println("in the output, like [#WARNING 510: {X}]:");
    out.println("");
    for (int num = MIN_WARNING; num <= MAX_WARNING; num++) {
        String description
            = (501 == num)
            ? "501: Using " + translit + ", but only because the tool's knowledge of prefix rules (see the documentation) says that " + "XX" + " is not a legal Tibetan tsheg bar (\"syllable\")"
            : getMessage(num, false, translit);
        out.println(description);
        out.println("");
    }
}
}

View file

@ -72,6 +72,8 @@ public class LotsOfTshegBarsTest extends TestCase {
// We don't want to use options.txt: // We don't want to use options.txt:
ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile(); ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults", "true");
// We don't want to load the TM or TMW font files ourselves: // We don't want to load the TM or TMW font files ourselves:
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true); ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true); ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);

View file

@ -43,6 +43,8 @@ public class PackageTest extends TestCase {
// We don't want to use options.txt: // We don't want to use options.txt:
ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile(); ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults", "true");
// We don't want to load the TM or TMW font files ourselves: // We don't want to load the TM or TMW font files ourselves:
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true); ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true); ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
@ -57,7 +59,7 @@ public class PackageTest extends TestCase {
which may be an error message. */ which may be an error message. */
static String ACIP2TMW2ACIP(String ACIP) { static String ACIP2TMW2ACIP(String ACIP) {
StringBuffer errors = new StringBuffer(); StringBuffer errors = new StringBuffer();
ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1); ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1, false);
if (null == al || errors.length() > 0) if (null == al || errors.length() > 0)
return null; return null;
org.thdl.tib.text.TibetanDocument tdoc org.thdl.tib.text.TibetanDocument tdoc
@ -72,6 +74,7 @@ public class PackageTest extends TestCase {
false, false,
"None", "None",
false, false,
false,
loc)) loc))
return null; return null;
} catch (java.io.IOException e) { } catch (java.io.IOException e) {
@ -151,10 +154,11 @@ public class PackageTest extends TestCase {
assertTrue(null == expectedLegalParses || expectedLegalParses.length == 0); assertTrue(null == expectedLegalParses || expectedLegalParses.length == 0);
return; return;
} else { } else {
if (pt.getWarning("Most", l, acip) != null) { String s;
System.out.println(pt.getWarning("Most", l, acip)); if ((s = pt.getWarning("Most", l, acip, false)) != null) {
} else if (pt.getWarning("All", l, acip) != null) System.out.println(s);
if (sdebug || debug) System.out.println("Paranoiac warning is this: " + pt.getWarning("All", l, acip)); } else if ((s = pt.getWarning("All", l, acip, false)) != null)
if (sdebug || debug) System.out.println("Paranoiac warning is this: " + s);
} }
int np = pt.numberOfParses(); int np = pt.numberOfParses();
boolean goodness = expectedParses == null || expectedParses.length == np; boolean goodness = expectedParses == null || expectedParses.length == np;
@ -239,8 +243,8 @@ public class PackageTest extends TestCase {
System.out.println("allLegalParses are " + allLegalParses + " and legalParses are " + legalParses); System.out.println("allLegalParses are " + allLegalParses + " and legalParses are " + legalParses);
} }
} }
if (l.getACIPError() != null) if (l.getACIPError(acip, false) != null)
System.out.println("ACIPError: " + l.getACIPError()); System.out.println("ACIPError: " + l.getACIPError(acip, false));
if (!l.recoverACIP().equals(acip) if (!l.recoverACIP().equals(acip)
&& (acip.indexOf("A+") < 1) // which becomes +, e.g. {NA+YA} && (acip.indexOf("A+") < 1) // which becomes +, e.g. {NA+YA}
&& (acip.indexOf('0') < 0) && (acip.indexOf('0') < 0)
@ -297,7 +301,7 @@ public class PackageTest extends TestCase {
} }
/** Tests {@link TPairListFactory#breakACIPIntoChunks(String, /** Tests {@link TPairListFactory#breakACIPIntoChunks(String,
* boolean)}, {@link TPairList#getACIPError()}, and {@link * boolean)}, {@link TPairList#getACIPError(String, boolean)}, and {@link
* TPairList#recoverACIP()}. */ * TPairList#recoverACIP()}. */
public void testBreakACIPIntoChunks() { public void testBreakACIPIntoChunks() {
tstHelper("GASN"); // ambiguous with regard to prefix rules tstHelper("GASN"); // ambiguous with regard to prefix rules
@ -7204,7 +7208,7 @@ tstHelper("ZUR");
private static void shelp(String s, String expectedErrors, String expectedScan) { private static void shelp(String s, String expectedErrors, String expectedScan) {
StringBuffer errors = new StringBuffer(); StringBuffer errors = new StringBuffer();
ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1); ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1, false);
if (null != expectedScan) { if (null != expectedScan) {
if (!al.toString().equals(expectedScan)) { if (!al.toString().equals(expectedScan)) {
System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following scan:"); System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following scan:");
@ -7225,7 +7229,8 @@ tstHelper("ZUR");
} }
} }
/** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer, int)}. */ /** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer,
int, boolean)}. */
public void testScanner() { public void testScanner() {
shelp("Pm KA", "", "[TIBETAN_NON_PUNCTUATION:{Pm}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KA}]"); shelp("Pm KA", "", "[TIBETAN_NON_PUNCTUATION:{Pm}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KA}]");
@ -7236,7 +7241,7 @@ tstHelper("ZUR");
"[TIBETAN_NON_PUNCTUATION:{LA}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_NON_PUNCTUATION:{SGRUB}]"); "[TIBETAN_NON_PUNCTUATION:{LA}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_NON_PUNCTUATION:{SGRUB}]");
shelp("PAS... LA", shelp("PAS... LA",
"", "",
"[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, WARNING:{A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]"); "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, WARNING:{510: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
shelp("^GONG SA,", shelp("^GONG SA,",
"", "",
"[TIBETAN_PUNCTUATION:{^}, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]"); "[TIBETAN_PUNCTUATION:{^}, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]");
@ -7257,42 +7262,43 @@ tstHelper("ZUR");
// {^GONG SA}, but {^ GONG SA} isn't so obvious. We give an // {^GONG SA}, but {^ GONG SA} isn't so obvious. We give an
// error. // error.
shelp("^ GONG SA,", shelp("^ GONG SA,",
"", "Offset 0: ERROR 131: The ACIP caret, {^}, must precede a tsheg bar.\n",
"[ERROR:{The ACIP {^} must precede a tsheg bar.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]"); "[ERROR:{131: The ACIP caret, {^}, must precede a tsheg bar.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]");
shelp("^\n\nGONG SA,", shelp("^\n\nGONG SA,",
"", "Offset 0: ERROR 131: The ACIP caret, {^}, must precede a tsheg bar.\n",
"[ERROR:{The ACIP {^} must precede a tsheg bar.}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]"); "[ERROR:{131: The ACIP caret, {^}, must precede a tsheg bar.}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]");
shelp("", "", "[]"); shelp("", "", "[]");
shelp("[DD]", ""); shelp("[DD]", "");
shelp("[", shelp("[",
"Offset 0: Found an illegal open bracket (in context, this is [). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n"); "Offset 0: ERROR 106: Found an illegal open bracket (in context, this is '['). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: ERROR 117: Unexpected end of input; truly unmatched open bracket found.\n");
shelp("{", shelp("{",
"Offset 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n"); "Offset 0: ERROR 106: Found an illegal open bracket (in context, this is '{'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: ERROR 117: Unexpected end of input; truly unmatched open bracket found.\n");
shelp("DD", ""); shelp("DD", "");
shelp("DD]", shelp("DD]",
"Offset 2: Found a truly unmatched close bracket, ']'.\nOffset 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); "Offset 2: ERROR 103: Found a truly unmatched close bracket, ']'.\nOffset 2: ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
shelp("///NYA", "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n"); shelp("///NYA", "Offset 1: ERROR 110: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: ERROR 120: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
shelp("/NYA/", ""); shelp("/NYA/", "");
shelp("[?][BP][LS][DD1][DD2][DDD][DR][# (<{A COMMENT)}>]", ""); shelp("[?][BP][LS][DD1][DD2][DDD][DR][# (<{A COMMENT)}>]", "");
shelp("[LS][# A [[[[[COMMENT][LS]", shelp("[LS][# A [[[[[COMMENT][LS]",
"Offset 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" "Offset 9: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" + "Offset 10: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" + "Offset 11: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" + "Offset 12: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"); + "Offset 13: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
shelp("[ILLEGAL COMMENT]", shelp("[ILLEGAL COMMENT]",
"Offset 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); "Offset 0: ERROR 106: Found an illegal open bracket (in context, this is '[ILLEGAL C...'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
shelp("(BSKYABS GRO)", ""); shelp("(BSKYABS GRO)", "");
shelp("BSKYABS GRO)", "Offset 11: Unexpected closing parenthesis, ), found.\n"); shelp("BSKYABS GRO)", "Offset 11: ERROR 112: Unexpected closing parenthesis, ')', found.\n");
shelp("BSKYABS GRO(", "Offset END: Unmatched open parenthesis, (, found.\n"); shelp("BSKYABS GRO(", "Offset END: ERROR 121: Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis, '('.\n");
shelp("((NESTAGE))", "Offset 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10: Unexpected closing parenthesis, ), found.\n"); shelp("((NESTAGE))", "Offset 1: ERROR 111: Found an illegal open parenthesis, '('. Nesting of parentheses is not allowed.\nOffset 10: ERROR 112: Unexpected closing parenthesis, ')', found.\n");
shelp("(BA)(PA)NYA(CA)", ""); shelp("(BA)(PA)NYA(CA)", "");
shelp("NYAx", ""); shelp("NYAx", "");
shelp("NYA x", ""); shelp("NYA x",
shelp("[# A PARTIAL COM", "Offset END: Unmatched open bracket found. A comment does not terminate.\n"); "Offset 4: ERROR 132: The ACIP {x} must be glued to the end of a tsheg bar, but this one was not.\n");
shelp("[* BSKYABS ", "Offset END: Unmatched open bracket found. A correction does not terminate.\n"); shelp("[# A PARTIAL COM", "Offset END: ERROR 118: Unmatched open bracket found. A comment does not terminate.\n");
shelp("[* BSKYABS ", "Offset END: ERROR 119: Unmatched open bracket found. A correction does not terminate.\n");
shelp("SKYABS [*BSKYABS?] GRO [?]", ""); shelp("SKYABS [*BSKYABS?] GRO [?]", "");
shelp(" SKYABS GRO ", ""); shelp(" SKYABS GRO ", "");
shelp("SKYABS [*BSKYABS] GRO [?]", "", "[TIBETAN_NON_PUNCTUATION:{SKYABS}, TIBETAN_PUNCTUATION:{ }, CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{BSKYABS}, PROBABLE_CORRECTION:{]}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GRO}, TIBETAN_PUNCTUATION:{ }, QUESTION:{[?]}]"); shelp("SKYABS [*BSKYABS] GRO [?]", "", "[TIBETAN_NON_PUNCTUATION:{SKYABS}, TIBETAN_PUNCTUATION:{ }, CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{BSKYABS}, PROBABLE_CORRECTION:{]}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GRO}, TIBETAN_PUNCTUATION:{ }, QUESTION:{[?]}]");
@ -7301,8 +7307,8 @@ tstHelper("ZUR");
shelp("[* RVA ]", "", "[CORRECTION_START:{[*}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{RVA}, TIBETAN_PUNCTUATION:{ }, PROBABLE_CORRECTION:{]}]"); shelp("[* RVA ]", "", "[CORRECTION_START:{[*}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{RVA}, TIBETAN_PUNCTUATION:{ }, PROBABLE_CORRECTION:{]}]");
shelp("[*RVA ?]", "", "[CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{RVA}, TIBETAN_PUNCTUATION:{ }, POSSIBLE_CORRECTION:{?]}]"); shelp("[*RVA ?]", "", "[CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{RVA}, TIBETAN_PUNCTUATION:{ }, POSSIBLE_CORRECTION:{?]}]");
shelp("[*RVA? ]", shelp("[*RVA? ]",
"Offset 5: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n", "Offset 5: ERROR 113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n",
"[CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{RVA}, ERROR:{The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, TIBETAN_PUNCTUATION:{ }, PROBABLE_CORRECTION:{]}]"); "[CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{RVA}, ERROR:{113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, TIBETAN_PUNCTUATION:{ }, PROBABLE_CORRECTION:{]}]");
shelp("[*LINE BREAK]", "", "[CORRECTION_START:{[*}, LATIN:{LINE BREAK}, PROBABLE_CORRECTION:{]}]"); shelp("[*LINE BREAK]", "", "[CORRECTION_START:{[*}, LATIN:{LINE BREAK}, PROBABLE_CORRECTION:{]}]");
shelp("[*LINE BREAK?]", "", "[CORRECTION_START:{[*}, LATIN:{LINE BREAK}, POSSIBLE_CORRECTION:{?]}]"); shelp("[*LINE BREAK?]", "", "[CORRECTION_START:{[*}, LATIN:{LINE BREAK}, POSSIBLE_CORRECTION:{?]}]");
shelp("[*\n\t\r LINEYO ?]", "", "[CORRECTION_START:{[*}, LATIN:{\n\t\r LINEYO }, POSSIBLE_CORRECTION:{?]}]"); shelp("[*\n\t\r LINEYO ?]", "", "[CORRECTION_START:{[*}, LATIN:{\n\t\r LINEYO }, POSSIBLE_CORRECTION:{?]}]");
@ -7310,23 +7316,23 @@ tstHelper("ZUR");
shelp("[*DATA INCOMPLETE HERE?]", "", "[CORRECTION_START:{[*}, LATIN:{DATA INCOMPLETE HERE}, POSSIBLE_CORRECTION:{?]}]"); shelp("[*DATA INCOMPLETE HERE?]", "", "[CORRECTION_START:{[*}, LATIN:{DATA INCOMPLETE HERE}, POSSIBLE_CORRECTION:{?]}]");
shelp("[*THIS\r\nWAS SUPPOSED TO BE THE SIXTH CATEGORY; THE CATEGORIES MENTIONED\r\nABOVE SEEM TO BE OUT OF ORDER THROUGH THIS SECTION]\r\n", ""); shelp("[*THIS\r\nWAS SUPPOSED TO BE THE SIXTH CATEGORY; THE CATEGORIES MENTIONED\r\nABOVE SEEM TO BE OUT OF ORDER THROUGH THIS SECTION]\r\n", "");
shelp("x o % : m", ""); shelp("x o % : m", "Offset 0: ERROR 132: The ACIP {x} must be glued to the end of a tsheg bar, but this one was not.\nOffset 2: ERROR 132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.\nOffset 4: ERROR 132: The ACIP {%} must be glued to the end of a tsheg bar, but this one was not.\n");
shelp("AAx AAo AA% AA: AAm", ""); shelp("AAx AAo AA% AA: AAm", "");
shelp("/NYA ", "Offset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n"); shelp("/NYA ", "Offset END: ERROR 120: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
shelp("(NYA ", "Offset END: Unmatched open parenthesis, (, found.\n"); shelp("(NYA ", "Offset END: ERROR 121: Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis, '('.\n");
shelp("[*NYA ", "Offset END: Unmatched open bracket found. A correction does not terminate.\n"); shelp("[*NYA ", "Offset END: ERROR 119: Unmatched open bracket found. A correction does not terminate.\n");
shelp("[?]", "", "[QUESTION:{[?]}]"); shelp("[?]", "", "[QUESTION:{[?]}]");
shelp("?", shelp("?",
"Offset 0: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n", "Offset 0: ERROR 113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n",
"[ERROR:{The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}]"); "[ERROR:{113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}]");
shelp("KHAN~ BAR ", "Offset 4: Found an illegal character, ~, with ordinal 126.\n"); shelp("KHAN~ BAR ", "Offset 4: ERROR 116: Found an illegal character, '~', with ordinal (in decimal) 126.\n");
shelp("[* Correction with []]", shelp("[* Correction with []]",
"Offset 5: Found an illegal character, r, with ordinal 114.\nOffset 6: Found an illegal character, r, with ordinal 114.\nOffset 7: Found an illegal character, e, with ordinal 101.\nOffset 8: Found an illegal character, c, with ordinal 99.\nOffset 14: Found an illegal character, w, with ordinal 119.\nOffset 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); "Offset 5: ERROR 116: Found an illegal character, 'r', with ordinal (in decimal) 114.\nOffset 6: ERROR 116: Found an illegal character, 'r', with ordinal (in decimal) 114.\nOffset 7: ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.\nOffset 8: ERROR 116: Found an illegal character, 'c', with ordinal (in decimal) 99.\nOffset 14: ERROR 116: Found an illegal character, 'w', with ordinal (in decimal) 119.\nOffset 19: ERROR 106: Found an illegal open bracket (in context, this is '[]]'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
shelp(",NGES ? PA", shelp(",NGES ? PA",
"Offset 6: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n", "Offset 6: ERROR 113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n",
"[TIBETAN_PUNCTUATION:{,}, TIBETAN_NON_PUNCTUATION:{NGES}, TIBETAN_PUNCTUATION:{ }, ERROR:{The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{PA}]"); "[TIBETAN_PUNCTUATION:{,}, TIBETAN_NON_PUNCTUATION:{NGES}, TIBETAN_PUNCTUATION:{ }, ERROR:{113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{PA}]");
@ -7336,16 +7342,16 @@ tstHelper("ZUR");
uhelp(" 1\\ ", "\u0f0b\u0f21\u0f84\u0f0b"); uhelp(" 1\\ ", "\u0f0b\u0f21\u0f84\u0f0b");
} }
shelp("K\\,", shelp("K\\,",
"Offset 1: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\n", "Offset 1: ERROR 115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\n",
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, TIBETAN_PUNCTUATION:{,}]"); "[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, TIBETAN_PUNCTUATION:{,}]");
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]"); shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]");
shelp("MTHARo", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{o}]"); shelp("MTHARo", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{o}]");
shelp("MTHARx", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{x}]"); shelp("MTHARx", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{x}]");
shelp("MTHAR\n%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TIBETAN_PUNCTUATION:{ }, ERROR:{The ACIP % must be glued to the end of a tsheg bar, but this one was not}, WARNING:{The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]"); shelp("MTHAR\n%", "Offset 6 or maybe 5: ERROR 132: The ACIP {%} must be glued to the end of a tsheg bar, but this one was not.\n", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TIBETAN_PUNCTUATION:{ }, ERROR:{132: The ACIP {%} must be glued to the end of a tsheg bar, but this one was not.}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]");
shelp("MTHAR x", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TIBETAN_PUNCTUATION:{ }, ERROR:{The ACIP x must be glued to the end of a tsheg bar, but this one was not}]"); shelp("MTHAR x", "Offset 6: ERROR 132: The ACIP {x} must be glued to the end of a tsheg bar, but this one was not.\n", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TIBETAN_PUNCTUATION:{ }, ERROR:{132: The ACIP {x} must be glued to the end of a tsheg bar, but this one was not.}]");
shelp("PHYIR;", "", "[TIBETAN_NON_PUNCTUATION:{PHYIR}, TIBETAN_PUNCTUATION:{;}]"); shelp("PHYIR;", "", "[TIBETAN_NON_PUNCTUATION:{PHYIR}, TIBETAN_PUNCTUATION:{;}]");
shelp("......,DAM ", shelp("......,DAM ",
@ -7382,15 +7388,15 @@ tstHelper("ZUR");
shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]"); shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]");
shelp("@001 ", "", "[FOLIO_MARKER:{@001}, TIBETAN_PUNCTUATION:{ }]"); shelp("@001 ", "", "[FOLIO_MARKER:{@001}, TIBETAN_PUNCTUATION:{ }]");
shelp("@19-20A", shelp("@19-20A",
"Offset 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n", "Offset 0: ERROR 109: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n",
"[ERROR:{Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // FIXME: yes it occurs in the kangyur. "[ERROR:{109: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // FIXME: yes it occurs in the kangyur.
shelp("@[7B]", ""); shelp("@[7B]", "");
shelp("@012A.3KA", shelp("@012A.3KA",
"", "",
"[FOLIO_MARKER:{@012A.3}, TIBETAN_NON_PUNCTUATION:{KA}]"); "[FOLIO_MARKER:{@012A.3}, TIBETAN_NON_PUNCTUATION:{KA}]");
shelp("@012A.34", shelp("@012A.34",
"Offset 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n", "Offset 0: ERROR 107: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n",
"[ERROR:{Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.}, TIBETAN_NON_PUNCTUATION:{34}]"); "[ERROR:{107: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.}, TIBETAN_NON_PUNCTUATION:{34}]");
shelp("@[07B]", ""); shelp("@[07B]", "");
shelp("@[00007B]", ""); shelp("@[00007B]", "");
shelp("@7B", ""); shelp("@7B", "");
@ -7407,11 +7413,11 @@ tstHelper("ZUR");
shelp("{ BP }", "", "[BP:{{ BP }}]"); // TD3790E2.ACT shelp("{ BP }", "", "[BP:{{ BP }}]"); // TD3790E2.ACT
// LOW-PRIORITY FIXME: support nested comments. // LOW-PRIORITY FIXME: support nested comments.
shelp("[# This is a [# nested comment] don't you know?]KA KHA GA NGA", shelp("[# This is a [# nested comment] don't you know?]KA KHA GA NGA",
"Offset 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\nOffset 38: Found an illegal character, y, with ordinal 121.\nOffset 40: Found an illegal character, u, with ordinal 117.\nOffset 42: Found an illegal character, k, with ordinal 107.\nOffset 45: Found an illegal character, w, with ordinal 119.\nOffset 46: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\nOffset 47: Found a truly unmatched close bracket, ']'.\nOffset 47: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n", "Offset 13: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\nOffset 38: ERROR 116: Found an illegal character, 'y', with ordinal (in decimal) 121.\nOffset 39: ERROR 132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.\nOffset 40: ERROR 116: Found an illegal character, 'u', with ordinal (in decimal) 117.\nOffset 42: ERROR 116: Found an illegal character, 'k', with ordinal (in decimal) 107.\nOffset 45: ERROR 116: Found an illegal character, 'w', with ordinal (in decimal) 119.\nOffset 46: ERROR 113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\nOffset 47: ERROR 103: Found a truly unmatched close bracket, ']'.\nOffset 47: ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n",
"[ERROR:{Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.}, COMMENT:{[# This is a [# nested comment]}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{d}, TSHEG_BAR_ADORNMENT:{o}, TIBETAN_NON_PUNCTUATION:{n't}, TIBETAN_PUNCTUATION:{ }, ERROR:{Found an illegal character, y, with ordinal 121.}, ERROR:{The ACIP o must be glued to the end of a tsheg bar, but this one was not}, ERROR:{Found an illegal character, u, with ordinal 117.}, TIBETAN_PUNCTUATION:{ }, ERROR:{Found an illegal character, k, with ordinal 107.}, TIBETAN_NON_PUNCTUATION:{n}, TSHEG_BAR_ADORNMENT:{o}, ERROR:{Found an illegal character, w, with ordinal 119.}, ERROR:{The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, ERROR:{Found a truly unmatched close bracket, ']'.}, ERROR:{Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.}, TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NGA}]"); "[ERROR:{102: Found an open bracket, '[', within a [#COMMENT]-style comment. 
Brackets may not appear in comments.}, COMMENT:{[# This is a [# nested comment]}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{d}, TSHEG_BAR_ADORNMENT:{o}, TIBETAN_NON_PUNCTUATION:{n't}, TIBETAN_PUNCTUATION:{ }, ERROR:{116: Found an illegal character, 'y', with ordinal (in decimal) 121.}, ERROR:{132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.}, ERROR:{116: Found an illegal character, 'u', with ordinal (in decimal) 117.}, TIBETAN_PUNCTUATION:{ }, ERROR:{116: Found an illegal character, 'k', with ordinal (in decimal) 107.}, TIBETAN_NON_PUNCTUATION:{n}, TSHEG_BAR_ADORNMENT:{o}, ERROR:{116: Found an illegal character, 'w', with ordinal (in decimal) 119.}, ERROR:{113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, ERROR:{103: Found a truly unmatched close bracket, ']'.}, ERROR:{104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.}, TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NGA}]");
shelp("//NYA\\\\", shelp("//NYA\\\\",
"Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\nOffset 6: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\n", "Offset 1: ERROR 110: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: ERROR 115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\nOffset 6: ERROR 115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\n",
"[START_SLASH:{/}, ERROR:{Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, ERROR:{Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}]"); "[START_SLASH:{/}, ERROR:{110: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, ERROR:{115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}]");
} }
private static void uhelp(String acip) { private static void uhelp(String acip) {
@ -7420,9 +7426,21 @@ tstHelper("ZUR");
private static void uhelp(String acip, String expectedUnicode) { private static void uhelp(String acip, String expectedUnicode) {
uhelp(acip, expectedUnicode, "Most"); uhelp(acip, expectedUnicode, "Most");
} }
private static void uhelp(String acip, String expectedUnicode, String warningLevel) { private static void uhelpShortMessages(String acip,
String expectedUnicode) {
uhelp(acip, expectedUnicode, "Most", true);
}
private static void uhelp(String acip,
String expectedUnicode,
String warningLevel) {
uhelp(acip, expectedUnicode, warningLevel, false);
}
private static void uhelp(String acip, String expectedUnicode,
String warningLevel, boolean shortMessages) {
StringBuffer errors = new StringBuffer(); StringBuffer errors = new StringBuffer();
String unicode = ACIPConverter.convertToUnicodeText(acip, errors, null, true, warningLevel); String unicode = ACIPConverter.convertToUnicodeText(acip, errors, null,
true, warningLevel,
shortMessages);
if (null == unicode) { if (null == unicode) {
if (null != expectedUnicode && "none" != expectedUnicode) { if (null != expectedUnicode && "none" != expectedUnicode) {
System.out.println("No unicode exists for " + acip + " but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode)); System.out.println("No unicode exists for " + acip + " but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
@ -7460,45 +7478,70 @@ MNA'
M+NA M+NA
*/ */
uhelp("B+NA", "\u0f56\u0fa3"); uhelp("B+NA", "\u0f56\u0fa3");
uhelp("BNA", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {B+NA}, but only because our knowledge of prefix rules says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3"); uhelp("BNA", "[#WARNING 501: Using {B+NA} for the ACIP {BNA}, but only because the tool's knowledge of prefix rules (see the documentation) says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3");
uhelp("^GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66"); uhelp("^GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^ GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66"); uhelp("^ GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^\rGONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66"); uhelp("^\rGONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^\r\nGONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66"); uhelp("^\r\nGONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^\nGONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66"); uhelp("^\nGONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^ GONG SA", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP {^} must precede a tsheg bar.] \u0f42\u0f7c\u0f44\u0f0b\u0f66"); uhelp("^ GONG SA", "[#ERROR 131: The ACIP caret, {^}, must precede a tsheg bar.] \u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("BGLA", "\u0f56\u0f42\u0fb3"); uhelp("BGLA", "\u0f56\u0f42\u0fb3");
uhelp("BLCAG", "\u0f56\u0f63\u0f95\u0f42"); uhelp("BLCAG", "\u0f56\u0f63\u0f95\u0f42");
uhelp("DBA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP DBA has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f51\u0f56"); uhelp("DBA", "[#WARNING 508: The ACIP {DBA} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f51\u0f56");
uhelp("DMAR", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP DMAR has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f51\u0f58\u0f62"); uhelp("DMAR", "[#WARNING 509: The ACIP {DMAR} has an initial sequence that has been interpreted as two stacks, a prefix and a root stack, not one nonnative stack, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f51\u0f58\u0f62");
uhelp("D+BA", "\u0f51\u0fa6"); uhelp("D+BA", "\u0f51\u0fa6");
uhelp("MNA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP MNA has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f58\u0f53"); uhelp("MNA", "[#WARNING 508: The ACIP {MNA} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f58\u0f53");
uhelp("DGRA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP DGRA has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f51\u0f42\u0fb2"); uhelp("DGRA", "[#WARNING 508: The ACIP {DGRA} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f51\u0f42\u0fb2");
uhelp("D+GRA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a stack of three or more consonants in D+GRA that uses at least one '+' but does not use a '+' between each consonant.]\u0f51\u0f92\u0fb2"); uhelp("D+GRA", "[#WARNING 506: There is a stack of three or more consonants in {D+GRA} that uses at least one '+' but does not use a '+' between each consonant.]\u0f51\u0f92\u0fb2");
uhelp("DGYA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP DGYA has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f51\u0f42\u0fb1"); uhelp("D+G+RA", "\u0f51\u0f92\u0fb2");
uhelp("DGYA", "[#WARNING 508: The ACIP {DGYA} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f51\u0f42\u0fb1");
uhelp("DGYAMS", "[#WARNING 509: The ACIP {DGYAMS} has an initial sequence that has been interpreted as two stacks, a prefix and a root stack, not one nonnative stack, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f51\u0f42\u0fb1\u0f58\u0f66");
uhelp("DGYAM--S", "[#WARNING 505: There is a useless disambiguator in {DGYAM--S}.]\u0f51\u0f42\u0fb1\u0f58\u0f66"); // FIXME: 509 should be given too.
} }
public void testACIPConversion() { public void testACIPConversion() {
uhelp("RTSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {R+TS+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f62\u0faa\u0f99"); // FIXME 936998
uhelp("\\u0FFF", "[#ERROR 138: The Unicode escape '\u0fff' with ordinal (in decimal) 4095 is in the Tibetan range of Unicode (i.e., [U+0F00, U+0FFF]), but is a reserved code in that area.]");
uhelp("\\uF020", "\uF020"); /* not in EWTS's domain */
uhelp("[illegal comment, no '#' mark]",
"[#ERROR 106: Found an illegal open bracket (in context, this is '[illegal c...'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket\u003f][#ERROR 128: Cannot convert ACIP {i} because i is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.][#ERROR 116: Found an illegal character, 'g', with ordinal (in decimal) 103.][#ERROR 116: Found an illegal character, 'a', with ordinal (in decimal) 97.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.]\u0f0b[#ERROR 116: Found an illegal character, 'c', with ordinal (in decimal) 99.][#ERROR 132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.][#ERROR 128: Cannot convert ACIP {mm} because Am is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.]\u0f4e\u0f9a\u0f0d \u0f4e\u0f37\u0f0b\u0f60\u0f04\u0f05\u0f05\u0f60\u0f0b[#ERROR 128: Cannot convert ACIP {m} because Am is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'a', with ordinal (in decimal) 97.][#ERROR 116: Found an illegal character, 'r', with ordinal (in decimal) 114.][#ERROR 116: Found an illegal character, 'k', with ordinal (in decimal) 107.][#ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.]");
uhelpShortMessages("[illegal comment, no '#' mark]",
"[#ERROR 106: {[illegal c...}][#ERROR 128: {i}][#ERROR 116: {l}][#ERROR 116: {l}][#ERROR 116: {e}][#ERROR 116: {g}][#ERROR 116: {a}][#ERROR 116: {l}]\u0f0b[#ERROR 116: {c}][#ERROR 132: {o}][#ERROR 128: {mm}][#ERROR 116: {e}]\u0f4e\u0f9a\u0f0d \u0f4e\u0f37\u0f0b\u0f60\u0f04\u0f05\u0f05\u0f60\u0f0b[#ERROR 128: {m}][#ERROR 116: {a}][#ERROR 116: {r}][#ERROR 116: {k}][#ERROR 104: ']']");
uhelp("[illegal [nested comment], no '#' marks either]",
"[#ERROR 106: Found an illegal open bracket (in context, this is '[illegal [...'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket\u003f][#ERROR 128: Cannot convert ACIP {i} because i is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.][#ERROR 116: Found an illegal character, 'g', with ordinal (in decimal) 103.][#ERROR 116: Found an illegal character, 'a', with ordinal (in decimal) 97.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.]\u0f0b[#ERROR 105: Found a truly unmatched open bracket, '[' or '{', prior to this current illegal open bracket, '['.][#ERROR 106: Found an illegal open bracket (in context, this is '[nested co...'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket\u003f]\u0f4e[#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.][#ERROR 129: Cannot convert ACIP {st} because s is not an ACIP consonant.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.]\u0f4c\u0f0b[#ERROR 116: Found an illegal character, 'c', with ordinal (in decimal) 99.][#ERROR 132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.][#ERROR 128: Cannot convert ACIP {mm} because Am is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.]\u0f4e\u0f9a[#ERROR 104: Found a closing bracket, ']', without a matching open bracket. 
Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.]\u0f0d \u0f4e\u0f37\u0f0b\u0f60\u0f04\u0f05\u0f05\u0f60\u0f0b[#ERROR 128: Cannot convert ACIP {m} because Am is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'a', with ordinal (in decimal) 97.][#ERROR 116: Found an illegal character, 'r', with ordinal (in decimal) 114.][#ERROR 116: Found an illegal character, 'k', with ordinal (in decimal) 107.][#ERROR 129: Cannot convert ACIP {s} because s is not an ACIP consonant.]\u0f0b[#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.][#ERROR 128: Cannot convert ACIP {ith} because i is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.][#ERROR 116: Found an illegal character, 'r', with ordinal (in decimal) 114.][#ERROR 103: Found a truly unmatched close bracket, ']'.][#ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.]");
uhelpShortMessages("[illegal [nested comment], no '#' marks either]",
"[#ERROR 106: {[illegal [...}][#ERROR 128: {i}][#ERROR 116: {l}][#ERROR 116: {l}][#ERROR 116: {e}][#ERROR 116: {g}][#ERROR 116: {a}][#ERROR 116: {l}]\u0f0b[#ERROR 105: '['][#ERROR 106: {[nested co...}]\u0f4e[#ERROR 116: {e}][#ERROR 129: {st}][#ERROR 116: {e}]\u0f4c\u0f0b[#ERROR 116: {c}][#ERROR 132: {o}][#ERROR 128: {mm}][#ERROR 116: {e}]\u0f4e\u0f9a[#ERROR 104: ']']\u0f0d \u0f4e\u0f37\u0f0b\u0f60\u0f04\u0f05\u0f05\u0f60\u0f0b[#ERROR 128: {m}][#ERROR 116: {a}][#ERROR 116: {r}][#ERROR 116: {k}][#ERROR 129: {s}]\u0f0b[#ERROR 116: {e}][#ERROR 128: {ith}][#ERROR 116: {e}][#ERROR 116: {r}][#ERROR 103: ']'][#ERROR 104: ']']");
uhelp("VA", "[#ERROR 124: Cannot convert ACIP {VA} because {V}, wa-zur, appears without being subscribed to a consonant.]");
uhelp("A", "[#ERROR 125: Cannot convert ACIP {A} because we would be required to assume that {A} is a consonant, when it is not clear if it is a consonant or a vowel.]");
uhelp("A-DZU", "[#ERROR 134: The tsheg bar (\"syllable\") {A-DZU} has no legal parses.]");
uhelp("[# a [# nested comment]]",
"[#ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.][# a [# nested comment][#ERROR 103: Found a truly unmatched close bracket, ']'.][#ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.]");
uhelp("RTSNYA", "[#WARNING 507: There is a chance that the ACIP {RTSNYA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING 511: The ACIP {R+TS+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f62\u0faa\u0f99"); // FIXME 936998
uhelp("KO&HAm,", "\u0F40\u0F7C\u0F85\u0F67\u0F7E\u0F0D"); uhelp("KO&HAm,", "\u0F40\u0F7C\u0F85\u0F67\u0F7E\u0F0D");
uhelp("x", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP x must be glued to the end of a tsheg bar, but this one was not]"); uhelp("x", "[#ERROR 132: The ACIP {x} must be glued to the end of a tsheg bar, but this one was not.]");
uhelp("o", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP o must be glued to the end of a tsheg bar, but this one was not]"); uhelp("o", "[#ERROR 132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.]");
uhelp("%", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP % must be glued to the end of a tsheg bar, but this one was not][#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]"); uhelp("%", "[#ERROR 132: The ACIP {%} must be glued to the end of a tsheg bar, but this one was not.][#WARNING 504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]");
uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") : has these errors: Cannot convert ACIP A: because A: is a \"vowel\" without an associated consonant]"); uhelp(":", "[#ERROR 128: Cannot convert ACIP {:} because A: is a \"vowel\" without an associated consonant.]");
uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") m has these errors: Cannot convert ACIP Am because Am is a \"vowel\" without an associated consonant]"); uhelp("m", "[#ERROR 128: Cannot convert ACIP {m} because Am is a \"vowel\" without an associated consonant.]");
uhelp("N+YA", "\u0f53\u0fb1"); uhelp("N+YA", "\u0f53\u0fb1");
uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A
uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") NE+YA has these errors: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]"); uhelp("NE+YA", "[#ERROR 129: Cannot convert ACIP {NE+YA} because + is not an ACIP consonant.]");
uhelp("tRAStA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {t+RA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f4a\u0fb2\u0f66\u0f9a"); uhelp("tRAStA", "[#WARNING 511: The ACIP {t+RA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f4a\u0fb2\u0f66\u0f9a");
uhelp("DZHDZHA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZHA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5b\u0fb7\u0fab\u0fb7"); // tricky because DZHDZA is not in TMW but DZHDZHA is uhelp("DZHDZHA", "[#WARNING 507: There is a chance that the ACIP {DZHDZHA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5b\u0fb7\u0fab\u0fb7"); // tricky because DZHDZA is not in TMW but DZHDZHA is
uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {DZH+DZA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f5b\u0fb7\u0fab"); uhelp("DZHDZA", "[#WARNING 507: There is a chance that the ACIP {DZHDZA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING 511: The ACIP {DZH+DZA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f5b\u0fb7\u0fab");
uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1"); uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1");
uhelp("P+S+NYA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99"); uhelp("P+S+NYA", "[#WARNING 511: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99");
uhelp("PSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP PSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn! uhelp("PSNYA", "[#WARNING 507: There is a chance that the ACIP {PSNYA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING 511: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn!
uhelp("NNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP NNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {N+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f53\u0f99"); uhelp("NNYA", "[#WARNING 507: There is a chance that the ACIP {NNYA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING 511: The ACIP {N+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f53\u0f99");
uhelp("GHNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP GHNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f42\u0fb7\u0f99"); uhelp("GHNYA", "[#WARNING 507: There is a chance that the ACIP {GHNYA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f42\u0fb7\u0f99");
// TS+NYA and T+S+N+YA are both legal, so what is TSNYA? // TS+NYA and T+S+N+YA are both legal, so what is TSNYA?
// Private correspondence with Robert Chilton says that it is // Private correspondence with Robert Chilton says that it is
@ -7506,22 +7549,22 @@ M+NA
uhelp("THAG PA", "\u0f50\u0f42\u0f0b\u0f54"); uhelp("THAG PA", "\u0f50\u0f42\u0f0b\u0f54");
uhelp("KA \nKHA\n\nGA", "\u0f40\u0f0b\u0f41\u0f0b\n\n\u0f42"); uhelp("KA \nKHA\n\nGA", "\u0f40\u0f0b\u0f41\u0f0b\n\n\u0f42");
uhelp("KA%\nKHA", "\u0f40\u0f35[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]\u0f0b\u0f41"); uhelp("KA%\nKHA", "\u0f40\u0f35[#WARNING 504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]\u0f0b\u0f41");
uhelp("KA%", "\u0f40\u0f35[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]"); uhelp("KA%", "\u0f40\u0f35[#WARNING 504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]");
uhelp("KAo", "\u0f40\u0f37"); uhelp("KAo", "\u0f40\u0f37");
uhelp("KAo\n\nKA", "\u0f40\u0f37\u0f0b\n\n\u0f40"); uhelp("KAo\n\nKA", "\u0f40\u0f37\u0f0b\n\n\u0f40");
uhelp("KAo\nKHA", "\u0f40\u0f37\u0f0b\u0f41"); uhelp("KAo\nKHA", "\u0f40\u0f37\u0f0b\u0f41");
uhelp("KAo KHA", "\u0f40\u0f37\u0f0b\u0f41"); uhelp("KAo KHA", "\u0f40\u0f37\u0f0b\u0f41");
uhelp("KA KAo KHA", "\u0f40\u0f0b\u0f40\u0f37\u0f0b\u0f41"); uhelp("KA KAo KHA", "\u0f40\u0f0b\u0f40\u0f37\u0f0b\u0f41");
uhelp("KAx", "\u0f40[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot convert the ACIP {x} to Tibetan because it is unclear what the result should be.]"); uhelp("KAx", "\u0f40[#ERROR 133: Cannot convert the ACIP {x} to Tibetan because it is unclear what the result should be.]");
uhelp("G+DHA", "\u0f42\u0fa1\u0fb7"); uhelp("G+DHA", "\u0f42\u0fa1\u0fb7");
uhelp("P'EE", "\u0f54\u0f71\u0f7b"); uhelp("P'EE", "\u0f54\u0f71\u0f7b");
uhelp("BA ? HA", "\u0f56\u0f0b[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.] \u0f67"); uhelp("BA ? HA", "\u0f56\u0f0b[#ERROR 113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.] \u0f67");
uhelp("KA", "\u0f40"); uhelp("KA", "\u0f40");
uhelp("\\u0F35", "\u0F35"); uhelp("\\u0F35", "\u0F35");
uhelp("\\uF035", "[#ERROR CONVERTING ACIP DOCUMENT: The Unicode escape '\uf035' with ordinal 61493 is in the private-use area (PUA) of Unicode and will thus not be written out into the output lest you think other tools will be able to understand this non-standard construction.]"); uhelp("\\uF035", "[#ERROR 135: The Unicode escape '\uf035' with ordinal (in decimal) 61493 is specified by the Extended Wylie Transliteration Scheme (EWTS), but is in the private-use area (PUA) of Unicode and will thus not be written out into the output lest you think other tools will be able to understand this non-standard construction.]");
uhelp("KI", "\u0f40\u0f72"); uhelp("KI", "\u0f40\u0f72");
uhelp("KO", "\u0f40\u0f7c"); uhelp("KO", "\u0f40\u0f7c");
uhelp("KE", "\u0f40\u0f7a"); uhelp("KE", "\u0f40\u0f7a");
@ -7603,7 +7646,7 @@ M+NA
uhelp("*#HUm: G+DHOO GRO`;.,", uhelp("*#HUm: G+DHOO GRO`;.,",
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa1\u0fb7\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa1\u0fb7\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
uhelp("*#HUm: K+DHA GRO`;.,", uhelp("*#HUm: K+DHA GRO`;.,",
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {K+DHA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f40\u0fa1\u0fb7\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#WARNING 511: The ACIP {K+DHA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f40\u0fa1\u0fb7\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
uhelp("HA,\nHA\n\nHA", "\u0f67\u0f0d \u0f67\u0f0b\n\n\u0f67"); uhelp("HA,\nHA\n\nHA", "\u0f67\u0f0d \u0f67\u0f0b\n\n\u0f67");
uhelp("NGA,", "\u0f44\u0f0c\u0f0d"); uhelp("NGA,", "\u0f44\u0f0c\u0f0d");
uhelp("NGA,\nHA\n\nHA", "\u0f44\u0f0c\u0f0d \u0f67\u0f0b\n\n\u0f67"); uhelp("NGA,\nHA\n\nHA", "\u0f44\u0f0c\u0f0d \u0f67\u0f0b\n\n\u0f67");
@ -7616,8 +7659,8 @@ M+NA
uhelp("GU, ,KHO", "\u0f42\u0f74\u0f0d \u0f0d\u0f41\u0f7c"); uhelp("GU, ,KHO", "\u0f42\u0f74\u0f0d \u0f0d\u0f41\u0f7c");
uhelp("GU ,KHO", "\u0f42\u0f74\u0f0b \u0f0d\u0f41\u0f7c"); // FIXME: missing a shad after GU, warn about that. uhelp("GU ,KHO", "\u0f42\u0f74\u0f0b \u0f0d\u0f41\u0f7c"); // FIXME: missing a shad after GU, warn about that.
uhelp("GA HA", "\u0f42\u0f0b \u0f67"); uhelp("GA HA", "\u0f42\u0f0b \u0f67");
uhelp("BCWA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba"); uhelp("BCWA", "[#WARNING 511: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
uhelp("'KYO", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {'+K+YO}, but only because our knowledge of prefix rules says that {'}{K+YO} is not a legal Tibetan tsheg bar (\"syllable\")][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {'+K+YO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f60\u0f90\u0fb1\u0f7c"); uhelp("'KYO", "[#WARNING 501: Using {'+K+YO} for the ACIP {'KYO}, but only because the tool's knowledge of prefix rules (see the documentation) says that {'}{K+YO} is not a legal Tibetan tsheg bar (\"syllable\")][#WARNING 511: The ACIP {'+K+YO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f60\u0f90\u0fb1\u0f7c");
uhelp("WA", "\u0f5d"); uhelp("WA", "\u0f5d");
uhelp("W", "\u0f5d"); uhelp("W", "\u0f5d");
uhelp("WO", "\u0f5d\u0f7c"); uhelp("WO", "\u0f5d\u0f7c");
@ -7635,21 +7678,21 @@ M+NA
uhelp("WRA", "\u0f5d\u0fb2"); uhelp("WRA", "\u0f5d\u0fb2");
uhelp("W+RA", "\u0f5d\u0fb2"); uhelp("W+RA", "\u0f5d\u0fb2");
uhelp("W+R", "\u0f5d\u0fb2"); uhelp("W+R", "\u0f5d\u0fb2");
uhelp("BCWA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba"); uhelp("BCWA", "[#WARNING 511: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
uhelp("BCW", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+W} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba"); uhelp("BCW", "[#WARNING 511: The ACIP {B+C+W} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
uhelp("BCWO", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba\u0f7c"); uhelp("BCWO", "[#WARNING 511: The ACIP {B+C+WO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba\u0f7c");
uhelp("BCVA", "\u0f56\u0f45\u0fad"); uhelp("BCVA", "\u0f56\u0f45\u0fad");
uhelp("BCV", "\u0f56\u0f45\u0fad"); uhelp("BCV", "\u0f56\u0f45\u0fad");
uhelp("BCV'O", "\u0f56\u0f45\u0fad\u0f71\u0f7c"); uhelp("BCV'O", "\u0f56\u0f45\u0fad\u0f71\u0f7c");
uhelp("BCV'A", "\u0f56\u0f45\u0fad\u0f71"); uhelp("BCV'A", "\u0f56\u0f45\u0fad\u0f71");
uhelp("BCV'", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+V+'} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fad\u0fb0"); uhelp("BCV'", "[#WARNING 511: The ACIP {B+C+V+'} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fad\u0fb0");
uhelp("GYA", "\u0f42\u0fb1"); uhelp("GYA", "\u0f42\u0fb1");
uhelp("GY", "\u0f42\u0fb1"); uhelp("GY", "\u0f42\u0fb1");
uhelp("G-YA", "\u0f42\u0f61"); uhelp("G-YA", "\u0f42\u0f61");
uhelp("GA-YA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a useless disambiguator in GA-YA.]\u0f42\u0f61"); uhelp("GA-YA", "[#WARNING 505: There is a useless disambiguator in {GA-YA}.]\u0f42\u0f61");
uhelp("GA-YO", "[#WARNING CONVERTING ACIP DOCUMENT: There is a useless disambiguator in GA-YO.]\u0f42\u0f61\u0F7c"); uhelp("GA-YO", "[#WARNING 505: There is a useless disambiguator in {GA-YO}.]\u0f42\u0f61\u0F7c");
uhelp("RTZVA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTZVA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f62\u0fa9\u0fad"); uhelp("RTZVA", "[#WARNING 507: There is a chance that the ACIP {RTZVA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f62\u0fa9\u0fad");
uhelp("RTZWA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTZWA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {R+TZ+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f62\u0fa9\u0fba"); uhelp("RTZWA", "[#WARNING 507: There is a chance that the ACIP {RTZWA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING 511: The ACIP {R+TZ+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f62\u0fa9\u0fba");
} }
public void testFixedFormSubjoinedConsonants() { public void testFixedFormSubjoinedConsonants() {
// Usual subjoined RA: // Usual subjoined RA:
@ -7675,7 +7718,7 @@ M+NA
+ "\u0f61\u0fbb\u0f7b\u0f0b" // Y+YEE + "\u0f61\u0fbb\u0f7b\u0f0b" // Y+YEE
+ "\u0f4e\u0f9c\u0fbc\u0fb1\u0f0b" // ndRYA + "\u0f4e\u0f9c\u0fbc\u0fb1\u0f0b" // ndRYA
+ "\u0f4e\u0f9c\u0fbc\u0fb1\u0f7b\u0f0b" // n+d+R+YEE + "\u0f4e\u0f9c\u0fbc\u0fb1\u0f7b\u0f0b" // n+d+R+YEE
+ "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP KshR was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f40\u0fb5\u0fbc\u0f0b" // KshR + "[#WARNING 507: There is a chance that the ACIP {KshR} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f40\u0fb5\u0fbc\u0f0b" // KshR
+ "\u0f40\u0fb5\u0fbc\u0f7b\u0f0b" // K+sh+REE + "\u0f40\u0fb5\u0fbc\u0f7b\u0f0b" // K+sh+REE
+ "\u0f4e\u0f9c\u0fbb\u0f0b" // ndY + "\u0f4e\u0f9c\u0fbb\u0f0b" // ndY
+ "\u0f4e\u0f9c\u0fbb\u0f7b\u0f0d" // n+d+YEE + "\u0f4e\u0f9c\u0fbb\u0f7b\u0f0d" // n+d+YEE
@ -7702,6 +7745,9 @@ M+NA
a2ahelp("/'A/"); a2ahelp("/'A/");
a2ahelp("/1/"); a2ahelp("/1/");
a2ahelp("/1/"); a2ahelp("/1/");
a2ahelp("#**##*");
a2ahelp("#");
a2ahelp("*");
assertTrue(ACIP2TMW2ACIP("RTSNYA") == null); // R+TS+NYA is thought of, not R+T+S+N+YA -- FIXME 936998 assertTrue(ACIP2TMW2ACIP("RTSNYA") == null); // R+TS+NYA is thought of, not R+T+S+N+YA -- FIXME 936998
a2ahelp("N+DZY", "N+DZ+YA"); // R+TS+NYA is not thought of as R+T+S+N+YA; note the (documented and necessary) inconsistency a2ahelp("N+DZY", "N+DZ+YA"); // R+TS+NYA is not thought of as R+T+S+N+YA; note the (documented and necessary) inconsistency
} }
@ -10171,8 +10217,8 @@ tstHelper("shKA");
// 'BYONGS [BLO,) S0375M.ACT // 'BYONGS [BLO,) S0375M.ACT
/* FIXME: BDEm: is different than BDE for us, is that OK? /* DLC FIXME: BDEm: is different than BDE for us, is that OK?
uhelp("BDEm:", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP BDEm: has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]PLACEHOLDER"); uhelp("BDEm:", "[#WARNING The ACIP BDEm: has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]PLACEHOLDER");
tstHelper("BDA:", "{B}{DA:}", tstHelper("BDA:", "{B}{DA:}",
new String[] { "{B+DA:}", "{B}{DA:}" }, new String[] { "{B+DA:}", "{B}{DA:}" },
new String[] { "{B}{DA:}" }, new String[] { "{B}{DA:}" },

View file

@ -153,92 +153,56 @@ class TPairList {
* Returns an error message, or null if there is no error that * Returns an error message, or null if there is no error that
* you can find without the help of tsheg bar syntax rules. */ * you can find without the help of tsheg bar syntax rules. */
// FIXME: This is needlessly ACIP specific -- rename and change text of messages // FIXME: This is needlessly ACIP specific -- rename and change text of messages
String getACIPError() { String getACIPError(String originalACIP, boolean shortMessages) {
// FIXME: this returns just the first error. List all errors
// at once.
int sz = size(); int sz = size();
if (0 == sz) if (0 == sz) // FIXME: see if you can make this happen...
return "Warning, empty tsheg bar found while converting from ACIP!"; return ErrorsAndWarnings.getMessage(122, shortMessages,
boolean first = true; ((null != originalACIP)
StringBuffer rv = null; ? originalACIP
: ""));
String translit
= (null != originalACIP) ? originalACIP : recoverACIP();
boolean mustBeEntirelyNumeric = get(0).isNumeric(); boolean mustBeEntirelyNumeric = get(0).isNumeric();
for (int i = 0; i < sz; i++) { for (int i = 0; i < sz; i++) {
TPair p = get(i); TPair p = get(i);
if (mustBeEntirelyNumeric != p.isNumeric()) if (mustBeEntirelyNumeric != p.isNumeric())
return "Cannot convert ACIP " + recoverACIP() + " because it contains a number but also a non-number."; return ErrorsAndWarnings.getMessage(123, shortMessages, translit);
if ((i == 0 && "V".equals(p.getLeft())) if ((i == 0 && "V".equals(p.getLeft()))
|| (i > 0 && "V".equals(p.getLeft()) || (i > 0 && "V".equals(p.getLeft())
&& (null != get(i - 1).getRight() && (null != get(i - 1).getRight()
&& !"+".equals(get(i - 1).getRight())))) { && !"+".equals(get(i - 1).getRight())))) {
if (first) { return ErrorsAndWarnings.getMessage(124, shortMessages, translit);
first = false;
rv = new StringBuffer("Cannot convert ACIP ");
rv.append(recoverACIP());
rv.append(" because {V}, wa-zur, appears without being subscribed to a consonant.");
} else {
rv.append("; also, {V}, wa-zur, appears without being subscribed to a consonant");
}
} else if ("A".equals(p.getLeft()) && (null == p.getRight() || "".equals(p.getRight()))) { } else if ("A".equals(p.getLeft()) && (null == p.getRight() || "".equals(p.getRight()))) {
if (first) { return ErrorsAndWarnings.getMessage(125, shortMessages, translit);
first = false;
rv = new StringBuffer("Cannot convert ACIP ");
rv.append(recoverACIP());
rv.append(" because we would be required to assume that {A} is a consonant, when it is not clear if it is a consonant or a vowel.");
} else {
rv.append("; also, we would be required to assume that {A} is a consonant, when it is not clear if it is a consonant or a vowel.");
}
} else if ((null == p.getLeft() && !"-".equals(p.getRight())) } else if ((null == p.getLeft() && !"-".equals(p.getRight()))
|| (null != p.getLeft() || (null != p.getLeft()
&& !ACIPRules.isConsonant(p.getLeft()) && !ACIPRules.isConsonant(p.getLeft())
&& !p.isNumeric())) { && !p.isNumeric())) {
if (first) { // FIXME: stop handling this outside of ErrorsAndWarnings:
first = false; if (null == p.getLeft()) {
rv = new StringBuffer("Cannot convert ACIP "); if (shortMessages)
rv.append(recoverACIP()); return "128: {" + translit + "}";
rv.append(" because "); else
if (null == p.getLeft()) { return "128: Cannot convert ACIP {" + translit + "} because " + p.getRight() + " is a \"vowel\" without an associated consonant.";
rv.append(p.getRight());
rv.append(" is a \"vowel\" without an associated consonant");
} else {
rv.append(p.getLeft());
rv.append(" is not an ACIP consonant");
}
} else { } else {
if (null == p.getLeft()) { if (shortMessages)
rv.append("; also, "); return "129: {" + translit + "}";
rv.append(p.getRight()); else
rv.append(" is an ACIP \"vowel\" without an associated consonant"); return "129: Cannot convert ACIP {" + translit + "} because " + p.getLeft() + " is not an ACIP consonant.";
} else {
rv.append("; also, ");
rv.append(p.getLeft());
rv.append(" is not an ACIP consonant");
}
} }
} }
} }
if ("+".equals(get(sz - 1).getRight())) { if ("+".equals(get(sz - 1).getRight())) {
if (first) { return ErrorsAndWarnings.getMessage(126, shortMessages, translit);
first = false;
rv = new StringBuffer("Cannot convert ACIP ");
rv.append(recoverACIP());
rv.append(" because it ends with a {+}.");
} else {
rv.append("; also, it ends with a {+}.");
}
} }
// FIXME: really this is a warning, not an error: // FIXME: really this is a warning, not an error:
if ("-".equals(get(sz - 1).getRight())) { if ("-".equals(get(sz - 1).getRight())) {
if (first) { return ErrorsAndWarnings.getMessage(127, shortMessages, translit);
first = false;
rv = new StringBuffer("Cannot convert ACIP ");
rv.append(recoverACIP());
rv.append(" because it ends with a {-}.");
} else {
rv.append("; also, it ends with a {-}.");
}
} }
return null;
return (rv == null) ? null : rv.toString();
} }
/** Returns true if and only if either x is an TPairList object /** Returns true if and only if either x is an TPairList object
@ -657,10 +621,14 @@ class TPairList {
} }
/** Appends the DuffCodes that correspond to this grapheme cluster /** Appends the DuffCodes that correspond to this grapheme cluster
* to duffsAndErrors, or appends a String that is an error * to duffsAndErrors, or appends a String that is an error or
* message saying that TMW cannot represent this grapheme * warning message (a short one iff shortMessages is true) saying
* cluster. */ * that TMW cannot represent this grapheme cluster. The message
void getDuff(ArrayList duffsAndErrors) { * is Error 137 if noCorrespondingTMWGlyphIsError is true;
* otherwise, it's Warning 511. */
void getDuff(ArrayList duffsAndErrors,
boolean shortMessages,
boolean noCorrespondingTMWGlyphIsError) {
int previousSize = duffsAndErrors.size(); int previousSize = duffsAndErrors.size();
StringBuffer wylieForConsonant = new StringBuffer(); StringBuffer wylieForConsonant = new StringBuffer();
for (int x = 0; x + 1 < size(); x++) { for (int x = 0; x + 1 < size(); x++) {
@ -716,7 +684,11 @@ class TPairList {
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) { if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
hashKey = hashKey.replace('+', '-'); hashKey = hashKey.replace('+', '-');
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) { if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
duffsAndErrors.add("The ACIP {" + recoverACIP() + "} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts."); duffsAndErrors.add(ErrorsAndWarnings.getMessage(noCorrespondingTMWGlyphIsError
? 137
: 511,
shortMessages,
recoverACIP()));
return; return;
} }
} }

View file

@ -18,6 +18,8 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt; package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlDebug;
import java.util.ArrayList; import java.util.ArrayList;
/** A list of non-empty list of {@link TStackListList /** A list of non-empty list of {@link TStackListList
@ -266,16 +268,28 @@ class TParseTree {
* warnings about lacking vowels on final stacks, "Some" to see * warnings about lacking vowels on final stacks, "Some" to see
* warnings about lacking vowels on non-final stacks and also * warnings about lacking vowels on non-final stacks and also
* warnings about when prefix rules affect you, "None" if you * warnings about when prefix rules affect you, "None" if you
* like to see IllegalArgumentExceptions. * like to see IllegalArgumentExceptions thrown. (Actually, this
* refers only to the default values -- the level at which any
* particular warning appears is customizable.)
* @param pl the pair list from which this parse tree originated * @param pl the pair list from which this parse tree originated
* @param originalACIP the original ACIP, or null if you want * @param originalACIP the original ACIP, or null if you want
* this parse tree to make a best guess. */ * this parse tree to make a best guess.
* @param shortMessages true iff you want short error and warning
* messages */
public String getWarning(String warningLevel, public String getWarning(String warningLevel,
TPairList pl, TPairList pl,
String originalACIP) { String originalACIP,
if (warningLevel != "Some" boolean shortMessages) {
&& warningLevel != "Most" // ROOM_FOR_IMPROVEMENT: Allow one tsheg bar to have multiple
&& warningLevel != "All") // warnings/errors associated with it. Make this a private
// subroutine, and have the public getWarning(..) call on this
// subroutine again and again until no new error is found. If
// call N yields warning 506, then disable 506 and call again.
// If you get 508, call again, etc. Finally, restore 506
// etc. and return the concatenation of messages 506 and 508.
// {DGYAM--S} should yield both 505 and 509.
if (!ErrorsAndWarnings.warningLevelIsKnown(warningLevel))
throw new IllegalArgumentException("warning level bad: is it interned?"); throw new IllegalArgumentException("warning level bad: is it interned?");
TStackList bestParse = getBestParse(); TStackList bestParse = getBestParse();
@ -283,37 +297,51 @@ class TParseTree {
TStackListList noPrefixTestsUniqueParse = getUniqueParse(true); TStackListList noPrefixTestsUniqueParse = getUniqueParse(true);
if (noPrefixTestsUniqueParse.size() == 1 if (noPrefixTestsUniqueParse.size() == 1
&& !noPrefixTestsUniqueParse.get(0).equals(bestParse)) { && !noPrefixTestsUniqueParse.get(0).equals(bestParse)) {
if (warningLevel != "Some") if (ErrorsAndWarnings.isEnabled(501, warningLevel))
return "Warning: We're going with " + bestParse + ", but only because our knowledge of prefix rules says that " + noPrefixTestsUniqueParse.get(0) + " is not a legal Tibetan tsheg bar (\"syllable\")"; if (shortMessages)
return "501: Using " + bestParse + ", not " + noPrefixTestsUniqueParse.get(0);
else
return "501: Using " + bestParse + ((null != originalACIP) ? (" for the ACIP {" + originalACIP + "}") : "") + ", but only because the tool's knowledge of prefix rules (see the documentation) says that " + noPrefixTestsUniqueParse.get(0) + " is not a legal Tibetan tsheg bar (\"syllable\")";
} }
} }
String translit = (null != originalACIP) ? originalACIP : recoverACIP();
TStackListList up = getUniqueParse(false); TStackListList up = getUniqueParse(false);
if (null == up || up.size() != 1) { if (null == up || up.size() != 1) {
// FIXME: code duplication
boolean isLastStack[] = new boolean[1]; boolean isLastStack[] = new boolean[1];
TStackListList nip = getNonIllegalParses(); TStackListList nip = getNonIllegalParses();
if (nip.size() != 1) { if (nip.size() != 1) {
if (null == bestParse) { if (null == bestParse) {
return "Warning: There's not even a unique, non-illegal parse for ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}"; /* FIXME: Is this case possible? We can get to it
in unit testing (and we do), but is there any
ACIP input file that will cause this? */
// FIXME: IS 101 NOT TREATED AS AN error, BUT
// INSTEAD TREATED AS A warning?
//
// FIXME: The caller will prepend "WARNING " to this error!
if (ErrorsAndWarnings.isEnabled(101, warningLevel))
return ErrorsAndWarnings.getMessage(101, shortMessages,
translit);
} else { } else {
if (bestParse.hasStackWithoutVowel(pl, isLastStack)) { if (bestParse.hasStackWithoutVowel(pl, isLastStack)) {
if (isLastStack[0]) { if (isLastStack[0]) {
if (warningLevel == "All") if (ErrorsAndWarnings.isEnabled(502, warningLevel))
return "Warning: The last stack does not have a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}; this may indicate a typo, because Sanskrit, which this is (because it's not legal Tibetan), should have a vowel after each stack."; return ErrorsAndWarnings.getMessage(502, shortMessages,
translit);
} else { } else {
throw new Error("Can't happen now that we stack greedily"); throw new Error("Can't happen now that we stack greedily");
} }
} }
if ("All" == warningLevel) { if (ErrorsAndWarnings.isEnabled(503, warningLevel))
return "Warning: Though the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful."; return ErrorsAndWarnings.getMessage(503, shortMessages,
} translit);
} }
} else { } else {
if (nip.get(0).hasStackWithoutVowel(pl, isLastStack)) { if (nip.get(0).hasStackWithoutVowel(pl, isLastStack)) {
if (isLastStack[0]) { if (isLastStack[0]) {
if (warningLevel == "All") if (ErrorsAndWarnings.isEnabled(502, warningLevel))
return "Warning: The last stack does not have a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}; this may indicate a typo, because Sanskrit, which this is (because it's not legal Tibetan), should have a vowel after each stack."; return ErrorsAndWarnings.getMessage(502, shortMessages,
translit);
} else { } else {
throw new Error("Can't happen now that we stack greedily [2]"); throw new Error("Can't happen now that we stack greedily [2]");
} }
@ -330,14 +358,13 @@ class TParseTree {
// Check for useless disambiguators. // Check for useless disambiguators.
{ {
int plnum = 0; int plnum = 0;
String swarn
= "There is a stack of three or more consonants in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " that uses at least one '+' but does not use a '+' between each consonant.";
String disamWarn
= "There is a useless disambiguator in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + ".";
while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) { while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
++plnum; ++plnum;
return disamWarn; if (ErrorsAndWarnings.isEnabled(505, warningLevel))
return ErrorsAndWarnings.getMessage(505, shortMessages,
translit);
} }
plnum = 0;
for (int stackNum = 0; stackNum < bestParse.size(); stackNum++) { for (int stackNum = 0; stackNum < bestParse.size(); stackNum++) {
TPairList stack = bestParse.get(stackNum); TPairList stack = bestParse.get(stackNum);
int type = 0; int type = 0;
@ -350,12 +377,16 @@ class TParseTree {
if (type == 0) if (type == 0)
type = -1; type = -1;
else if (type == 1) else if (type == 1)
return swarn; if (ErrorsAndWarnings.isEnabled(506, warningLevel))
return ErrorsAndWarnings.getMessage(506, shortMessages,
translit);
} else { } else {
if (type == 0) if (type == 0)
type = 1; type = 1;
else if (type == -1) else if (type == -1)
return swarn; if (ErrorsAndWarnings.isEnabled(506, warningLevel))
return ErrorsAndWarnings.getMessage(506, shortMessages,
translit);
} }
} }
if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) { if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
@ -364,12 +395,15 @@ class TParseTree {
} }
} }
if (hasAmbiguousConsonant && -1 == type) { if (hasAmbiguousConsonant && -1 == type) {
if ("Most" == warningLevel || "All" == warningLevel) if (ErrorsAndWarnings.isEnabled(507, warningLevel))
return "There is a chance that the ACIP " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too."; return ErrorsAndWarnings.getMessage(507, shortMessages,
translit);
} }
while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) { while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
++plnum; ++plnum;
return disamWarn; if (ErrorsAndWarnings.isEnabled(505, warningLevel))
return ErrorsAndWarnings.getMessage(505, shortMessages,
translit);
} }
} }
} }
@ -386,8 +420,15 @@ class TParseTree {
&& null != left && null != right) { && null != left && null != right) {
if (("D".equals(left) && "G".equals(middle) && "R".equals(right)) if (("D".equals(left) && "G".equals(middle) && "R".equals(right))
|| ("D".equals(left) && "G".equals(middle) && "Y".equals(right))) { || ("D".equals(left) && "G".equals(middle) && "Y".equals(right))) {
if (pl.size() == 3 || "Some" != warningLevel) if (pl.size() == 3) {
return "The ACIP " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters."; if (ErrorsAndWarnings.isEnabled(508, warningLevel))
return ErrorsAndWarnings.getMessage(508, shortMessages,
translit);
} else {
if (ErrorsAndWarnings.isEnabled(509, warningLevel))
return ErrorsAndWarnings.getMessage(509, shortMessages,
translit);
}
} }
} }
} }
@ -404,8 +445,15 @@ class TParseTree {
|| ("G".equals(left) && "D".equals(right)) || ("G".equals(left) && "D".equals(right))
|| ("D".equals(left) && "N".equals(right)) || ("D".equals(left) && "N".equals(right))
|| ("M".equals(left) && "N".equals(right))) { || ("M".equals(left) && "N".equals(right))) {
if (pl.size() == 2 || "Some" != warningLevel) if (pl.size() == 2) {
return "The ACIP " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters."; if (ErrorsAndWarnings.isEnabled(508, warningLevel))
return ErrorsAndWarnings.getMessage(508, shortMessages,
translit);
} else {
if (ErrorsAndWarnings.isEnabled(509, warningLevel))
return ErrorsAndWarnings.getMessage(509, shortMessages,
translit);
}
} }
} }
} }

View file

@ -183,8 +183,10 @@ class TStackList {
boolean isClearlyIllegal() { boolean isClearlyIllegal() {
// check for {D}{VA} sorts of things: // check for {D}{VA} sorts of things:
for (int i = 0; i < size(); i++) { for (int i = 0; i < size(); i++) {
if (get(i).getACIPError() != null) { if (get(i).getACIPError("THIS MAKES IT FASTER AND IS SAFE, DON'T WORRY",
if (ddebug) System.out.println("ddebug: error is " + get(i).getACIPError()); true /* faster... */)
!= null) {
if (ddebug) System.out.println("ddebug: error is " + get(i).getACIPError("THIS MAKES IT FASTER AND IS SAFE, DON'T WORRY", false));
return true; return true;
} }
} }
@ -237,12 +239,14 @@ class TStackList {
/** Returns the DuffCodes and errors corresponding to this stack /** Returns the DuffCodes and errors corresponding to this stack
list. Each element of the array is a DuffCode or a String, the list. Each element of the array is a DuffCode or a String, the
latter if and only if the TMW font cannot represent the latter if and only if the TMW font cannot represent the
corresponding stack in this list. */ corresponding stack in this list. Iff shortMessages is true,
Object[] getDuff() { the String elements will be shorter messages. */
Object[] getDuff(boolean shortMessages,
boolean noCorrespondingTMWGlyphIsError) {
ArrayList al = new ArrayList(size()*2); // rough estimate ArrayList al = new ArrayList(size()*2); // rough estimate
int count = 0; int count = 0;
for (int i = 0; i < size(); i++) { for (int i = 0; i < size(); i++) {
get(i).getDuff(al); get(i).getDuff(al, shortMessages, noCorrespondingTMWGlyphIsError);
} }
if (size() > 0 && al.size() == 0) { if (size() > 0 && al.size() == 0) {
throw new Error("But this stack list, " + this + ", contains " + size() + " stacks! How can it not have DuffCodes associated with it?"); throw new Error("But this stack list, " + this + ", contains " + size() + " stacks! How can it not have DuffCodes associated with it?");

View file

@ -10,7 +10,7 @@ License for the specific terms governing rights and limitations under the
License. License.
The Initial Developer of this software is the Tibetan and Himalayan Digital The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2001 THDL. Library (THDL). Portions created by the THDL are Copyright 2001, 2004 THDL.
All Rights Reserved. All Rights Reserved.
Contributor(s): ______________________________________. Contributor(s): ______________________________________.
@ -45,50 +45,57 @@ package org.thdl.util;
*/ */
public final class ThdlLazyException extends Error { public final class ThdlLazyException extends Error {
/** /**
* the wrapped exception * the wrapped exception
*/ */
private Throwable wrappedException = null; private Throwable wrappedException = null;
/** /**
* Constructor for ThdlLazyException. * Constructor for ThdlLazyException.
*/ */
public ThdlLazyException() { public ThdlLazyException() {
super(); super();
} }
/** /**
* Constructor for ThdlLazyException. * Constructor for ThdlLazyException.
* @param descrip description * @param descrip description
*/ */
public ThdlLazyException(String descrip) { public ThdlLazyException(String descrip) {
super(descrip); super(descrip);
} }
/** /**
* Constructor for ThdlLazyException. * Constructor for ThdlLazyException.
* @param descrip description * @param descrip description
* @param realException the exception the user should actually care about * @param realException the exception the user should actually care about
*/ */
public ThdlLazyException(String descrip, Throwable realException) { public ThdlLazyException(String descrip, Throwable realException) {
super(descrip); super(descrip);
wrappedException = realException; wrappedException = realException;
} }
/** /**
* Constructor for ThdlLazyException. * Constructor for ThdlLazyException.
* @param realException the exception the user should actually care about * @param realException the exception the user should actually care about
*/ */
public ThdlLazyException(Throwable realException) { public ThdlLazyException(Throwable realException) {
super(); super();
wrappedException = realException; wrappedException = realException;
} }
/** /**
* Returns the wrapped exception, the one about which you should actually * Returns the wrapped exception, the one about which you should actually
* be concerned. * be concerned.
*/ */
public Throwable getRealException() { public Throwable getRealException() {
return wrappedException; return wrappedException;
} }
public String toString() {
return "ThdlLazyException [" + super.toString() + "] wrapping " + ((getRealException() == null) ? "nothing" : getRealException().toString());
}
public String getMessage() {
return "ThdlLazyException [" + super.getMessage() + "] wrapping " + ((getRealException() == null) ? "nothing" : getRealException().getMessage());
}
} }

View file

@ -87,7 +87,8 @@ public class ThdlLazyExceptionTest extends TestCase {
public void testThdlLazyExceptionString() { public void testThdlLazyExceptionString() {
String msg = "foo"; String msg = "foo";
ThdlLazyException e = new ThdlLazyException(msg); ThdlLazyException e = new ThdlLazyException(msg);
assertTrue(msg.equals(e.getMessage())); assertTrue("Oops: " + e.getMessage(),
"ThdlLazyException [foo] wrapping nothing".equals(e.getMessage()));
assertTrue(null == e.getRealException()); assertTrue(null == e.getRealException());
} }
@ -98,7 +99,8 @@ public class ThdlLazyExceptionTest extends TestCase {
String msg = "foo"; String msg = "foo";
IOException ioe = new IOException("bah"); IOException ioe = new IOException("bah");
ThdlLazyException e = new ThdlLazyException(msg, ioe); ThdlLazyException e = new ThdlLazyException(msg, ioe);
assertTrue(msg.equals(e.getMessage())); assertTrue("oops: " + e.getMessage(),
"ThdlLazyException [foo] wrapping bah".equals(e.getMessage()));
assertTrue(ioe.equals(e.getRealException())); assertTrue(ioe.equals(e.getRealException()));
assertTrue("bah".equals(e.getRealException().getMessage())); assertTrue("bah".equals(e.getRealException().getMessage()));
} }