Robert Chilton's experience inspired me to make the handling of errors and
warnings in ACIP->Tibetan conversion much more configurable.  You can
now choose from short or long error messages, for one thing.  You can change
the severity of almost all warnings.  Each error and warning has an error code.
Errors and warnings are better tested.

The converter GUI has a new checkbox for short messages; the converter
CLI has a new mandatory option for short messages.

I also fixed a bug whereby certain errors were not being appended to the
'errors' StringBuffer.
This commit is contained in:
dchandler 2004-04-24 17:49:16 +00:00
parent cc5d096918
commit e2d42f36eb
22 changed files with 1106 additions and 506 deletions

View file

@ -144,3 +144,21 @@ thdl.log.line.breaking.algorithm = false
# disappears from the input. We turn these guys into Unicode escapes
# when this is false. We leave it buggy when this is true.
thdl.do.not.fix.rtf.hex.escapes = false
# ACIP->Tibetan conversions have numerous warnings. If you want to
# see warning 501 even at the "Some" level, just change the option
# thdl.acip.to.tibetan.warning.severity.501 to Some. You cannot make
# a warning into an error, and you cannot make an error into a
# warning. 504 and 510 cannot be downgraded; they are always
# "Some"-level.
thdl.acip.to.tibetan.warning.severity.501 = Most
thdl.acip.to.tibetan.warning.severity.502 = All
thdl.acip.to.tibetan.warning.severity.503 = All
thdl.acip.to.tibetan.warning.severity.504 = Some
thdl.acip.to.tibetan.warning.severity.505 = Some
thdl.acip.to.tibetan.warning.severity.506 = Some
thdl.acip.to.tibetan.warning.severity.507 = Most
thdl.acip.to.tibetan.warning.severity.508 = Some
thdl.acip.to.tibetan.warning.severity.509 = Most
thdl.acip.to.tibetan.warning.severity.510 = Some
thdl.acip.to.tibetan.warning.severity.511 = Some

View file

@ -40,6 +40,9 @@ class ConvertDialog extends JDialog
private JCheckBox colors;
private static final String colorDesc = "Color-coding (ACIP to RTF only)";
private JCheckBox shortMessages;
private static final String shortMessagesDesc = "Short warning and error messages (ACIP to Tibetan only)";
// Attributes
private FontConversion controller;
@ -99,11 +102,17 @@ class ConvertDialog extends JDialog
updateWarningLevels();
temp.add(warningLevels);
content.add(temp);
temp = new JPanel(new FlowLayout(FlowLayout.CENTER,5,5));
this.colors = new JCheckBox(colorDesc, false);
this.colors.addActionListener(tal);
this.shortMessages = new JCheckBox(shortMessagesDesc, false);
this.shortMessages.addActionListener(tal);
updateWarningLevels();
temp.add(colors);
temp.add(shortMessages);
content.add(temp);
temp = new JPanel(new FlowLayout(FlowLayout.CENTER,5,5));
@ -160,7 +169,7 @@ class ConvertDialog extends JDialog
content.add(buttonBox);
setContentPane(content);
pack();
setSize(new Dimension(640,235));
setSize(new Dimension(600,240));
}
private void setChoices(String[] choices)
@ -301,6 +310,7 @@ class ConvertDialog extends JDialog
convertedFile,
(String)choices.getSelectedItem(),
(String)warningLevels.getSelectedItem(),
shortMessages.isSelected(),
colors.isSelected());
} catch (OutOfMemoryError e) {
JOptionPane.showMessageDialog(this,

View file

@ -49,7 +49,7 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
public boolean doConversion(ConvertDialog cd, File oldFile, File newFile,
String whichConversion, String warningLevel,
boolean colors) {
boolean shortMessages, boolean colors) {
PrintStream ps;
try {
if (whichConversion == ACIP_TO_UNI_TEXT) {
@ -64,6 +64,7 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
false),
whichConversion,
warningLevel,
shortMessages,
colors);
ps.close();
} catch (FileNotFoundException e) {

View file

@ -36,9 +36,12 @@ interface FontConversion
conversion performed is specified by the interned String
whichConversion, which must be one of the known conversions.
If you want colors to be used in the output (which is only
supported by a few conversions), then colors must be true.
supported by a few conversions), then colors must be true. If
you want short error and warning messages for ACIP to Tibetan
conversions, then shortMessages must be true.
@return true on success, false otherwise */
boolean doConversion(ConvertDialog cd, File oldFile,
File newFile, String whichConversion,
String warningLevel, boolean colors);
String warningLevel, boolean shortMessages,
boolean colors);
}

View file

@ -63,6 +63,6 @@ interface FontConverterConstants
final String suggested_TO_TM_prefix = "TM_";
// String Constants
public final String PROGRAM_TITLE = "THDL Font Conversion (with Jskad Technology)";
public final String PROGRAM_TITLE = "THDL Tibetan Converters -- featuring Jskad Technology";
}

View file

@ -268,7 +268,7 @@ public class Jskad extends JPanel implements DocumentListener {
fileMenu = new JMenu("File");
JMenuItem newItem = new JMenuItem("New");
JMenuItem newItem = new JMenuItem("New...");
// newItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_N,java.awt.Event.CTRL_MASK)); //Ctrl-n
newItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
@ -652,7 +652,7 @@ public class Jskad extends JPanel implements DocumentListener {
JMenu helpMenu = new JMenu("Help");
{
JMenuItem helpItem = new JMenuItem("Help");
JMenuItem helpItem = new JMenuItem("Help...");
helpItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
CalHTMLPane helpPane = new CalHTMLPane();
@ -672,7 +672,7 @@ public class Jskad extends JPanel implements DocumentListener {
}
{
JMenuItem helpItem = new JMenuItem("Jskad on the Web");
JMenuItem helpItem = new JMenuItem("Jskad on the Web...");
helpItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
CalHTMLPane onlineHelpPane = new CalHTMLPane();
@ -692,7 +692,7 @@ public class Jskad extends JPanel implements DocumentListener {
for (int i = 0; i < keybdMgr.size(); i++) {
final JskadKeyboard kbd = keybdMgr.elementAt(i);
if (kbd.hasQuickRefFile()) {
JMenuItem keybdItem = new JMenuItem(kbd.getIdentifyingString());
JMenuItem keybdItem = new JMenuItem(kbd.getIdentifyingString() + "...");
keybdItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
new SimpleFrame(kbd.getIdentifyingString(),
@ -712,7 +712,7 @@ public class Jskad extends JPanel implements DocumentListener {
helpMenu.addSeparator();
{
JMenuItem aboutItem = new JMenuItem("About");
JMenuItem aboutItem = new JMenuItem("About...");
aboutItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
JOptionPane.showMessageDialog(Jskad.this,

View file

@ -106,6 +106,8 @@ public class TMW_RTF_TO_THDL_WYLIETest extends TestCase {
"no",
"--warning-level",
"All",
"--acip-to-tibetan-warning-and-error-messages",
"long",
mode,
getTestFileName(testName)
};
@ -130,6 +132,8 @@ public class TMW_RTF_TO_THDL_WYLIETest extends TestCase {
+ "thdl" + File.separator
+ "tib" + File.separator
+ "input" + File.separator
// FIXME: one of the files named '.rtf' is really a text
// file:
+ "TMW_RTF_TO_THDL_WYLIE" + testName + ".rtf";
}

View file

@ -30,14 +30,13 @@ import org.thdl.tib.text.ttt.ACIPConverter;
import org.thdl.tib.text.ttt.ACIPTshegBarScanner;
import java.util.ArrayList;
/** TibetanConverter is a command-line utility for converting to
* and from Tibetan Machine Web (TMW). It converts TMW to Wylie, to
* Unicode, or to Tibetan Machine (TM). It also converts TM to TMW.
* It is a TibetanMachineWeb-in-RichTextFormat to your choice of
* TibetanMachine-in-RichTextFormat, THDL Extended
* Wylie-in-RichTextFormat, or Unicode-in-RichTextFormat converter,
* more specifically, as well as converting from TM to TMW. Invoke
* it with no parameters for usage information.
/** TibetanConverter is a command-line utility for converting to and
* from Tibetan Machine Web (TMW). It converts TMW to Wylie, ACIP,
* Unicode, or to Tibetan Machine (TM). It also converts to TMW from
* TM or ACIP. Some conversions use RTF (rich text format); some use
* text. Invoke it with no parameters for usage information. Full
* documentation is available at <a
* href="http://thdltools.sourceforge.net/TMW_RTF_TO_THDL_WYLIE.html">http://thdltools.sourceforge.net/TMW_RTF_TO_THDL_WYLIE.html</a>.
* @author David Chandler */
public class TibetanConverter implements FontConverterConstants {
private static final boolean debug = false;
@ -80,22 +79,28 @@ public class TibetanConverter implements FontConverterConstants {
boolean findAllNonTMMode = false;
boolean colors = false;
boolean shortMessages = false;
String warningLevel = null;
// Process arguments:
final int numArgs = 6;
final int numArgs = 8;
if ((args.length != 1 && args.length != numArgs)
|| (args.length == 1
&& !(args[0].equals("-v")
|| args[0].equals("--version")))
|| (args.length == numArgs
&& (!(args[numArgs - 6].equals("--colors"))
|| !((colors = args[numArgs - 5].equals("yes"))
|| args[numArgs - 5].equals("no"))
|| !(args[numArgs - 4].equals("--warning-level"))
|| !(args[numArgs - 3].equals("Most")
|| args[numArgs - 3].equals("Some")
|| args[numArgs - 3].equals("All")
|| args[numArgs - 3].equals("None"))
&& (!(args[numArgs - 8].equals("--colors"))
|| !((colors = args[numArgs - 7].equals("yes"))
|| args[numArgs - 7].equals("no"))
|| !(args[numArgs - 6].equals("--warning-level"))
|| !((warningLevel = args[numArgs - 5]).equals("Most")
|| warningLevel.equals("Some")
|| warningLevel.equals("All")
|| warningLevel.equals("None"))
|| !(args[numArgs - 4].equals("--acip-to-tibetan-warning-and-error-messages"))
|| !((shortMessages = args[numArgs - 3].equals("short"))
|| args[numArgs - 3].equals("long"))
|| !((findAllNonTMWMode
= args[numArgs - 2].equals("--find-all-non-tmw"))
|| (convertToTMMode
@ -123,8 +128,15 @@ public class TibetanConverter implements FontConverterConstants {
|| (findAllNonTMMode
= args[numArgs - 2].equals("--find-all-non-tm"))
)))) {
if (args.length != numArgs) {
out.println("");
out.println("Wrong number of arguments; needs " + numArgs + " arguments.");
out.println("");
}
out.println("TibetanConverter --colors yes|no");
out.println(" --warning-level None|Some|Most|All");
out.println(" --acip-to-tibetan-warning-and-error-messages short|long");
out.println(" --find-all-non-tmw | --find-some-non-tmw");
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
out.println(" | --to-unicode | --to-wylie | --to-acip");
@ -189,10 +201,17 @@ public class TibetanConverter implements FontConverterConstants {
out.println("You may find it helpful to use `--find-some-non-tmw' mode (or");
out.println("`--find-some-non-tm' mode for Tibetan Machine input) before doing a");
out.println("conversion so that you have confidence in the conversion's correctness.");
out.println("");
out.println("When using short error and warning messages for ACIP->Tibetan conversions,");
out.println("i.e. when '--acip-to-tibetan-warning-and-error-messages short' is given,");
out.println("the output will contain error and warning numbers. The following are the");
out.println("long forms of each warning and error:");
out.println("");
org.thdl.tib.text.ttt.ErrorsAndWarnings.printErrorAndWarningDescriptions(out);
return 77;
}
if (args[0].equals("--version") || args[0].equals("-v")) {
out.println("TibetanConverter version 0.83");
out.println("TibetanConverter version 0.84");
out.println("Compiled at "
+ ThdlVersion.getTimeOfCompilation());
return 77;
@ -237,7 +256,8 @@ public class TibetanConverter implements FontConverterConstants {
}
}
return reallyConvert(in, out, conversionTag,
args[numArgs - 3].intern(), colors);
warningLevel.intern(), shortMessages,
colors);
} catch (ThdlLazyException e) {
out.println("TibetanConverter has a BUG:");
e.getRealException().printStackTrace(out);
@ -258,17 +278,21 @@ public class TibetanConverter implements FontConverterConstants {
/** Reads from in, closes in, converts (or finds some/all
non-TM/TMW), writes the result to out, does not close out.
The action taken depends on ct, which must be one of a set
number of strings -- see the code. Returns an appropriate
return code so that TibetanConverter's usage message is
honored. */
number of strings -- see the code. Uses short error and
warning messages if shortMessages is true; gives no warnings
or many warnings depending on warningLevel. Returns an
appropriate return code so that TibetanConverter's usage
message is honored. */
static int reallyConvert(InputStream in, PrintStream out, String ct,
String warningLevel, boolean colors) {
String warningLevel, boolean shortMessages,
boolean colors) {
if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct) {
try {
ArrayList al = ACIPTshegBarScanner.scanStream(in, null,
ArrayList al
= ACIPTshegBarScanner.scanStream(in, null,
ThdlOptions.getIntegerOption("thdl.most.errors.a.tibetan.acip.document.can.have",
1000 - 1)
);
1000 - 1),
shortMessages);
if (null == al)
return 47;
boolean embeddedWarnings = (warningLevel != "None");
@ -277,14 +301,16 @@ public class TibetanConverter implements FontConverterConstants {
if (!ACIPConverter.convertToUnicodeText(al, out, null,
null, hasWarnings,
embeddedWarnings,
warningLevel))
warningLevel,
shortMessages))
return 46;
} else {
if (ct != ACIP_TO_TMW) throw new Error("badness");
if (!ACIPConverter.convertToTMW(al, out, null, null,
hasWarnings,
embeddedWarnings,
warningLevel, colors))
warningLevel, shortMessages,
colors))
return 46;
}
if (embeddedWarnings && hasWarnings[0])

View file

@ -333,7 +333,7 @@ public class TibTextUtils implements THDLWylieConstants {
throws InvalidACIPException
{
StringBuffer errors = new StringBuffer();
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500);
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500, false);
if (null == al || errors.length() > 0) {
if (errors.length() > 0)
throw new InvalidACIPException(errors.toString());
@ -350,7 +350,7 @@ public class TibTextUtils implements THDLWylieConstants {
int tloc[] = new int[] { loc };
ACIPConverter.convertToTMW(al, tdoc, null, null, null,
putWarningsInOutput, warningLevel,
colors, tloc);
false, colors, tloc);
return tloc[0] - loc;
} catch (IOException e) {
throw new Error("Can't happen: " + e);

View file

@ -14,10 +14,11 @@
// sure to run 'ant clean check' after your change.
//
// Note that some glyphs have EWTS \uF021-\uF0FF inclusive. These do
// not have anything in the Unicode column, though, because this is just
// the EWTS -- if someone wants to convert TMSkt3.183->Unicode and get
// \u0F21, let them do that, but our *->Unicode converters shouldn't
// output codes in the PUA without explicit user authorization.
// not have anything in the Unicode column, though, because this is
// just the EWTS -- if someone wants to convert TMSkt3.183->Unicode
// and get \u0F21, let them do that, but our *->Unicode converters
// shouldn't output codes in the private-use area (PUA) without
// explicit user authorization.
//
// Note that 0F00, 0F02, 0F03, and 0F0E are made by using multiple
// glyphs from TMW. 0F6A is not listed here (DLC FIXME: should it be?

View file

@ -390,5 +390,18 @@ public class UnicodeUtils implements UnicodeConstants {
&& '\u0F48' != cp
&& '\u0F98' != cp);
}
/** Reports whether cp is one of the code points in the Tibetan
    range of Unicode 4.0 that is reserved, i.e. not yet assigned to
    any character.
    @param cp the character to test
    @return true if and only if cp is U+0F48, U+0F98, U+0FBD,
    U+0FCD, or U+0FCE, or lies in one of the inclusive runs
    U+0F6B through U+0F70, U+0F8C through U+0F8F, or U+0FD0 through
    U+0FFF */
public static boolean isReservedTibetanCode(char cp) {
    // Isolated holes in the block:
    switch (cp) {
    case '\u0F48':
    case '\u0F98':
    case '\u0FBD':
    case '\u0FCD':
    case '\u0FCE':
        return true;
    default:
        break;
    }
    // Gap after the consonants:
    if (cp >= '\u0F6B' && cp <= '\u0F70') {
        return true;
    }
    // Gap before the subjoined consonants:
    if (cp >= '\u0F8C' && cp <= '\u0F8F') {
        return true;
    }
    // Everything from U+0FD0 to the end of the block:
    return cp >= '\u0FD0' && cp <= '\u0FFF';
}
}

View file

@ -10,7 +10,7 @@ License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
Library (THDL). Portions created by the THDL are Copyright 2003-2004 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
@ -37,11 +37,14 @@ import org.thdl.tib.text.DuffCode;
*/
public class ACIPConverter {
/** Command-line converter. Gives error messages on standard
* output about why we can't convert the document perfectly and
* exits with non-zero return code, or is silent otherwise and
* exits with code zero. <p>FIXME: not so efficient; copies the
* whole file into memory first. */
/** Command-line converter for testing only -- use
* org.thdl.tib.input.TibetanConverter for production work.
* Gives error messages on standard output about why we can't
* convert the document perfectly and exits with non-zero return
* code, or is silent otherwise and exits with code zero.
*
* <p>FIXME: not so efficient; copies the whole file into memory
* first. */
public static void main(String[] args)
throws IOException
{
@ -50,13 +53,20 @@ public class ACIPConverter {
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
ThdlOptions.setUserPreference("thdl.debug", true);
// Only developers should use this.
if (!ThdlOptions.getBooleanOption("thdl.debug")) {
System.err.println("Use org.thdl.tib.input.TibetanConverter for production work, not ACIPConverter.");
System.exit(1);
}
boolean verbose = true;
if (args.length != 1) {
System.out.println("Bad args! Need just the name of the ACIP text file.");
}
StringBuffer errors = new StringBuffer();
int maxErrors = 1000; // DLC NOW PER CAPITA
ArrayList al = ACIPTshegBarScanner.scanFile(args[0], errors, maxErrors - 1);
int maxErrors = 1000; // FIXME: make this PER CAPITA or else large ACIP Tibetan files are not converted for fear that they are English
boolean shortMessages = false;
ArrayList al = ACIPTshegBarScanner.scanFile(args[0], errors, maxErrors - 1, shortMessages);
if (null == al) {
System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this");
@ -89,7 +99,7 @@ public class ACIPConverter {
putWarningsInOutput = true;
}
convertToTMW(al, System.out, errors, warnings, null,
putWarningsInOutput, warningLevel, colors);
putWarningsInOutput, warningLevel, shortMessages, colors);
int retCode = 0;
if (errors.length() > 0) {
System.err.println("Errors converting ACIP input file: ");
@ -131,13 +141,15 @@ public class ACIPConverter {
boolean[] hasWarnings,
boolean writeWarningsToResult,
String warningLevel,
boolean shortMessages,
boolean colors)
throws IOException
{
TibetanDocument tdoc = new TibetanDocument();
boolean rv
= convertToTMW(scan, tdoc, errors, warnings, hasWarnings,
writeWarningsToResult, warningLevel, colors,
writeWarningsToResult, warningLevel,
shortMessages, colors,
new int[] { tdoc.getLength() });
tdoc.writeRTFOutputStream(out);
return rv;
@ -159,13 +171,15 @@ public class ACIPConverter {
boolean[] hasWarnings,
boolean writeWarningsToResult,
String warningLevel,
boolean shortMessages,
boolean colors,
int[] loc)
throws IOException
{
return convertTo(false, true, scan, null, tdoc, errors, warnings,
hasWarnings, writeWarningsToResult, warningLevel,
colors, loc, loc[0] == tdoc.getLength());
shortMessages, colors, loc,
loc[0] == tdoc.getLength());
}
/** Returns UTF-8 encoded Unicode. A bit indirect, so use this
@ -175,22 +189,23 @@ public class ACIPConverter {
* written to the result. If warnings occur in scanning the ACIP
* or in converting a tsheg bar, then they are appended to
* warnings if warnings is non-null, and they are written to the
* result if writeWarningsToResult is true. Returns the
* conversion upon perfect success or if there were merely
* warnings, null if errors occurred.
*/
* result if writeWarningsToResult is true. Error and warning
* messages are long and self-contained unless shortMessages is
* true. Returns the conversion upon perfect success or if there
* were merely warnings, null if errors occurred. */
public static String convertToUnicodeText(String acip,
StringBuffer errors,
StringBuffer warnings,
boolean writeWarningsToResult,
String warningLevel) {
String warningLevel,
boolean shortMessages) {
ByteArrayOutputStream sw = new ByteArrayOutputStream();
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1);
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, -1, shortMessages);
try {
if (null != al) {
convertToUnicodeText(al, sw, errors,
warnings, null, writeWarningsToResult,
warningLevel);
warningLevel, shortMessages);
return sw.toString("UTF-8");
} else {
return null;
@ -227,12 +242,13 @@ public class ACIPConverter {
StringBuffer warnings,
boolean[] hasWarnings,
boolean writeWarningsToOut,
String warningLevel)
String warningLevel,
boolean shortMessages)
throws IOException
{
return convertTo(true, false, scan, out, null, errors, warnings,
hasWarnings, writeWarningsToOut, warningLevel, false,
new int[] { -1 } , true);
hasWarnings, writeWarningsToOut, warningLevel,
shortMessages, false, new int[] { -1 } , true);
}
private static boolean peekaheadFindsSpacesAndComma(ArrayList /* of TString */ scan,
@ -263,6 +279,7 @@ public class ACIPConverter {
boolean[] hasWarnings,
boolean writeWarningsToOut,
String warningLevel,
boolean shortMessages,
boolean colors,
// tdocLocation[0] is an
// input-output parameter. It's
@ -284,6 +301,10 @@ public class ACIPConverter {
throw new IllegalArgumentException("ACIP->Uni.rtf requires a TibetanDocument");
if (null != out && !(toUnicode && !toRTF))
throw new IllegalArgumentException("That stream is only used in ACIP->Uni.txt mode");
if (null != out && null != tdoc)
throw new IllegalArgumentException("Errors are not treated properly yet; do one conversion and then the other. Is performance important enough to risk improper output for you?");
if (null == out && null == tdoc)
throw new IllegalArgumentException("Why would you?");
int smallFontSize = -1;
int regularFontSize = -1;
if (null != tdoc) {
@ -325,7 +346,7 @@ public class ACIPConverter {
if (stype == TString.ERROR) {
// leave lastGuyWasNonPunct and lastGuy alone; WARNINGs and ERRORs are invisible.
hasErrors = true;
String text = "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: " + s.getText() + "]";
String text = "[#ERROR " + s.getText() + "]";
if (null != writer) writer.write(text);
if (null != tdoc) {
tdoc.appendRoman(tdocLocation[0], text, Color.RED);
@ -333,7 +354,7 @@ public class ACIPConverter {
}
} else if (stype == TString.TSHEG_BAR_ADORNMENT) {
if (lastGuyWasNonPunct) {
String err = "[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot convert the ACIP {" + s.getText() + "} to Tibetan because it is unclear what the result should be.]";
String err = "[#ERROR " + ErrorsAndWarnings.getMessage(133, shortMessages, s.getText()) + "]";
if (null != writer) {
String uni = ACIPRules.getUnicodeFor(s.getText(), false);
if (null == uni) {
@ -363,7 +384,7 @@ public class ACIPConverter {
} else if (stype == TString.WARNING) {
// leave lastGuyWasNonPunct and lastGuy alone; WARNINGs and ERRORs are invisible.
if (writeWarningsToOut) {
String text = "[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: " + s.getText() + "]";
String text = "[#WARNING " + s.getText() + "]";
if (null != writer) writer.write(text);
if (null != tdoc) {
tdoc.appendRoman(tdocLocation[0], text, Color.RED);
@ -372,7 +393,7 @@ public class ACIPConverter {
}
if (null != hasWarnings) hasWarnings[0] = true;
if (null != warnings) {
warnings.append("Warning: Lexical warning: ");
warnings.append("Warning: ");
warnings.append(s.getText());
warnings.append('\n');
}
@ -399,10 +420,10 @@ public class ACIPConverter {
TPairList pls[] = TPairListFactory.breakACIPIntoChunks(s.getText(), false);
String acipError;
if ((acipError = pls[0].getACIPError()) != null
&& (null == pls[1] || pls[1].getACIPError() != null)) {
if ((acipError = pls[0].getACIPError(s.getText(), shortMessages)) != null
&& (null == pls[1] || pls[1].getACIPError(s.getText(), shortMessages) != null)) {
hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " has these errors: " + acipError + "]";
String errorMessage = "[#ERROR " + acipError + "]";
if (null != writer) writer.write(errorMessage);
if (null != tdoc) {
tdoc.appendRoman(tdocLocation[0], errorMessage,
@ -417,7 +438,10 @@ public class ACIPConverter {
? null : pls[1].getParseTree());
if (null == pt0 && null == pt1) {
hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " is essentially nothing.]";
String errorMessage
= ("[#ERROR "
+ ErrorsAndWarnings.getMessage(130, shortMessages, s.getText())
+ "]");
if (null != writer) writer.write(errorMessage);
if (null != tdoc) {
tdoc.appendRoman(tdocLocation[0], errorMessage,
@ -431,16 +455,16 @@ public class ACIPConverter {
TStackList sl1 = ((null == pt1)
? null : pt1.getBestParse());
if (null == sl0 && null == sl1) {
// I don't think this can happen
// nowadays; early in the
// converter's life, parsing of
// tsheg bars was handled
// differently, but now, I think
// this is impossible.
ThdlDebug.noteIffyCode();
// {A-DZU} causes this, for example.
hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " has no legal parses.]";
if (null != writer) writer.write(errorMessage);
String errorMessage =
"[#ERROR "
+ ErrorsAndWarnings.getMessage(134,
shortMessages,
s.getText())
+ "]";
if (null != writer)
writer.write(errorMessage);
if (null != tdoc) {
tdoc.appendRoman(tdocLocation[0],
errorMessage,
@ -474,12 +498,13 @@ public class ACIPConverter {
if ("None" != warningLevel) {
warning = pt.getWarning(warningLevel,
pl,
s.getText());
s.getText(),
shortMessages);
}
if (null != warning) {
if (writeWarningsToOut) {
String text
= ("[#WARNING CONVERTING ACIP DOCUMENT: "
= ("[#WARNING "
+ warning + "]");
if (null != writer) writer.write(text);
if (null != tdoc) {
@ -504,12 +529,16 @@ public class ACIPConverter {
// in TMW. That means there
// was probably a typo in the
// input.
if ("None" != warningLevel) {
Object[] trialDuff = sl.getDuff();
if (ErrorsAndWarnings.isEnabled(511, warningLevel)) {
Object[] trialDuff
= sl.getDuff(shortMessages,
false);
for (int ii = 0; ii < trialDuff.length; ii++) {
if (trialDuff[ii] instanceof String) {
if (!((String)trialDuff[ii]).startsWith("511"))
throw new Error("I thought 511 was the only beast like this; FIXME: make this an assertion 324xd3");
String bwarning
= "[#WARNING CONVERTING ACIP DOCUMENT: "
= "[#WARNING "
+ (String)trialDuff[ii] + "]";
unicode = bwarning + unicode;
if (null != hasWarnings) hasWarnings[0] = true;
@ -522,7 +551,7 @@ public class ACIPConverter {
}
}
if (null != tdoc) {
duff = sl.getDuff();
duff = sl.getDuff(shortMessages, true);
BoolTriple bt;
if (colors && sl.isLegalTshegBar(true).isLegal && !sl.isLegalTshegBar(false).isLegal) {
color = Color.YELLOW;
@ -657,7 +686,24 @@ public class ACIPConverter {
char ch = s.getText().charAt(0);
if (ch >= '\uF021' && ch <= '\uF0FF') {
hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The Unicode escape '" + ch + "' with ordinal " + (int)ch + " is in the private-use area (PUA) of Unicode and will thus not be written out into the output lest you think other tools will be able to understand this non-standard construction.]";
String errorMessage =
"[#ERROR "
+ ErrorsAndWarnings.getMessage(135,
shortMessages,
"" + ch)
+ "]";
writer.write(errorMessage);
if (null != errors)
errors.append(errorMessage + "\n");
continue; // FIXME: dropping output if null != tdoc
} else if (org.thdl.tib.text.tshegbar.UnicodeUtils.isReservedTibetanCode(ch)) {
hasErrors = true;
String errorMessage =
"[#ERROR "
+ ErrorsAndWarnings.getMessage(138,
shortMessages,
"" + ch)
+ "]";
writer.write(errorMessage);
if (null != errors)
errors.append(errorMessage + "\n");
@ -669,7 +715,12 @@ public class ACIPConverter {
duff = TibetanMachineWeb.mapUnicodeToTMW(s.getText().charAt(0));
if (null == duff) {
hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The Unicode escape with ordinal " + (int)s.getText().charAt(0) + " does not match up with any TibetanMachineWeb glyph.]";
String errorMessage =
"[#ERROR "
+ ErrorsAndWarnings.getMessage(136,
shortMessages,
s.getText())
+ "]";
tdoc.appendRoman(tdocLocation[0],
errorMessage,
Color.RED);
@ -700,7 +751,7 @@ public class ACIPConverter {
else {
hasErrors = true;
String emsg
= "[ERROR: " + (String)duff[j] + "]";
= "[ERROR " + (String)duff[j] + "]";
if (null != errors)
errors.append(emsg + "\n");
tdoc.appendRoman(tdocLocation[0],

View file

@ -50,7 +50,8 @@ public class ACIPTshegBarScanner {
}
StringBuffer errors = new StringBuffer();
int maxErrors = 1000;
ArrayList al = scanFile(args[0], errors, maxErrors - 1);
ArrayList al = scanFile(args[0], errors, maxErrors - 1,
"true".equals(System.getProperty("org.thdl.tib.text.ttt.ACIPTshegBarScanner.shortMessages")));
if (null == al) {
System.out.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this");
@ -74,25 +75,36 @@ public class ACIPTshegBarScanner {
/** Scans an ACIP file with path fname into tsheg bars. If errors
* is non-null, error messages will be appended to it. Returns a
* list of TStrings that is the scan. <p>FIXME: not so
* efficient; copies the whole file into memory first.
* @throws IOException if we cannot read in the ACIP input file */
public static ArrayList scanFile(String fname, StringBuffer errors, int maxErrors)
* list of TStrings that is the scan. Warning and error messages
* in the result will be long and self-contained unless
* shortMessages is true.
*
* <p>FIXME: not so efficient; copies the whole file into memory
* first.
*
* @throws IOException if we cannot read in the ACIP input file
* */
public static ArrayList scanFile(String fname, StringBuffer errors,
int maxErrors, boolean shortMessages)
throws IOException
{
return scanStream(new FileInputStream(fname),
errors, maxErrors);
errors, maxErrors, shortMessages);
}
/** Scans a stream of ACIP into tsheg bars. If errors is
* non-null, error messages will be appended to it. You can
* recover both errors and warnings (modulo offset information)
* from the result, though. Returns a list of TStrings that
* is the scan, or null if more than maxErrors occur. <p>FIXME:
* not so efficient; copies the whole file into memory first.
* from the result, though. They will be short messages iff
* shortMessages is true. Returns a list of TStrings that is the
* scan, or null if more than maxErrors occur.
*
* <p>FIXME: not so efficient; copies the whole file into memory
* first.
*
* @throws IOException if we cannot read the whole ACIP stream */
public static ArrayList scanStream(InputStream stream, StringBuffer errors,
int maxErrors)
int maxErrors, boolean shortMessages)
throws IOException
{
StringBuffer s = new StringBuffer();
@ -105,7 +117,7 @@ public class ACIPTshegBarScanner {
s.append(ch, 0, amt);
}
in.close();
return scan(s.toString(), errors, maxErrors);
return scan(s.toString(), errors, maxErrors, shortMessages);
}
/** Helper. Here because ACIP {MTHAR%\nKHA} should be treated the
@ -121,6 +133,40 @@ public class ACIPTshegBarScanner {
|| ((TString)al.get(i)).getType() == TString.TSHEG_BAR_ADORNMENT));
}
/** Helper function that records one scanning error: it adds an
    ERROR TString to the end of al, appends a line describing the
    error to the end of errors if errors is nonnull, and increments
    numErrorsArray[0] by one.  (Nothing else is mutated.)  The
    counter is incremented even when maxErrors is negative, so
    that it always holds the number of errors queued so far.
    @param code the error code handed to
    ErrorsAndWarnings.getMessage
    @param translit the text handed to ErrorsAndWarnings.getMessage
    along with code
    @param shortMessages true iff the short form of the message is
    wanted
    @param i the offset to report; a negative value is reported as
    "END"
    @param numNewlines if nonzero, the alternative offset
    i-numNewlines is reported as well
    @param maxErrors if nonnegative, the error count at which
    scanning should be abandoned; if negative, there is no limit
    @param al the scan, to which the ERROR is added
    @param errors if nonnull, the buffer to which the error
    description is appended
    @param numErrorsArray a one-element array holding the running
    error count
    @return true if and only if the error count has gone too high
    and caller should abort scanning */
private static boolean queueError(int code,
                                  String translit,
                                  boolean shortMessages,
                                  int i,
                                  int numNewlines,
                                  int maxErrors,
                                  ArrayList al,
                                  StringBuffer errors,
                                  int numErrorsArray[]) {
    String errMsg = ErrorsAndWarnings.getMessage(code,
                                                 shortMessages,
                                                 translit);
    al.add(new TString("ACIP", errMsg, TString.ERROR));
    if (null != errors) {
        errors.append("Offset " + ((i < 0) ? "END" : ("" + i))
                      + ((numNewlines == 0)
                         ? ""
                         : (" or maybe " + (i-numNewlines)))
                      + ": ERROR "
                      + errMsg + "\n");
    }
    // Count every error, limit or no limit; the old code skipped
    // the increment whenever maxErrors was negative because the
    // increment sat behind a short-circuiting &&.
    ++numErrorsArray[0];
    return maxErrors >= 0 && numErrorsArray[0] >= maxErrors;
}
// DLC FIXME "H:\n\n" becomes "H: \n\n", wrongly I think. See
// Tibetan! 5.1 section on formatting Tibetan texts.
@ -145,17 +191,21 @@ public class ACIPTshegBarScanner {
* @param maxErrors if nonnegative, then scanning will stop when
* more than maxErrors errors occur. In this event, null is
* returned.
* @param shortMessages true iff you want short error and warning
* messages instead of long, self-contained error messages
* @return null if more than maxErrors errors occur, or the scan
* otherwise
*/
public static ArrayList scan(String s, StringBuffer errors, int maxErrors) {
* otherwise */
public static ArrayList scan(String s, StringBuffer errors, int maxErrors,
boolean shortMessages) {
// FIXME: Use less memory and time by not adding in the
// warnings that are below threshold.
// the size depends on whether it's mostly Tibetan or mostly
// Latin and a number of other factors. This is meant to be
// an underestimate, but not too much of an underestimate.
int numErrors = 0;
ArrayList al = new ArrayList(s.length() / 10);
int numErrorsArray[] = new int[] { 0 };
boolean waitingForMatchingIllegalClose = false;
int sl = s.length();
int currentType = TString.ERROR;
@ -171,13 +221,9 @@ public class ACIPTshegBarScanner {
if (ch == '\n') ++numNewlines;
if (TString.COMMENT == currentType && ch != ']') {
if ('[' == ch) {
String errMsg;
al.add(new TString("ACIP", errMsg = "Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ errMsg + "\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(102, "" + ch,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
continue;
}
@ -191,22 +237,14 @@ public class ACIPTshegBarScanner {
currentType));
}
if (!waitingForMatchingIllegalClose) {
String errMsg;
al.add(new TString("ACIP", errMsg = ("Found a truly unmatched close bracket, '" + s.substring(i, i+1)) + "'.",
TString.ERROR));
if (null != errors) {
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ errMsg + "\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(103, "" + ch,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
waitingForMatchingIllegalClose = false;
al.add(new TString("ACIP", "Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(104, "" + ch,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
startOfString = i+1;
currentType = TString.ERROR;
} else {
@ -459,16 +497,12 @@ public class ACIPTshegBarScanner {
// WITHOUT # MARKS]. Though "... [" could cause
// this too.
if (waitingForMatchingIllegalClose) {
al.add(new TString("ACIP", "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.",
TString.ERROR));
if (null != errors) {
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(105, "" + ch,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
waitingForMatchingIllegalClose = true;
if (null != errors) {
{
String inContext = s.substring(i, i+Math.min(sl-i, 10));
if (inContext.indexOf("\r") >= 0) {
inContext = inContext.substring(0, inContext.indexOf("\r"));
@ -479,11 +513,9 @@ public class ACIPTshegBarScanner {
inContext = inContext + "...";
}
}
al.add(new TString("ACIP", "Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?",
TString.ERROR));
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(106, inContext,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
startOfString = i + 1;
currentType = TString.ERROR;
@ -533,18 +565,21 @@ public class ACIPTshegBarScanner {
inContext = inContext + "...";
}
}
al.add(new TString("ACIP", "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(107, inContext,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
startOfString = i+numdigits+3;
i = startOfString - 1;
currentType = TString.ERROR;
break;
}
if (i+numdigits+4 < sl && (s.charAt(i+numdigits+4) == '.' || s.charAt(i+numdigits+4) == 'A' || s.charAt(i+numdigits+4) == 'B' || s.charAt(i+numdigits+4) == 'a' || s.charAt(i+numdigits+4) == 'b' || isNumeric(s.charAt(i+numdigits+4)))) {
if (i+numdigits+4 < sl
&& (s.charAt(i+numdigits+4) == '.'
|| s.charAt(i+numdigits+4) == 'A'
|| s.charAt(i+numdigits+4) == 'B'
|| s.charAt(i+numdigits+4) == 'a'
|| s.charAt(i+numdigits+4) == 'b'
|| isNumeric(s.charAt(i+numdigits+4)))) {
String inContext = s.substring(i, i+Math.min(sl-i, 10));
if (inContext.indexOf("\r") >= 0) {
inContext = inContext.substring(0, inContext.indexOf("\r"));
@ -555,12 +590,9 @@ public class ACIPTshegBarScanner {
inContext = inContext + "...";
}
}
al.add(new TString("ACIP", "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(108, inContext,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
startOfString = i+1; // FIXME: skip over more? test this code.
currentType = TString.ERROR;
break;
@ -651,12 +683,9 @@ public class ACIPTshegBarScanner {
inContext = inContext + "...";
}
}
al.add(new TString("ACIP", "Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(109, inContext,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
startOfString = i+1;
currentType = TString.ERROR;
}
@ -673,16 +702,9 @@ public class ACIPTshegBarScanner {
if (startSlashIndex >= 0) {
if (startSlashIndex + 1 == i) {
/* //NYA\\ appears in ACIP input, and I think
* it means /NYA/. We warn about // for this
* reason. \\ causes a tsheg-bar error. */
al.add(new TString("ACIP", "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.",
TString.ERROR));
if (errors != null) {
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(110, "" + ch,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
al.add(new TString("ACIP", s.substring(i, i+1),
TString.END_SLASH));
@ -712,12 +734,9 @@ public class ACIPTshegBarScanner {
if (startParenIndex >= 0) {
if (ch == '(') {
al.add(new TString("ACIP", "Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(111, "" + ch,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
} else {
al.add(new TString("ACIP", s.substring(i, i+1), TString.END_PAREN));
startParenIndex = -1;
@ -726,12 +745,9 @@ public class ACIPTshegBarScanner {
currentType = TString.ERROR;
} else {
if (ch == ')') {
al.add(new TString("ACIP", "Unexpected closing parenthesis, ), found.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Unexpected closing parenthesis, ), found.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(112, "" + ch,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
} else {
startParenIndex = i;
al.add(new TString("ACIP", s.substring(i, i+1), TString.START_PAREN));
@ -749,13 +765,9 @@ public class ACIPTshegBarScanner {
al.add(new TString("ACIP", s.substring(startOfString, i),
currentType));
}
String errMsg = "The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.";
al.add(new TString("ACIP", errMsg,
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ errMsg + "\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(113, "" + ch,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
startOfString = i+1;
currentType = TString.ERROR;
} // else this is [*TR'A ?] or the like.
@ -780,7 +792,10 @@ public class ACIPTshegBarScanner {
|| (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n')
|| (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z')
|| (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z')))) {
al.add(new TString("ACIP", "A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".",
al.add(new TString("ACIP",
ErrorsAndWarnings.getMessage(510,
shortMessages,
"" + ch),
TString.WARNING));
}
startOfString = i+1;
@ -858,8 +873,11 @@ public class ACIPTshegBarScanner {
}
if (!bad)
al.add(new TString("ACIP", "^", TString.TIBETAN_PUNCTUATION));
else
al.add(new TString("ACIP", "The ACIP {^} must precede a tsheg bar.", TString.ERROR));
else {
if (queueError(131, "^",
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
} else {
// Don't add in a "\r\n" or "\n" unless there's a
// blank line.
@ -871,8 +889,9 @@ public class ACIPTshegBarScanner {
|| ('\n' == ch && i >= 1 && s.charAt(i-1) == '\n')))) {
for (int h = 0; h < (realNewline ? 2 : 1); h++) {
if (isTshegBarAdornment(ch) && !legalTshegBarAdornment) {
al.add(new TString("ACIP", "The ACIP " + ch + " must be glued to the end of a tsheg bar, but this one was not",
TString.ERROR));
if (queueError(132, "" + ch,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
} else {
al.add(new TString("ACIP", rn ? s.substring(i - 1, i+1) : s.substring(i, i+1),
(legalTshegBarAdornment
@ -882,7 +901,10 @@ public class ACIPTshegBarScanner {
}
}
if ('%' == ch) {
al.add(new TString("ACIP", "The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.",
al.add(new TString("ACIP",
ErrorsAndWarnings.getMessage(504,
shortMessages,
"" + ch),
TString.WARNING));
}
}
@ -909,11 +931,9 @@ public class ACIPTshegBarScanner {
currentType));
}
if ((int)ch == 65533) {
al.add(new TString("ACIP", "Found an illegal, unprintable character.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal, unprintable character.\n");
if (queueError(114, "unknown character",
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
} else if ('\\' == ch) {
int x = -1;
if (!ThdlOptions.getBooleanOption("thdl.tib.text.disallow.unicode.character.escapes.in.acip")
@ -934,22 +954,15 @@ public class ACIPTshegBarScanner {
startOfString = i+1;
break;
} else {
final String msg
= "Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.";
al.add(new TString("ACIP", msg,
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ msg + "\n");
if (queueError(115, "\\",
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
} else {
al.add(new TString("ACIP", "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".\n");
if (queueError(116, "" + ch,
shortMessages, i, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
startOfString = i+1;
currentType = TString.ERROR;
} else {
@ -965,38 +978,25 @@ public class ACIPTshegBarScanner {
currentType));
}
if (waitingForMatchingIllegalClose) {
al.add(new TString("ACIP", "UNEXPECTED END OF INPUT",
TString.ERROR));
if (null != errors) {
errors.append("Offset END: "
+ "Truly unmatched open bracket found.\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(117, "-*-END OF FILE-*-",
shortMessages, -1, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
if (!bracketTypeStack.empty()) {
al.add(new TString("ACIP", "Unmatched open bracket found. A " + ((TString.COMMENT == currentType) ? "comment" : "correction") + " does not terminate.",
TString.ERROR));
if (null != errors) {
errors.append("Offset END: "
+ "Unmatched open bracket found. A " + ((TString.COMMENT == currentType) ? "comment" : "correction") + " does not terminate.\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(((TString.COMMENT == currentType) ? 118 : 119),
"-*-END OF FILE-*-",
shortMessages, -1, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
if (startSlashIndex >= 0) {
al.add(new TString("ACIP", "Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.",
TString.ERROR));
if (null != errors)
errors.append("Offset END: "
+ "Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(120, "-*-END OF FILE-*-",
shortMessages, -1, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
if (startParenIndex >= 0) {
al.add(new TString("ACIP", "Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis.",
TString.ERROR));
if (null != errors)
errors.append("Offset END: "
+ "Unmatched open parenthesis, (, found.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
if (queueError(121, "-*-END OF FILE-*-",
shortMessages, -1, numNewlines, maxErrors, al, errors, numErrorsArray))
return null;
}
return al;
}

View file

@ -0,0 +1,392 @@
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
import java.util.HashMap;
/** A noninstantiable class that knows about every user-visible error
* or warning message. Each has a unique integer key starting at 101
* for those messages that are errors and starting at 501 for those
* messages that are warnings. This class knows which messages are
* enabled for a given warning level (which is customizable via user
* preferences), whether a message is a warning or an error (which
* could be made configurable at runtime -- easily if you just want
* to upgrade a warning to an error -- FIXME), and how to produce
* both a short and a long error message.
*
* @author David Chandler */
public class ErrorsAndWarnings {
/** Don't instantiate this class. */
private ErrorsAndWarnings() { }
/** Maps int -> severityString, where severityString is
"ERROR".intern() for errors or "All".intern(),
"Some".intern(), or "Most".intern() for warnings that are
enabled or "DISABLED".intern() for disabled
warnings/errors. */
private static HashMap severityMap = new HashMap();
static {
setupSeverityMapFromBuiltinDefaults();
}
/** Returns higher numbers for higher severity. */
private static int severityStringToInteger(String sev) {
if (sev == "ERROR") return Integer.MAX_VALUE;
if (sev == "Some") return Integer.MAX_VALUE - 1;
if (sev == "Most") return Integer.MAX_VALUE - 2;
if (sev == "All") return Integer.MAX_VALUE - 3;
return 0;
}
/** Returns true if and only if sev1 is at least as severe as
sev2. "ERROR" means an error, the highest severity; "Some" is
the most severe warning; "Most" and "All" follow. Other
values are less severe than these.
@param sev1 an interned String or null
@param sev2 an interned String or null */
private static boolean severityGreaterThanOrEquals(String sev1,
String sev2) {
return severityStringToInteger(sev1) >= severityStringToInteger(sev2);
}
/** Returns true if and only if the warning or error with number
code is enabled for the given warningLevel. Errors are
enabled regardless of warningLevel. */
static boolean isEnabled(int code, String warningLevel) {
// unknown codes appear to be disabled, but let's make sure
// that no unknown code is used during development:
ThdlDebug.verify("Unknown error/warning code " + code,
null != severityMap.get(new Integer(code)));
return severityGreaterThanOrEquals((String)severityMap.get(new Integer(code)),
warningLevel);
}
/** Returns true if and only if code is an error and not a warning
at the moment. */
static boolean isError(int code) {
return ("ERROR" == severityMap.get(new Integer(code)));
}
/** Returns an error or warning message concerning the snippet of
ACIP or EWTS translit. The warning or error
number is code, and the message will be very short, like "101:
{NNYA}" if short is true, or longer and self-contained if
short is false. Note that you cannot call this for certain
messages that take more than one "parameter", if you will,
like message 501. */
static String getMessage(int code, boolean shortMessages,
String translit) {
// Let's make sure that no unknown code is used during
// development:
ThdlDebug.verify("unknown code " + code,
null != severityMap.get(new Integer(code)));
if (shortMessages) {
if ("(".equals(translit)
|| ")".equals(translit)
|| "{".equals(translit)
|| "}".equals(translit)
|| "[".equals(translit)
|| "]".equals(translit)
|| "<".equals(translit)
|| ">".equals(translit))
return "" + code + ": '" + translit + "'";
else
return "" + code + ": {" + translit + "}";
}
// else:
switch (code) {
// ERRORS:
case 101:
return "" + code + ": There's not even a unique, non-illegal parse for {" + translit + "}";
case 102:
return "" + code + ": Found an open bracket, '" + translit + "', within a [#COMMENT]-style comment. Brackets may not appear in comments.";
case 103:
return "" + code + ": Found a truly unmatched close bracket, '" + translit + "'.";
case 104:
return "" + code + ": Found a closing bracket, '" + translit + "', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.";
case 105:
return "" + code + ": Found a truly unmatched open bracket, '[' or '{', prior to this current illegal open bracket, '" + translit + "'.";
case 106:
return "" + code + ": Found an illegal open bracket (in context, this is '" + translit + "'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?";
case 107:
return "" + code + ": Found an illegal at sign, @ (in context, this is " + translit + "). This folio marker has a period, '.', at the end of it, which is illegal.";
case 108:
return "" + code + ": Found an illegal at sign, @ (in context, this is " + translit + "). This folio marker is not followed by whitespace, as is expected.";
case 109:
return "" + code + ": Found an illegal at sign, @ (in context, this is " + translit + "). @012B is an example of a legal folio marker.";
case 110:
/*
//NYA\\ appears in ACIP input, and I think it means
/////NYA/. We warn about // for this reason. \\ causes
a tsheg-bar //error.
*/
return "" + code + ": Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.";
case 111:
return "" + code + ": Found an illegal open parenthesis, '('. Nesting of parentheses is not allowed.";
case 112:
return "" + code + ": Unexpected closing parenthesis, ')', found.";
case 113:
return "" + code + ": The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.";
case 114:
return "" + code + ": Found an illegal, unprintable character.";
case 115:
return "" + code + ": Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.";
case 116:
ThdlDebug.verify(translit.length() == 1);
return "" + code + ": Found an illegal character, '" + translit + "', with ordinal (in decimal) " + (int)translit.charAt(0) + ".";
case 117:
return "" + code + ": Unexpected end of input; truly unmatched open bracket found.";
case 118:
return "" + code + ": Unmatched open bracket found. A comment does not terminate.";
case 119:
return "" + code + ": Unmatched open bracket found. A correction does not terminate.";
case 120:
return "" + code + ": Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.";
case 121:
return "" + code + ": Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis, '('.";
case 122:
return "" + code + ": Warning, empty tsheg bar found while converting from ACIP!";
case 123:
return "" + code + ": Cannot convert ACIP {" + translit + "} because it contains a number but also a non-number.";
case 124:
return "" + code + ": Cannot convert ACIP {" + translit + "} because {V}, wa-zur, appears without being subscribed to a consonant.";
case 125:
return "" + code + ": Cannot convert ACIP {" + translit + "} because we would be required to assume that {A} is a consonant, when it is not clear if it is a consonant or a vowel.";
case 126:
return "" + code + ": Cannot convert ACIP {" + translit + "} because it ends with a '+'.";
case 127:
return "" + code + ": Cannot convert ACIP {" + translit + "} because it ends with a '-'.";
case 128: // fall through
case 129:
throw new Error("No; error messages 128 and 129 are handled elsewhere.");
case 130:
return "" + code + ": The tsheg bar (\"syllable\") {" + translit + "} is essentially nothing.";
case 131:
return "" + code + ": The ACIP caret, {^}, must precede a tsheg bar.";
case 132:
return "" + code + ": The ACIP {" + translit + "} must be glued to the end of a tsheg bar, but this one was not.";
case 133:
return "" + code + ": Cannot convert the ACIP {" + translit + "} to Tibetan because it is unclear what the result should be.";
case 134:
return "" + code + ": The tsheg bar (\"syllable\") {" + translit + "} has no legal parses.";
case 135:
ThdlDebug.verify(translit.length() == 1);
return "" + code + ": The Unicode escape '" + translit + "' with ordinal (in decimal) " + (int)translit.charAt(0) + " is specified by the Extended Wylie Transliteration Scheme (EWTS), but is in the private-use area (PUA) of Unicode and will thus not be written out into the output lest you think other tools will be able to understand this non-standard construction.";
case 136:
ThdlDebug.verify(translit.length() == 1);
return "" + code + ": The Unicode escape with ordinal (in decimal) " + (int)translit.charAt(0) + " does not match up with any TibetanMachineWeb glyph.";
// See 137 below.
case 138:
ThdlDebug.verify(translit.length() == 1);
return "" + code + ": The Unicode escape '" + translit + "' with ordinal (in decimal) " + (int)translit.charAt(0) + " is in the Tibetan range of Unicode (i.e., [U+0F00, U+0FFF]), but is a reserved code in that area.";
// WARNINGS (by default):
case 501:
throw new Error("Nah -- we handle this one in the code because the message is complicated for 501");
case 502:
return "" + code + ": The last stack does not have a vowel in {" + translit + "}; this may indicate a typo, because Sanskrit, which this probably is (because it's not legal Tibetan), should have a vowel after each stack.";
case 503:
return "" + code + ": Though {" + translit + "} is unambiguous, it would be more computer-friendly if '+' signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";
case 504:
return "" + code + ": The ACIP {" + translit + "} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {" + translit + "}.";
case 505:
return "" + code + ": There is a useless disambiguator in {" + translit + "}.";
case 506:
return "" + code + ": There is a stack of three or more consonants in {" + translit + "} that uses at least one '+' but does not use a '+' between each consonant.";
case 507:
return "" + code + ": There is a chance that the ACIP {" + translit + "} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.";
case 508: // see 509 also
return "" + code + ": The ACIP {" + translit + "} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.";
case 509: // see 508 also
return "" + code + ": The ACIP {" + translit + "} has an initial sequence that has been interpreted as two stacks, a prefix and a root stack, not one nonnative stack, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.";
case 510:
return "" + code + ": A non-breaking tsheg, '" + translit + "', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".";
// ERROR 137 and WARNING 511 are the same:
case 137: /* fall through */
case 511:
return "" + code + ": The ACIP {" + translit + "} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.";
// NEVER HAPPENS:
default:
ThdlDebug.verify("switch statement is missing a case",
false);
return "unknown error or warning with number " + code;
}
}
/** Returns true iff warningLevel is one of "All".intern(),
"Most".intern(), or "Some".intern(). */
static boolean warningLevelIsKnown(String warningLevel) {
return (warningLevel == "Some"
|| warningLevel == "Most"
|| warningLevel == "All");
}
private static final int MIN_ERROR = 101; // inclusive
private static final int MAX_ERROR = 138; // inclusive
private static final int MIN_WARNING = 501; // inclusive
private static final int MAX_WARNING = 511; // inclusive
private static void setupSeverityMapFromBuiltinDefaults() {
// errors:
for (int i = MIN_ERROR; i <= MAX_ERROR; i++) {
severityMap.put(new Integer(i), "ERROR");
}
// warnings:
String[] defaultSeverities = new String[] {
// 501:
"Most",
// 502:
"All",
// 503:
"All",
// 504:
"Some",
// 505:
"Some",
// 506:
"Some",
// 507:
"Most",
// 508:
"Some",
// 509:
"Most",
// 510:
"Some",
// 511:
"Some",
};
for (int num = MIN_WARNING; num <= MAX_WARNING; num++) {
String opt = ThdlOptions.getStringOption("thdl.acip.to.tibetan.warning.severity." + num);
if (null != opt) {
opt = opt.intern();
if ("None" == opt || "DISABLED" == opt)
opt = "DISABLED";
else if (!(opt == "Most"
|| opt == "All"
|| opt == "Some"))
opt = null;
} else {
if (!ThdlOptions.getBooleanOption("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults"))
ThdlDebug.verify("options.txt is gone?", false);
}
ThdlDebug.verify((null == opt) || opt.intern() == opt);
severityMap.put(new Integer(num), (null != opt) ? opt : defaultSeverities[num - 501]);
}
// DLC FIXME: make 506 an error? or a new, super-high priority class of warning?
// DLC FIXME: you can't turn 504 or 510 down (e.g., to an "All"-level warning)
}
/** Prints out the long forms of the error messages, which will
help a user to decipher the short forms. */
public static void printErrorAndWarningDescriptions(java.io.PrintStream out) {
final String translit = "X";
out.println("ACIP->Tibetan ERRORS are as follows, and appear in their short forms, embedded");
out.println("in the output, like [#ERROR 130: {X}]:");
out.println("");
for (int num = MIN_ERROR; num <= MAX_ERROR; num++) {
if (128 == num) {
out.println("128: Cannot convert ACIP {" + translit + "} because " + "A:" + " is a \"vowel\" without an associated consonant.");
} else if (129 == num) {
out.println("129: Cannot convert ACIP {" + translit + "} because " + "+" + " is not an ACIP consonant.");
} else {
out.println(getMessage(num, false, translit));
}
out.println("");
}
out.println("ACIP->Tibetan WARNINGS are as follows, and appear in their short forms, embedded");
out.println("in the output, like [#WARNING 510: {X}]:");
out.println("");
for (int num = MIN_WARNING; num <= MAX_WARNING; num++) {
if (501 == num) {
out.println("501: Using " + translit + ", but only because the tool's knowledge of prefix rules (see the documentation) says that " + "XX" + " is not a legal Tibetan tsheg bar (\"syllable\")");
} else {
out.println(getMessage(num, false, translit));
}
out.println("");
}
}
}

View file

@ -72,6 +72,8 @@ public class LotsOfTshegBarsTest extends TestCase {
// We don't want to use options.txt:
ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults", "true");
// We don't want to load the TM or TMW font files ourselves:
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);

View file

@ -43,6 +43,8 @@ public class PackageTest extends TestCase {
// We don't want to use options.txt:
ThdlOptions.forTestingOnlyInitializeWithoutDefaultOptionsFile();
ThdlOptions.setUserPreference("thdl.acip.to.tibetan.warning.and.error.severities.are.built.in.defaults", "true");
// We don't want to load the TM or TMW font files ourselves:
ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
@ -57,7 +59,7 @@ public class PackageTest extends TestCase {
which may be an error message. */
static String ACIP2TMW2ACIP(String ACIP) {
StringBuffer errors = new StringBuffer();
ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1);
ArrayList al = ACIPTshegBarScanner.scan(ACIP, errors, -1, false);
if (null == al || errors.length() > 0)
return null;
org.thdl.tib.text.TibetanDocument tdoc
@ -72,6 +74,7 @@ public class PackageTest extends TestCase {
false,
"None",
false,
false,
loc))
return null;
} catch (java.io.IOException e) {
@ -151,10 +154,11 @@ public class PackageTest extends TestCase {
assertTrue(null == expectedLegalParses || expectedLegalParses.length == 0);
return;
} else {
if (pt.getWarning("Most", l, acip) != null) {
System.out.println(pt.getWarning("Most", l, acip));
} else if (pt.getWarning("All", l, acip) != null)
if (sdebug || debug) System.out.println("Paranoiac warning is this: " + pt.getWarning("All", l, acip));
String s;
if ((s = pt.getWarning("Most", l, acip, false)) != null) {
System.out.println(s);
} else if ((s = pt.getWarning("All", l, acip, false)) != null)
if (sdebug || debug) System.out.println("Paranoiac warning is this: " + s);
}
int np = pt.numberOfParses();
boolean goodness = expectedParses == null || expectedParses.length == np;
@ -239,8 +243,8 @@ public class PackageTest extends TestCase {
System.out.println("allLegalParses are " + allLegalParses + " and legalParses are " + legalParses);
}
}
if (l.getACIPError() != null)
System.out.println("ACIPError: " + l.getACIPError());
if (l.getACIPError(acip, false) != null)
System.out.println("ACIPError: " + l.getACIPError(acip, false));
if (!l.recoverACIP().equals(acip)
&& (acip.indexOf("A+") < 1) // which becomes +, e.g. {NA+YA}
&& (acip.indexOf('0') < 0)
@ -297,7 +301,7 @@ public class PackageTest extends TestCase {
}
/** Tests {@link TPairListFactory#breakACIPIntoChunks(String,
* boolean)}, {@link TPairList#getACIPError()}, and {@link
* boolean)}, {@link TPairList#getACIPError(String, boolean)}, and {@link
* TPairList#recoverACIP()}. */
public void testBreakACIPIntoChunks() {
tstHelper("GASN"); // ambiguous with regard to prefix rules
@ -7204,7 +7208,7 @@ tstHelper("ZUR");
private static void shelp(String s, String expectedErrors, String expectedScan) {
StringBuffer errors = new StringBuffer();
ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1);
ArrayList al = ACIPTshegBarScanner.scan(s, errors, -1, false);
if (null != expectedScan) {
if (!al.toString().equals(expectedScan)) {
System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following scan:");
@ -7225,7 +7229,8 @@ tstHelper("ZUR");
}
}
/** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer, int)}. */
/** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer,
int, boolean)}. */
public void testScanner() {
shelp("Pm KA", "", "[TIBETAN_NON_PUNCTUATION:{Pm}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KA}]");
@ -7236,7 +7241,7 @@ tstHelper("ZUR");
"[TIBETAN_NON_PUNCTUATION:{LA}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_NON_PUNCTUATION:{SGRUB}]");
shelp("PAS... LA",
"",
"[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, WARNING:{A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
"[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, WARNING:{510: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
shelp("^GONG SA,",
"",
"[TIBETAN_PUNCTUATION:{^}, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]");
@ -7257,42 +7262,43 @@ tstHelper("ZUR");
// {^GONG SA}, but {^ GONG SA} isn't so obvious. We give an
// error.
shelp("^ GONG SA,",
"",
"[ERROR:{The ACIP {^} must precede a tsheg bar.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]");
"Offset 0: ERROR 131: The ACIP caret, {^}, must precede a tsheg bar.\n",
"[ERROR:{131: The ACIP caret, {^}, must precede a tsheg bar.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]");
shelp("^\n\nGONG SA,",
"",
"[ERROR:{The ACIP {^} must precede a tsheg bar.}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]");
"Offset 0: ERROR 131: The ACIP caret, {^}, must precede a tsheg bar.\n",
"[ERROR:{131: The ACIP caret, {^}, must precede a tsheg bar.}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_PUNCTUATION:{\n}, TIBETAN_NON_PUNCTUATION:{GONG}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{SA}, TIBETAN_PUNCTUATION:{,}]");
shelp("", "", "[]");
shelp("[DD]", "");
shelp("[",
"Offset 0: Found an illegal open bracket (in context, this is [). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
"Offset 0: ERROR 106: Found an illegal open bracket (in context, this is '['). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: ERROR 117: Unexpected end of input; truly unmatched open bracket found.\n");
shelp("{",
"Offset 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
"Offset 0: ERROR 106: Found an illegal open bracket (in context, this is '{'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: ERROR 117: Unexpected end of input; truly unmatched open bracket found.\n");
shelp("DD", "");
shelp("DD]",
"Offset 2: Found a truly unmatched close bracket, ']'.\nOffset 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
"Offset 2: ERROR 103: Found a truly unmatched close bracket, ']'.\nOffset 2: ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
shelp("///NYA", "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
shelp("///NYA", "Offset 1: ERROR 110: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: ERROR 120: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
shelp("/NYA/", "");
shelp("[?][BP][LS][DD1][DD2][DDD][DR][# (<{A COMMENT)}>]", "");
shelp("[LS][# A [[[[[COMMENT][LS]",
"Offset 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
"Offset 9: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 10: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 11: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 12: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 13: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
shelp("[ILLEGAL COMMENT]",
"Offset 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
"Offset 0: ERROR 106: Found an illegal open bracket (in context, this is '[ILLEGAL C...'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
shelp("(BSKYABS GRO)", "");
shelp("BSKYABS GRO)", "Offset 11: Unexpected closing parenthesis, ), found.\n");
shelp("BSKYABS GRO(", "Offset END: Unmatched open parenthesis, (, found.\n");
shelp("((NESTAGE))", "Offset 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10: Unexpected closing parenthesis, ), found.\n");
shelp("BSKYABS GRO)", "Offset 11: ERROR 112: Unexpected closing parenthesis, ')', found.\n");
shelp("BSKYABS GRO(", "Offset END: ERROR 121: Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis, '('.\n");
shelp("((NESTAGE))", "Offset 1: ERROR 111: Found an illegal open parenthesis, '('. Nesting of parentheses is not allowed.\nOffset 10: ERROR 112: Unexpected closing parenthesis, ')', found.\n");
shelp("(BA)(PA)NYA(CA)", "");
shelp("NYAx", "");
shelp("NYA x", "");
shelp("[# A PARTIAL COM", "Offset END: Unmatched open bracket found. A comment does not terminate.\n");
shelp("[* BSKYABS ", "Offset END: Unmatched open bracket found. A correction does not terminate.\n");
shelp("NYA x",
"Offset 4: ERROR 132: The ACIP {x} must be glued to the end of a tsheg bar, but this one was not.\n");
shelp("[# A PARTIAL COM", "Offset END: ERROR 118: Unmatched open bracket found. A comment does not terminate.\n");
shelp("[* BSKYABS ", "Offset END: ERROR 119: Unmatched open bracket found. A correction does not terminate.\n");
shelp("SKYABS [*BSKYABS?] GRO [?]", "");
shelp(" SKYABS GRO ", "");
shelp("SKYABS [*BSKYABS] GRO [?]", "", "[TIBETAN_NON_PUNCTUATION:{SKYABS}, TIBETAN_PUNCTUATION:{ }, CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{BSKYABS}, PROBABLE_CORRECTION:{]}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GRO}, TIBETAN_PUNCTUATION:{ }, QUESTION:{[?]}]");
@ -7301,8 +7307,8 @@ tstHelper("ZUR");
shelp("[* RVA ]", "", "[CORRECTION_START:{[*}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{RVA}, TIBETAN_PUNCTUATION:{ }, PROBABLE_CORRECTION:{]}]");
shelp("[*RVA ?]", "", "[CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{RVA}, TIBETAN_PUNCTUATION:{ }, POSSIBLE_CORRECTION:{?]}]");
shelp("[*RVA? ]",
"Offset 5: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n",
"[CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{RVA}, ERROR:{The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, TIBETAN_PUNCTUATION:{ }, PROBABLE_CORRECTION:{]}]");
"Offset 5: ERROR 113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n",
"[CORRECTION_START:{[*}, TIBETAN_NON_PUNCTUATION:{RVA}, ERROR:{113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, TIBETAN_PUNCTUATION:{ }, PROBABLE_CORRECTION:{]}]");
shelp("[*LINE BREAK]", "", "[CORRECTION_START:{[*}, LATIN:{LINE BREAK}, PROBABLE_CORRECTION:{]}]");
shelp("[*LINE BREAK?]", "", "[CORRECTION_START:{[*}, LATIN:{LINE BREAK}, POSSIBLE_CORRECTION:{?]}]");
shelp("[*\n\t\r LINEYO ?]", "", "[CORRECTION_START:{[*}, LATIN:{\n\t\r LINEYO }, POSSIBLE_CORRECTION:{?]}]");
@ -7310,23 +7316,23 @@ tstHelper("ZUR");
shelp("[*DATA INCOMPLETE HERE?]", "", "[CORRECTION_START:{[*}, LATIN:{DATA INCOMPLETE HERE}, POSSIBLE_CORRECTION:{?]}]");
shelp("[*THIS\r\nWAS SUPPOSED TO BE THE SIXTH CATEGORY; THE CATEGORIES MENTIONED\r\nABOVE SEEM TO BE OUT OF ORDER THROUGH THIS SECTION]\r\n", "");
shelp("x o % : m", "");
shelp("x o % : m", "Offset 0: ERROR 132: The ACIP {x} must be glued to the end of a tsheg bar, but this one was not.\nOffset 2: ERROR 132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.\nOffset 4: ERROR 132: The ACIP {%} must be glued to the end of a tsheg bar, but this one was not.\n");
shelp("AAx AAo AA% AA: AAm", "");
shelp("/NYA ", "Offset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
shelp("(NYA ", "Offset END: Unmatched open parenthesis, (, found.\n");
shelp("[*NYA ", "Offset END: Unmatched open bracket found. A correction does not terminate.\n");
shelp("/NYA ", "Offset END: ERROR 120: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
shelp("(NYA ", "Offset END: ERROR 121: Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis, '('.\n");
shelp("[*NYA ", "Offset END: ERROR 119: Unmatched open bracket found. A correction does not terminate.\n");
shelp("[?]", "", "[QUESTION:{[?]}]");
shelp("?",
"Offset 0: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n",
"[ERROR:{The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}]");
shelp("KHAN~ BAR ", "Offset 4: Found an illegal character, ~, with ordinal 126.\n");
"Offset 0: ERROR 113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n",
"[ERROR:{113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}]");
shelp("KHAN~ BAR ", "Offset 4: ERROR 116: Found an illegal character, '~', with ordinal (in decimal) 126.\n");
shelp("[* Correction with []]",
"Offset 5: Found an illegal character, r, with ordinal 114.\nOffset 6: Found an illegal character, r, with ordinal 114.\nOffset 7: Found an illegal character, e, with ordinal 101.\nOffset 8: Found an illegal character, c, with ordinal 99.\nOffset 14: Found an illegal character, w, with ordinal 119.\nOffset 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
"Offset 5: ERROR 116: Found an illegal character, 'r', with ordinal (in decimal) 114.\nOffset 6: ERROR 116: Found an illegal character, 'r', with ordinal (in decimal) 114.\nOffset 7: ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.\nOffset 8: ERROR 116: Found an illegal character, 'c', with ordinal (in decimal) 99.\nOffset 14: ERROR 116: Found an illegal character, 'w', with ordinal (in decimal) 119.\nOffset 19: ERROR 106: Found an illegal open bracket (in context, this is '[]]'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
shelp(",NGES ? PA",
"Offset 6: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n",
"[TIBETAN_PUNCTUATION:{,}, TIBETAN_NON_PUNCTUATION:{NGES}, TIBETAN_PUNCTUATION:{ }, ERROR:{The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{PA}]");
"Offset 6: ERROR 113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\n",
"[TIBETAN_PUNCTUATION:{,}, TIBETAN_NON_PUNCTUATION:{NGES}, TIBETAN_PUNCTUATION:{ }, ERROR:{113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{PA}]");
@ -7336,16 +7342,16 @@ tstHelper("ZUR");
uhelp(" 1\\ ", "\u0f0b\u0f21\u0f84\u0f0b");
}
shelp("K\\,",
"Offset 1: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\n",
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, TIBETAN_PUNCTUATION:{,}]");
"Offset 1: ERROR 115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\n",
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, TIBETAN_PUNCTUATION:{,}]");
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]");
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{%}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]");
shelp("MTHARo", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{o}]");
shelp("MTHARx", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TSHEG_BAR_ADORNMENT:{x}]");
shelp("MTHAR\n%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TIBETAN_PUNCTUATION:{ }, ERROR:{The ACIP % must be glued to the end of a tsheg bar, but this one was not}, WARNING:{The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]");
shelp("MTHAR x", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TIBETAN_PUNCTUATION:{ }, ERROR:{The ACIP x must be glued to the end of a tsheg bar, but this one was not}]");
shelp("MTHAR\n%", "Offset 6 or maybe 5: ERROR 132: The ACIP {%} must be glued to the end of a tsheg bar, but this one was not.\n", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TIBETAN_PUNCTUATION:{ }, ERROR:{132: The ACIP {%} must be glued to the end of a tsheg bar, but this one was not.}, WARNING:{504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.}]");
shelp("MTHAR x", "Offset 6: ERROR 132: The ACIP {x} must be glued to the end of a tsheg bar, but this one was not.\n", "[TIBETAN_NON_PUNCTUATION:{MTHAR}, TIBETAN_PUNCTUATION:{ }, ERROR:{132: The ACIP {x} must be glued to the end of a tsheg bar, but this one was not.}]");
shelp("PHYIR;", "", "[TIBETAN_NON_PUNCTUATION:{PHYIR}, TIBETAN_PUNCTUATION:{;}]");
shelp("......,DAM ",
@ -7382,15 +7388,15 @@ tstHelper("ZUR");
shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]");
shelp("@001 ", "", "[FOLIO_MARKER:{@001}, TIBETAN_PUNCTUATION:{ }]");
shelp("@19-20A",
"Offset 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n",
"[ERROR:{Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // FIXME: yes it occurs in the kangyur.
"Offset 0: ERROR 109: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n",
"[ERROR:{109: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // FIXME: yes it occurs in the kangyur.
shelp("@[7B]", "");
shelp("@012A.3KA",
"",
"[FOLIO_MARKER:{@012A.3}, TIBETAN_NON_PUNCTUATION:{KA}]");
shelp("@012A.34",
"Offset 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n",
"[ERROR:{Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.}, TIBETAN_NON_PUNCTUATION:{34}]");
"Offset 0: ERROR 107: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n",
"[ERROR:{107: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.}, TIBETAN_NON_PUNCTUATION:{34}]");
shelp("@[07B]", "");
shelp("@[00007B]", "");
shelp("@7B", "");
@ -7407,11 +7413,11 @@ tstHelper("ZUR");
shelp("{ BP }", "", "[BP:{{ BP }}]"); // TD3790E2.ACT
// LOW-PRIORITY FIXME: support nested comments.
shelp("[# This is a [# nested comment] don't you know?]KA KHA GA NGA",
"Offset 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\nOffset 38: Found an illegal character, y, with ordinal 121.\nOffset 40: Found an illegal character, u, with ordinal 117.\nOffset 42: Found an illegal character, k, with ordinal 107.\nOffset 45: Found an illegal character, w, with ordinal 119.\nOffset 46: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\nOffset 47: Found a truly unmatched close bracket, ']'.\nOffset 47: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n",
"[ERROR:{Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.}, COMMENT:{[# This is a [# nested comment]}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{d}, TSHEG_BAR_ADORNMENT:{o}, TIBETAN_NON_PUNCTUATION:{n't}, TIBETAN_PUNCTUATION:{ }, ERROR:{Found an illegal character, y, with ordinal 121.}, ERROR:{The ACIP o must be glued to the end of a tsheg bar, but this one was not}, ERROR:{Found an illegal character, u, with ordinal 117.}, TIBETAN_PUNCTUATION:{ }, ERROR:{Found an illegal character, k, with ordinal 107.}, TIBETAN_NON_PUNCTUATION:{n}, TSHEG_BAR_ADORNMENT:{o}, ERROR:{Found an illegal character, w, with ordinal 119.}, ERROR:{The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, ERROR:{Found a truly unmatched close bracket, ']'.}, ERROR:{Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.}, TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NGA}]");
"Offset 13: ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.\nOffset 38: ERROR 116: Found an illegal character, 'y', with ordinal (in decimal) 121.\nOffset 39: ERROR 132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.\nOffset 40: ERROR 116: Found an illegal character, 'u', with ordinal (in decimal) 117.\nOffset 42: ERROR 116: Found an illegal character, 'k', with ordinal (in decimal) 107.\nOffset 45: ERROR 116: Found an illegal character, 'w', with ordinal (in decimal) 119.\nOffset 46: ERROR 113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.\nOffset 47: ERROR 103: Found a truly unmatched close bracket, ']'.\nOffset 47: ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n",
"[ERROR:{102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.}, COMMENT:{[# This is a [# nested comment]}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{d}, TSHEG_BAR_ADORNMENT:{o}, TIBETAN_NON_PUNCTUATION:{n't}, TIBETAN_PUNCTUATION:{ }, ERROR:{116: Found an illegal character, 'y', with ordinal (in decimal) 121.}, ERROR:{132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.}, ERROR:{116: Found an illegal character, 'u', with ordinal (in decimal) 117.}, TIBETAN_PUNCTUATION:{ }, ERROR:{116: Found an illegal character, 'k', with ordinal (in decimal) 107.}, TIBETAN_NON_PUNCTUATION:{n}, TSHEG_BAR_ADORNMENT:{o}, ERROR:{116: Found an illegal character, 'w', with ordinal (in decimal) 119.}, ERROR:{113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.}, ERROR:{103: Found a truly unmatched close bracket, ']'.}, ERROR:{104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.}, TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{NGA}]");
shelp("//NYA\\\\",
"Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\nOffset 6: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\n",
"[START_SLASH:{/}, ERROR:{Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, ERROR:{Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}]");
"Offset 1: ERROR 110: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: ERROR 115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\nOffset 6: ERROR 115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.\n",
"[START_SLASH:{/}, ERROR:{110: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}, ERROR:{115: Found a backslash, \\, which the ACIP Tibetan Input Code standard says represents a Sanskrit virama. In practice, though, this is so often misused (to represent U+0F3D) that {\\} always generates this error. If you want a Sanskrit virama, change the input document to use {\\u0F84} instead of {\\}. If you want U+0F3D, use {/NYA/} or {/NYA\\u0F3D}.}]");
}
private static void uhelp(String acip) {
@ -7420,9 +7426,21 @@ tstHelper("ZUR");
/**
 * Convenience overload of the conversion-test helper that uses the
 * "Most" warning level.
 *
 * @param acip the ACIP transliteration to convert
 * @param expectedUnicode the conversion output the caller expects;
 * forwarded unchanged to the three-argument overload
 */
private static void uhelp(String acip, String expectedUnicode) {
uhelp(acip, expectedUnicode, "Most");
}
private static void uhelp(String acip, String expectedUnicode, String warningLevel) {
/**
 * Variant of the conversion-test helper that requests short error and
 * warning messages: it forwards to the four-argument uhelp with the
 * "Most" warning level and shortMessages set to true.
 *
 * @param acip the ACIP transliteration to convert
 * @param expectedUnicode the conversion output the caller expects,
 * written with the short form of any embedded error/warning messages
 */
private static void uhelpShortMessages(String acip,
String expectedUnicode) {
uhelp(acip, expectedUnicode, "Most", true);
}
/**
 * Overload of the conversion-test helper that takes an explicit warning
 * level and uses long messages: it forwards to the four-argument uhelp
 * with shortMessages set to false.
 *
 * @param acip the ACIP transliteration to convert
 * @param expectedUnicode the conversion output the caller expects
 * @param warningLevel the warning level handed on to the converter
 * (e.g. "Most")
 */
private static void uhelp(String acip,
String expectedUnicode,
String warningLevel) {
uhelp(acip, expectedUnicode, warningLevel, false);
}
private static void uhelp(String acip, String expectedUnicode,
String warningLevel, boolean shortMessages) {
StringBuffer errors = new StringBuffer();
String unicode = ACIPConverter.convertToUnicodeText(acip, errors, null, true, warningLevel);
String unicode = ACIPConverter.convertToUnicodeText(acip, errors, null,
true, warningLevel,
shortMessages);
if (null == unicode) {
if (null != expectedUnicode && "none" != expectedUnicode) {
System.out.println("No unicode exists for " + acip + " but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
@ -7460,45 +7478,70 @@ MNA'
M+NA
*/
uhelp("B+NA", "\u0f56\u0fa3");
uhelp("BNA", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {B+NA}, but only because our knowledge of prefix rules says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3");
uhelp("BNA", "[#WARNING 501: Using {B+NA} for the ACIP {BNA}, but only because the tool's knowledge of prefix rules (see the documentation) says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3");
uhelp("^GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^ GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^\rGONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^\r\nGONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^\nGONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^ GONG SA", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP {^} must precede a tsheg bar.] \u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^ GONG SA", "[#ERROR 131: The ACIP caret, {^}, must precede a tsheg bar.] \u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("BGLA", "\u0f56\u0f42\u0fb3");
uhelp("BLCAG", "\u0f56\u0f63\u0f95\u0f42");
uhelp("DBA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP DBA has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f51\u0f56");
uhelp("DMAR", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP DMAR has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f51\u0f58\u0f62");
uhelp("DBA", "[#WARNING 508: The ACIP {DBA} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f51\u0f56");
uhelp("DMAR", "[#WARNING 509: The ACIP {DMAR} has an initial sequence that has been interpreted as two stacks, a prefix and a root stack, not one nonnative stack, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f51\u0f58\u0f62");
uhelp("D+BA", "\u0f51\u0fa6");
uhelp("MNA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP MNA has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f58\u0f53");
uhelp("DGRA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP DGRA has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f51\u0f42\u0fb2");
uhelp("D+GRA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a stack of three or more consonants in D+GRA that uses at least one '+' but does not use a '+' between each consonant.]\u0f51\u0f92\u0fb2");
uhelp("DGYA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP DGYA has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f51\u0f42\u0fb1");
uhelp("MNA", "[#WARNING 508: The ACIP {MNA} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f58\u0f53");
uhelp("DGRA", "[#WARNING 508: The ACIP {DGRA} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f51\u0f42\u0fb2");
uhelp("D+GRA", "[#WARNING 506: There is a stack of three or more consonants in {D+GRA} that uses at least one '+' but does not use a '+' between each consonant.]\u0f51\u0f92\u0fb2");
uhelp("D+G+RA", "\u0f51\u0f92\u0fb2");
uhelp("DGYA", "[#WARNING 508: The ACIP {DGYA} has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f51\u0f42\u0fb1");
uhelp("DGYAMS", "[#WARNING 509: The ACIP {DGYAMS} has an initial sequence that has been interpreted as two stacks, a prefix and a root stack, not one nonnative stack, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack (because there is such a stack used in Sanskrit transliteration for this particular sequence) and forget to input it with '+' characters.]\u0f51\u0f42\u0fb1\u0f58\u0f66");
uhelp("DGYAM--S", "[#WARNING 505: There is a useless disambiguator in {DGYAM--S}.]\u0f51\u0f42\u0fb1\u0f58\u0f66"); // FIXME: 509 should be given too.
}
public void testACIPConversion() {
uhelp("RTSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {R+TS+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f62\u0faa\u0f99"); // FIXME 936998
uhelp("\\u0FFF", "[#ERROR 138: The Unicode escape '\u0fff' with ordinal (in decimal) 4095 is in the Tibetan range of Unicode (i.e., [U+0F00, U+0FFF]), but is a reserved code in that area.]");
uhelp("\\uF020", "\uF020"); /* not in EWTS's domain */
uhelp("[illegal comment, no '#' mark]",
"[#ERROR 106: Found an illegal open bracket (in context, this is '[illegal c...'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket\u003f][#ERROR 128: Cannot convert ACIP {i} because i is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.][#ERROR 116: Found an illegal character, 'g', with ordinal (in decimal) 103.][#ERROR 116: Found an illegal character, 'a', with ordinal (in decimal) 97.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.]\u0f0b[#ERROR 116: Found an illegal character, 'c', with ordinal (in decimal) 99.][#ERROR 132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.][#ERROR 128: Cannot convert ACIP {mm} because Am is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.]\u0f4e\u0f9a\u0f0d \u0f4e\u0f37\u0f0b\u0f60\u0f04\u0f05\u0f05\u0f60\u0f0b[#ERROR 128: Cannot convert ACIP {m} because Am is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'a', with ordinal (in decimal) 97.][#ERROR 116: Found an illegal character, 'r', with ordinal (in decimal) 114.][#ERROR 116: Found an illegal character, 'k', with ordinal (in decimal) 107.][#ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.]");
uhelpShortMessages("[illegal comment, no '#' mark]",
"[#ERROR 106: {[illegal c...}][#ERROR 128: {i}][#ERROR 116: {l}][#ERROR 116: {l}][#ERROR 116: {e}][#ERROR 116: {g}][#ERROR 116: {a}][#ERROR 116: {l}]\u0f0b[#ERROR 116: {c}][#ERROR 132: {o}][#ERROR 128: {mm}][#ERROR 116: {e}]\u0f4e\u0f9a\u0f0d \u0f4e\u0f37\u0f0b\u0f60\u0f04\u0f05\u0f05\u0f60\u0f0b[#ERROR 128: {m}][#ERROR 116: {a}][#ERROR 116: {r}][#ERROR 116: {k}][#ERROR 104: ']']");
uhelp("[illegal [nested comment], no '#' marks either]",
"[#ERROR 106: Found an illegal open bracket (in context, this is '[illegal [...'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket\u003f][#ERROR 128: Cannot convert ACIP {i} because i is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.][#ERROR 116: Found an illegal character, 'g', with ordinal (in decimal) 103.][#ERROR 116: Found an illegal character, 'a', with ordinal (in decimal) 97.][#ERROR 116: Found an illegal character, 'l', with ordinal (in decimal) 108.]\u0f0b[#ERROR 105: Found a truly unmatched open bracket, '[' or '{', prior to this current illegal open bracket, '['.][#ERROR 106: Found an illegal open bracket (in context, this is '[nested co...'). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket\u003f]\u0f4e[#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.][#ERROR 129: Cannot convert ACIP {st} because s is not an ACIP consonant.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.]\u0f4c\u0f0b[#ERROR 116: Found an illegal character, 'c', with ordinal (in decimal) 99.][#ERROR 132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.][#ERROR 128: Cannot convert ACIP {mm} because Am is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.]\u0f4e\u0f9a[#ERROR 104: Found a closing bracket, ']', without a matching open bracket. 
Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.]\u0f0d \u0f4e\u0f37\u0f0b\u0f60\u0f04\u0f05\u0f05\u0f60\u0f0b[#ERROR 128: Cannot convert ACIP {m} because Am is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'a', with ordinal (in decimal) 97.][#ERROR 116: Found an illegal character, 'r', with ordinal (in decimal) 114.][#ERROR 116: Found an illegal character, 'k', with ordinal (in decimal) 107.][#ERROR 129: Cannot convert ACIP {s} because s is not an ACIP consonant.]\u0f0b[#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.][#ERROR 128: Cannot convert ACIP {ith} because i is a \"vowel\" without an associated consonant.][#ERROR 116: Found an illegal character, 'e', with ordinal (in decimal) 101.][#ERROR 116: Found an illegal character, 'r', with ordinal (in decimal) 114.][#ERROR 103: Found a truly unmatched close bracket, ']'.][#ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.]");
uhelpShortMessages("[illegal [nested comment], no '#' marks either]",
"[#ERROR 106: {[illegal [...}][#ERROR 128: {i}][#ERROR 116: {l}][#ERROR 116: {l}][#ERROR 116: {e}][#ERROR 116: {g}][#ERROR 116: {a}][#ERROR 116: {l}]\u0f0b[#ERROR 105: '['][#ERROR 106: {[nested co...}]\u0f4e[#ERROR 116: {e}][#ERROR 129: {st}][#ERROR 116: {e}]\u0f4c\u0f0b[#ERROR 116: {c}][#ERROR 132: {o}][#ERROR 128: {mm}][#ERROR 116: {e}]\u0f4e\u0f9a[#ERROR 104: ']']\u0f0d \u0f4e\u0f37\u0f0b\u0f60\u0f04\u0f05\u0f05\u0f60\u0f0b[#ERROR 128: {m}][#ERROR 116: {a}][#ERROR 116: {r}][#ERROR 116: {k}][#ERROR 129: {s}]\u0f0b[#ERROR 116: {e}][#ERROR 128: {ith}][#ERROR 116: {e}][#ERROR 116: {r}][#ERROR 103: ']'][#ERROR 104: ']']");
uhelp("VA", "[#ERROR 124: Cannot convert ACIP {VA} because {V}, wa-zur, appears without being subscribed to a consonant.]");
uhelp("A", "[#ERROR 125: Cannot convert ACIP {A} because we would be required to assume that {A} is a consonant, when it is not clear if it is a consonant or a vowel.]");
uhelp("A-DZU", "[#ERROR 134: The tsheg bar (\"syllable\") {A-DZU} has no legal parses.]");
uhelp("[# a [# nested comment]]",
"[#ERROR 102: Found an open bracket, '[', within a [#COMMENT]-style comment. Brackets may not appear in comments.][# a [# nested comment][#ERROR 103: Found a truly unmatched close bracket, ']'.][#ERROR 104: Found a closing bracket, ']', without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.]");
uhelp("RTSNYA", "[#WARNING 507: There is a chance that the ACIP {RTSNYA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING 511: The ACIP {R+TS+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f62\u0faa\u0f99"); // FIXME 936998
uhelp("KO&HAm,", "\u0F40\u0F7C\u0F85\u0F67\u0F7E\u0F0D");
uhelp("x", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP x must be glued to the end of a tsheg bar, but this one was not]");
uhelp("o", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP o must be glued to the end of a tsheg bar, but this one was not]");
uhelp("%", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP % must be glued to the end of a tsheg bar, but this one was not][#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]");
uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") : has these errors: Cannot convert ACIP A: because A: is a \"vowel\" without an associated consonant]");
uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") m has these errors: Cannot convert ACIP Am because Am is a \"vowel\" without an associated consonant]");
uhelp("x", "[#ERROR 132: The ACIP {x} must be glued to the end of a tsheg bar, but this one was not.]");
uhelp("o", "[#ERROR 132: The ACIP {o} must be glued to the end of a tsheg bar, but this one was not.]");
uhelp("%", "[#ERROR 132: The ACIP {%} must be glued to the end of a tsheg bar, but this one was not.][#WARNING 504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]");
uhelp(":", "[#ERROR 128: Cannot convert ACIP {:} because A: is a \"vowel\" without an associated consonant.]");
uhelp("m", "[#ERROR 128: Cannot convert ACIP {m} because Am is a \"vowel\" without an associated consonant.]");
uhelp("N+YA", "\u0f53\u0fb1");
uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A
uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") NE+YA has these errors: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]");
uhelp("tRAStA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {t+RA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f4a\u0fb2\u0f66\u0f9a");
uhelp("DZHDZHA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZHA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5b\u0fb7\u0fab\u0fb7"); // tricky because DZHDZA is not in TMW but DZHDZHA is
uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {DZH+DZA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f5b\u0fb7\u0fab");
uhelp("NE+YA", "[#ERROR 129: Cannot convert ACIP {NE+YA} because + is not an ACIP consonant.]");
uhelp("tRAStA", "[#WARNING 511: The ACIP {t+RA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f4a\u0fb2\u0f66\u0f9a");
uhelp("DZHDZHA", "[#WARNING 507: There is a chance that the ACIP {DZHDZHA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5b\u0fb7\u0fab\u0fb7"); // tricky because DZHDZA is not in TMW but DZHDZHA is
uhelp("DZHDZA", "[#WARNING 507: There is a chance that the ACIP {DZHDZA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING 511: The ACIP {DZH+DZA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f5b\u0fb7\u0fab");
uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1");
uhelp("P+S+NYA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99");
uhelp("PSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP PSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn!
uhelp("NNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP NNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {N+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f53\u0f99");
uhelp("GHNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP GHNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f42\u0fb7\u0f99");
uhelp("P+S+NYA", "[#WARNING 511: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99");
uhelp("PSNYA", "[#WARNING 507: There is a chance that the ACIP {PSNYA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING 511: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn!
uhelp("NNYA", "[#WARNING 507: There is a chance that the ACIP {NNYA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING 511: The ACIP {N+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f53\u0f99");
uhelp("GHNYA", "[#WARNING 507: There is a chance that the ACIP {GHNYA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f42\u0fb7\u0f99");
// TS+NYA and T+S+N+YA are both legal, so what is TSNYA?
// Private correspondence with Robert Chilton says that it is
@ -7506,22 +7549,22 @@ M+NA
uhelp("THAG PA", "\u0f50\u0f42\u0f0b\u0f54");
uhelp("KA \nKHA\n\nGA", "\u0f40\u0f0b\u0f41\u0f0b\n\n\u0f42");
uhelp("KA%\nKHA", "\u0f40\u0f35[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]\u0f0b\u0f41");
uhelp("KA%", "\u0f40\u0f35[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]");
uhelp("KA%\nKHA", "\u0f40\u0f35[#WARNING 504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]\u0f0b\u0f41");
uhelp("KA%", "\u0f40\u0f35[#WARNING 504: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice. To avoid seeing this warning again, change the input to use {\\u0F35} instead of {%}.]");
uhelp("KAo", "\u0f40\u0f37");
uhelp("KAo\n\nKA", "\u0f40\u0f37\u0f0b\n\n\u0f40");
uhelp("KAo\nKHA", "\u0f40\u0f37\u0f0b\u0f41");
uhelp("KAo KHA", "\u0f40\u0f37\u0f0b\u0f41");
uhelp("KA KAo KHA", "\u0f40\u0f0b\u0f40\u0f37\u0f0b\u0f41");
uhelp("KAx", "\u0f40[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot convert the ACIP {x} to Tibetan because it is unclear what the result should be.]");
uhelp("KAx", "\u0f40[#ERROR 133: Cannot convert the ACIP {x} to Tibetan because it is unclear what the result should be.]");
uhelp("G+DHA", "\u0f42\u0fa1\u0fb7");
uhelp("P'EE", "\u0f54\u0f71\u0f7b");
uhelp("BA ? HA", "\u0f56\u0f0b[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.] \u0f67");
uhelp("BA ? HA", "\u0f56\u0f0b[#ERROR 113: The ACIP {?}, found alone, may intend U+0F08, but it may intend a question mark, i.e. '?', in the output. It may even mean that the original text could not be deciphered with certainty, like the ACIP {[?]} does.] \u0f67");
uhelp("KA", "\u0f40");
uhelp("\\u0F35", "\u0F35");
uhelp("\\uF035", "[#ERROR CONVERTING ACIP DOCUMENT: The Unicode escape '\uf035' with ordinal 61493 is in the private-use area (PUA) of Unicode and will thus not be written out into the output lest you think other tools will be able to understand this non-standard construction.]");
uhelp("\\uF035", "[#ERROR 135: The Unicode escape '\uf035' with ordinal (in decimal) 61493 is specified by the Extended Wylie Transliteration Scheme (EWTS), but is in the private-use area (PUA) of Unicode and will thus not be written out into the output lest you think other tools will be able to understand this non-standard construction.]");
uhelp("KI", "\u0f40\u0f72");
uhelp("KO", "\u0f40\u0f7c");
uhelp("KE", "\u0f40\u0f7a");
@ -7603,7 +7646,7 @@ M+NA
uhelp("*#HUm: G+DHOO GRO`;.,",
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa1\u0fb7\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
uhelp("*#HUm: K+DHA GRO`;.,",
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {K+DHA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f40\u0fa1\u0fb7\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#WARNING 511: The ACIP {K+DHA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f40\u0fa1\u0fb7\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
uhelp("HA,\nHA\n\nHA", "\u0f67\u0f0d \u0f67\u0f0b\n\n\u0f67");
uhelp("NGA,", "\u0f44\u0f0c\u0f0d");
uhelp("NGA,\nHA\n\nHA", "\u0f44\u0f0c\u0f0d \u0f67\u0f0b\n\n\u0f67");
@ -7616,8 +7659,8 @@ M+NA
uhelp("GU, ,KHO", "\u0f42\u0f74\u0f0d \u0f0d\u0f41\u0f7c");
uhelp("GU ,KHO", "\u0f42\u0f74\u0f0b \u0f0d\u0f41\u0f7c"); // FIXME: missing a shad after GU, warn about that.
uhelp("GA HA", "\u0f42\u0f0b \u0f67");
uhelp("BCWA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
uhelp("'KYO", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {'+K+YO}, but only because our knowledge of prefix rules says that {'}{K+YO} is not a legal Tibetan tsheg bar (\"syllable\")][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {'+K+YO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f60\u0f90\u0fb1\u0f7c");
uhelp("BCWA", "[#WARNING 511: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
uhelp("'KYO", "[#WARNING 501: Using {'+K+YO} for the ACIP {'KYO}, but only because the tool's knowledge of prefix rules (see the documentation) says that {'}{K+YO} is not a legal Tibetan tsheg bar (\"syllable\")][#WARNING 511: The ACIP {'+K+YO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f60\u0f90\u0fb1\u0f7c");
uhelp("WA", "\u0f5d");
uhelp("W", "\u0f5d");
uhelp("WO", "\u0f5d\u0f7c");
@ -7635,21 +7678,21 @@ M+NA
uhelp("WRA", "\u0f5d\u0fb2");
uhelp("W+RA", "\u0f5d\u0fb2");
uhelp("W+R", "\u0f5d\u0fb2");
uhelp("BCWA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
uhelp("BCW", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+W} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
uhelp("BCWO", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba\u0f7c");
uhelp("BCWA", "[#WARNING 511: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
uhelp("BCW", "[#WARNING 511: The ACIP {B+C+W} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
uhelp("BCWO", "[#WARNING 511: The ACIP {B+C+WO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba\u0f7c");
uhelp("BCVA", "\u0f56\u0f45\u0fad");
uhelp("BCV", "\u0f56\u0f45\u0fad");
uhelp("BCV'O", "\u0f56\u0f45\u0fad\u0f71\u0f7c");
uhelp("BCV'A", "\u0f56\u0f45\u0fad\u0f71");
uhelp("BCV'", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+V+'} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fad\u0fb0");
uhelp("BCV'", "[#WARNING 511: The ACIP {B+C+V+'} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fad\u0fb0");
uhelp("GYA", "\u0f42\u0fb1");
uhelp("GY", "\u0f42\u0fb1");
uhelp("G-YA", "\u0f42\u0f61");
uhelp("GA-YA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a useless disambiguator in GA-YA.]\u0f42\u0f61");
uhelp("GA-YO", "[#WARNING CONVERTING ACIP DOCUMENT: There is a useless disambiguator in GA-YO.]\u0f42\u0f61\u0F7c");
uhelp("RTZVA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTZVA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f62\u0fa9\u0fad");
uhelp("RTZWA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTZWA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {R+TZ+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f62\u0fa9\u0fba");
uhelp("GA-YA", "[#WARNING 505: There is a useless disambiguator in {GA-YA}.]\u0f42\u0f61");
uhelp("GA-YO", "[#WARNING 505: There is a useless disambiguator in {GA-YO}.]\u0f42\u0f61\u0F7c");
uhelp("RTZVA", "[#WARNING 507: There is a chance that the ACIP {RTZVA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f62\u0fa9\u0fad");
uhelp("RTZWA", "[#WARNING 507: There is a chance that the ACIP {RTZWA} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING 511: The ACIP {R+TZ+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f62\u0fa9\u0fba");
}
public void testFixedFormSubjoinedConsonants() {
// Usual subjoined RA:
@ -7675,7 +7718,7 @@ M+NA
+ "\u0f61\u0fbb\u0f7b\u0f0b" // Y+YEE
+ "\u0f4e\u0f9c\u0fbc\u0fb1\u0f0b" // ndRYA
+ "\u0f4e\u0f9c\u0fbc\u0fb1\u0f7b\u0f0b" // n+d+R+YEE
+ "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP KshR was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f40\u0fb5\u0fbc\u0f0b" // KshR
+ "[#WARNING 507: There is a chance that the ACIP {KshR} was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f40\u0fb5\u0fbc\u0f0b" // KshR
+ "\u0f40\u0fb5\u0fbc\u0f7b\u0f0b" // K+sh+REE
+ "\u0f4e\u0f9c\u0fbb\u0f0b" // ndY
+ "\u0f4e\u0f9c\u0fbb\u0f7b\u0f0d" // n+d+YEE
@ -7702,6 +7745,9 @@ M+NA
a2ahelp("/'A/");
a2ahelp("/1/");
a2ahelp("/1/");
a2ahelp("#**##*");
a2ahelp("#");
a2ahelp("*");
assertTrue(ACIP2TMW2ACIP("RTSNYA") == null); // R+TS+NYA is thought of, not R+T+S+N+YA -- FIXME 936998
a2ahelp("N+DZY", "N+DZ+YA"); // R+TS+NYA is not thought of as R+T+S+N+YA; note the (documented and necessary) inconsistency
}
@ -10171,8 +10217,8 @@ tstHelper("shKA");
// 'BYONGS [BLO,) S0375M.ACT
/* FIXME: BDEm: is different than BDE for us, is that OK?
uhelp("BDEm:", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP BDEm: has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]PLACEHOLDER");
/* DLC FIXME: BDEm: is different than BDE for us, is that OK?
uhelp("BDEm:", "[#WARNING The ACIP BDEm: has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]PLACEHOLDER");
tstHelper("BDA:", "{B}{DA:}",
new String[] { "{B+DA:}", "{B}{DA:}" },
new String[] { "{B}{DA:}" },

View file

@ -153,92 +153,56 @@ class TPairList {
* Returns an error message, or null if there is no error that
* you can find without the help of tsheg bar syntax rules. */
// FIXME: This is needlessly ACIP specific -- rename and change text of messages
String getACIPError() {
String getACIPError(String originalACIP, boolean shortMessages) {
// FIXME: this returns just the first error. List all errors
// at once.
int sz = size();
if (0 == sz)
return "Warning, empty tsheg bar found while converting from ACIP!";
boolean first = true;
StringBuffer rv = null;
if (0 == sz) // FIXME: see if you can make this happen...
return ErrorsAndWarnings.getMessage(122, shortMessages,
((null != originalACIP)
? originalACIP
: ""));
String translit
= (null != originalACIP) ? originalACIP : recoverACIP();
boolean mustBeEntirelyNumeric = get(0).isNumeric();
for (int i = 0; i < sz; i++) {
TPair p = get(i);
if (mustBeEntirelyNumeric != p.isNumeric())
return "Cannot convert ACIP " + recoverACIP() + " because it contains a number but also a non-number.";
return ErrorsAndWarnings.getMessage(123, shortMessages, translit);
if ((i == 0 && "V".equals(p.getLeft()))
|| (i > 0 && "V".equals(p.getLeft())
&& (null != get(i - 1).getRight()
&& !"+".equals(get(i - 1).getRight())))) {
if (first) {
first = false;
rv = new StringBuffer("Cannot convert ACIP ");
rv.append(recoverACIP());
rv.append(" because {V}, wa-zur, appears without being subscribed to a consonant.");
} else {
rv.append("; also, {V}, wa-zur, appears without being subscribed to a consonant");
}
return ErrorsAndWarnings.getMessage(124, shortMessages, translit);
} else if ("A".equals(p.getLeft()) && (null == p.getRight() || "".equals(p.getRight()))) {
if (first) {
first = false;
rv = new StringBuffer("Cannot convert ACIP ");
rv.append(recoverACIP());
rv.append(" because we would be required to assume that {A} is a consonant, when it is not clear if it is a consonant or a vowel.");
} else {
rv.append("; also, we would be required to assume that {A} is a consonant, when it is not clear if it is a consonant or a vowel.");
}
return ErrorsAndWarnings.getMessage(125, shortMessages, translit);
} else if ((null == p.getLeft() && !"-".equals(p.getRight()))
|| (null != p.getLeft()
&& !ACIPRules.isConsonant(p.getLeft())
&& !p.isNumeric())) {
if (first) {
first = false;
rv = new StringBuffer("Cannot convert ACIP ");
rv.append(recoverACIP());
rv.append(" because ");
// FIXME: stop handling this outside of ErrorsAndWarnings:
if (null == p.getLeft()) {
rv.append(p.getRight());
rv.append(" is a \"vowel\" without an associated consonant");
if (shortMessages)
return "128: {" + translit + "}";
else
return "128: Cannot convert ACIP {" + translit + "} because " + p.getRight() + " is a \"vowel\" without an associated consonant.";
} else {
rv.append(p.getLeft());
rv.append(" is not an ACIP consonant");
}
} else {
if (null == p.getLeft()) {
rv.append("; also, ");
rv.append(p.getRight());
rv.append(" is an ACIP \"vowel\" without an associated consonant");
} else {
rv.append("; also, ");
rv.append(p.getLeft());
rv.append(" is not an ACIP consonant");
}
if (shortMessages)
return "129: {" + translit + "}";
else
return "129: Cannot convert ACIP {" + translit + "} because " + p.getLeft() + " is not an ACIP consonant.";
}
}
}
if ("+".equals(get(sz - 1).getRight())) {
if (first) {
first = false;
rv = new StringBuffer("Cannot convert ACIP ");
rv.append(recoverACIP());
rv.append(" because it ends with a {+}.");
} else {
rv.append("; also, it ends with a {+}.");
return ErrorsAndWarnings.getMessage(126, shortMessages, translit);
}
}
// FIXME: really this is a warning, not an error:
if ("-".equals(get(sz - 1).getRight())) {
if (first) {
first = false;
rv = new StringBuffer("Cannot convert ACIP ");
rv.append(recoverACIP());
rv.append(" because it ends with a {-}.");
} else {
rv.append("; also, it ends with a {-}.");
return ErrorsAndWarnings.getMessage(127, shortMessages, translit);
}
}
return (rv == null) ? null : rv.toString();
return null;
}
/** Returns true if and only if either x is a TPairList object
@ -657,10 +621,14 @@ class TPairList {
}
/** Appends the DuffCodes that correspond to this grapheme cluster
* to duffsAndErrors, or appends a String that is an error
* message saying that TMW cannot represent this grapheme
* cluster. */
void getDuff(ArrayList duffsAndErrors) {
* to duffsAndErrors, or appends a String that is an error or
* warning message (a short one iff shortMessages is true) saying
* that TMW cannot represent this grapheme cluster. The message
* is Error 137 if noCorrespondingTMWGlyphIsError is true;
* otherwise, it's Warning 511. */
void getDuff(ArrayList duffsAndErrors,
boolean shortMessages,
boolean noCorrespondingTMWGlyphIsError) {
int previousSize = duffsAndErrors.size();
StringBuffer wylieForConsonant = new StringBuffer();
for (int x = 0; x + 1 < size(); x++) {
@ -716,7 +684,11 @@ class TPairList {
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
hashKey = hashKey.replace('+', '-');
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
duffsAndErrors.add("The ACIP {" + recoverACIP() + "} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.");
duffsAndErrors.add(ErrorsAndWarnings.getMessage(noCorrespondingTMWGlyphIsError
? 137
: 511,
shortMessages,
recoverACIP()));
return;
}
}

View file

@ -18,6 +18,8 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import org.thdl.util.ThdlDebug;
import java.util.ArrayList;
/** A list of non-empty lists of {@link TStackListList
@ -266,16 +268,28 @@ class TParseTree {
* warnings about lacking vowels on final stacks, "Some" to see
* warnings about lacking vowels on non-final stacks and also
* warnings about when prefix rules affect you, "None" if you
* like to see IllegalArgumentExceptions.
* like to see IllegalArgumentExceptions thrown. (Actually, this
* refers only to the default values -- the level at which any
* particular warning appears is customizable.)
* @param pl the pair list from which this parse tree originated
* @param originalACIP the original ACIP, or null if you want
* this parse tree to make a best guess. */
* this parse tree to make a best guess.
* @param shortMessages true iff you want short error and warning
* messages */
public String getWarning(String warningLevel,
TPairList pl,
String originalACIP) {
if (warningLevel != "Some"
&& warningLevel != "Most"
&& warningLevel != "All")
String originalACIP,
boolean shortMessages) {
// ROOM_FOR_IMPROVEMENT: Allow one tsheg bar to have multiple
// warnings/errors associated with it. Make this a private
// subroutine, and have the public getWarning(..) call on this
// subroutine again and again until no new error is found. If
// call N yields warning 506, then disable 506 and call again.
// If you get 508, call again, etc. Finally, restore 506
// etc. and return the concatenation of messages 506 and 508.
// {DGYAM--S} should yield both 505 and 509.
if (!ErrorsAndWarnings.warningLevelIsKnown(warningLevel))
throw new IllegalArgumentException("warning level bad: is it interned?");
TStackList bestParse = getBestParse();
@ -283,37 +297,51 @@ class TParseTree {
TStackListList noPrefixTestsUniqueParse = getUniqueParse(true);
if (noPrefixTestsUniqueParse.size() == 1
&& !noPrefixTestsUniqueParse.get(0).equals(bestParse)) {
if (warningLevel != "Some")
return "Warning: We're going with " + bestParse + ", but only because our knowledge of prefix rules says that " + noPrefixTestsUniqueParse.get(0) + " is not a legal Tibetan tsheg bar (\"syllable\")";
if (ErrorsAndWarnings.isEnabled(501, warningLevel))
if (shortMessages)
return "501: Using " + bestParse + ", not " + noPrefixTestsUniqueParse.get(0);
else
return "501: Using " + bestParse + ((null != originalACIP) ? (" for the ACIP {" + originalACIP + "}") : "") + ", but only because the tool's knowledge of prefix rules (see the documentation) says that " + noPrefixTestsUniqueParse.get(0) + " is not a legal Tibetan tsheg bar (\"syllable\")";
}
}
String translit = (null != originalACIP) ? originalACIP : recoverACIP();
TStackListList up = getUniqueParse(false);
if (null == up || up.size() != 1) {
// FIXME: code duplication
boolean isLastStack[] = new boolean[1];
TStackListList nip = getNonIllegalParses();
if (nip.size() != 1) {
if (null == bestParse) {
return "Warning: There's not even a unique, non-illegal parse for ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
/* FIXME: Is this case possible? We can get to it
in unit testing (and we do), but is there any
ACIP input file that will cause this? */
// FIXME: IS 101 NOT TREATED AS AN error, BUT
// INSTEAD TREATED AS A warning?
//
// FIXME: The caller will prepend "WARNING " to this error!
if (ErrorsAndWarnings.isEnabled(101, warningLevel))
return ErrorsAndWarnings.getMessage(101, shortMessages,
translit);
} else {
if (bestParse.hasStackWithoutVowel(pl, isLastStack)) {
if (isLastStack[0]) {
if (warningLevel == "All")
return "Warning: The last stack does not have a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}; this may indicate a typo, because Sanskrit, which this is (because it's not legal Tibetan), should have a vowel after each stack.";
if (ErrorsAndWarnings.isEnabled(502, warningLevel))
return ErrorsAndWarnings.getMessage(502, shortMessages,
translit);
} else {
throw new Error("Can't happen now that we stack greedily");
}
}
if ("All" == warningLevel) {
return "Warning: Though the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";
}
if (ErrorsAndWarnings.isEnabled(503, warningLevel))
return ErrorsAndWarnings.getMessage(503, shortMessages,
translit);
}
} else {
if (nip.get(0).hasStackWithoutVowel(pl, isLastStack)) {
if (isLastStack[0]) {
if (warningLevel == "All")
return "Warning: The last stack does not have a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}; this may indicate a typo, because Sanskrit, which this is (because it's not legal Tibetan), should have a vowel after each stack.";
if (ErrorsAndWarnings.isEnabled(502, warningLevel))
return ErrorsAndWarnings.getMessage(502, shortMessages,
translit);
} else {
throw new Error("Can't happen now that we stack greedily [2]");
}
@ -330,14 +358,13 @@ class TParseTree {
// Check for useless disambiguators.
{
int plnum = 0;
String swarn
= "There is a stack of three or more consonants in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " that uses at least one '+' but does not use a '+' between each consonant.";
String disamWarn
= "There is a useless disambiguator in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + ".";
while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
++plnum;
return disamWarn;
if (ErrorsAndWarnings.isEnabled(505, warningLevel))
return ErrorsAndWarnings.getMessage(505, shortMessages,
translit);
}
plnum = 0;
for (int stackNum = 0; stackNum < bestParse.size(); stackNum++) {
TPairList stack = bestParse.get(stackNum);
int type = 0;
@ -350,12 +377,16 @@ class TParseTree {
if (type == 0)
type = -1;
else if (type == 1)
return swarn;
if (ErrorsAndWarnings.isEnabled(506, warningLevel))
return ErrorsAndWarnings.getMessage(506, shortMessages,
translit);
} else {
if (type == 0)
type = 1;
else if (type == -1)
return swarn;
if (ErrorsAndWarnings.isEnabled(506, warningLevel))
return ErrorsAndWarnings.getMessage(506, shortMessages,
translit);
}
}
if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
@ -364,12 +395,15 @@ class TParseTree {
}
}
if (hasAmbiguousConsonant && -1 == type) {
if ("Most" == warningLevel || "All" == warningLevel)
return "There is a chance that the ACIP " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.";
if (ErrorsAndWarnings.isEnabled(507, warningLevel))
return ErrorsAndWarnings.getMessage(507, shortMessages,
translit);
}
while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
++plnum;
return disamWarn;
if (ErrorsAndWarnings.isEnabled(505, warningLevel))
return ErrorsAndWarnings.getMessage(505, shortMessages,
translit);
}
}
}
@ -386,8 +420,15 @@ class TParseTree {
&& null != left && null != right) {
if (("D".equals(left) && "G".equals(middle) && "R".equals(right))
|| ("D".equals(left) && "G".equals(middle) && "Y".equals(right))) {
if (pl.size() == 3 || "Some" != warningLevel)
return "The ACIP " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.";
if (pl.size() == 3) {
if (ErrorsAndWarnings.isEnabled(508, warningLevel))
return ErrorsAndWarnings.getMessage(508, shortMessages,
translit);
} else {
if (ErrorsAndWarnings.isEnabled(509, warningLevel))
return ErrorsAndWarnings.getMessage(509, shortMessages,
translit);
}
}
}
}
@ -404,8 +445,15 @@ class TParseTree {
|| ("G".equals(left) && "D".equals(right))
|| ("D".equals(left) && "N".equals(right))
|| ("M".equals(left) && "N".equals(right))) {
if (pl.size() == 2 || "Some" != warningLevel)
return "The ACIP " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.";
if (pl.size() == 2) {
if (ErrorsAndWarnings.isEnabled(508, warningLevel))
return ErrorsAndWarnings.getMessage(508, shortMessages,
translit);
} else {
if (ErrorsAndWarnings.isEnabled(509, warningLevel))
return ErrorsAndWarnings.getMessage(509, shortMessages,
translit);
}
}
}
}

View file

@ -183,8 +183,10 @@ class TStackList {
boolean isClearlyIllegal() {
// check for {D}{VA} sorts of things:
for (int i = 0; i < size(); i++) {
if (get(i).getACIPError() != null) {
if (ddebug) System.out.println("ddebug: error is " + get(i).getACIPError());
if (get(i).getACIPError("THIS MAKES IT FASTER AND IS SAFE, DON'T WORRY",
true /* faster... */)
!= null) {
if (ddebug) System.out.println("ddebug: error is " + get(i).getACIPError("THIS MAKES IT FASTER AND IS SAFE, DON'T WORRY", false));
return true;
}
}
@ -237,12 +239,14 @@ class TStackList {
/** Returns the DuffCodes and errors corresponding to this stack
list. Each element of the array is a DuffCode or a String, the
latter if and only if the TMW font cannot represent the
corresponding stack in this list. */
Object[] getDuff() {
corresponding stack in this list. Iff shortMessages is true,
the String elements will be shorter messages. */
Object[] getDuff(boolean shortMessages,
boolean noCorrespondingTMWGlyphIsError) {
ArrayList al = new ArrayList(size()*2); // rough estimate
int count = 0;
for (int i = 0; i < size(); i++) {
get(i).getDuff(al);
get(i).getDuff(al, shortMessages, noCorrespondingTMWGlyphIsError);
}
if (size() > 0 && al.size() == 0) {
throw new Error("But this stack list, " + this + ", contains " + size() + " stacks! How can it not have DuffCodes associated with it?");

View file

@ -10,7 +10,7 @@ License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2001 THDL.
Library (THDL). Portions created by the THDL are Copyright 2001, 2004 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
@ -91,4 +91,11 @@ public final class ThdlLazyException extends Error {
public Throwable getRealException() {
// Hands back the wrapped exception as-is; the result may be null,
// which is why toString() and getMessage() in this class compare it
// against null before dereferencing it.
return wrappedException;
}
/** Returns a description of the form
 *  <code>ThdlLazyException [S] wrapping W</code>, where S is the
 *  superclass's string form of this exception and W is the string
 *  form of the wrapped exception, or the word "nothing" if no
 *  exception is wrapped. */
public String toString() {
    StringBuffer description = new StringBuffer("ThdlLazyException [");
    description.append(super.toString());
    description.append("] wrapping ");
    Throwable wrapped = getRealException();
    if (wrapped == null) {
        description.append("nothing");
    } else {
        description.append(wrapped.toString());
    }
    return description.toString();
}
/** Returns a message of the form
 *  <code>ThdlLazyException [M] wrapping W</code>, where M is the
 *  message held by the superclass and W is the message of the
 *  wrapped exception, or the word "nothing" if no exception is
 *  wrapped.  A null message is rendered as the text "null". */
public String getMessage() {
    StringBuffer text = new StringBuffer("ThdlLazyException [");
    text.append(super.getMessage());
    text.append("] wrapping ");
    Throwable wrapped = getRealException();
    if (wrapped == null) {
        text.append("nothing");
    } else {
        text.append(wrapped.getMessage());
    }
    return text.toString();
}
}

View file

@ -87,7 +87,8 @@ public class ThdlLazyExceptionTest extends TestCase {
public void testThdlLazyExceptionString() {
String msg = "foo";
ThdlLazyException e = new ThdlLazyException(msg);
assertTrue(msg.equals(e.getMessage()));
assertTrue("Oops: " + e.getMessage(),
"ThdlLazyException [foo] wrapping nothing".equals(e.getMessage()));
assertTrue(null == e.getRealException());
}
@ -98,7 +99,8 @@ public class ThdlLazyExceptionTest extends TestCase {
String msg = "foo";
IOException ioe = new IOException("bah");
ThdlLazyException e = new ThdlLazyException(msg, ioe);
assertTrue(msg.equals(e.getMessage()));
assertTrue("oops: " + e.getMessage(),
"ThdlLazyException [foo] wrapping bah".equals(e.getMessage()));
assertTrue(ioe.equals(e.getRealException()));
assertTrue("bah".equals(e.getRealException().getMessage()));
}