Jskad has a new feature: Convert Selection from ACIP to Tibetan. It uses the ACIP converter to do its work.

Improved some error messages from the ACIP->Tibetan converter.
This commit is contained in:
dchandler 2003-10-19 20:16:06 +00:00
parent 5ce84d4d9a
commit 4b1395e0ba
8 changed files with 193 additions and 94 deletions

View file

@ -1549,7 +1549,7 @@ public void paste(int offset) {
ThdlDebug.noteIffyCode();
}
} else {
DuffData[] dd = TibTextUtils.getTibetanMachineWeb(next);
DuffData[] dd = TibTextUtils.getTibetanMachineWebForEWTS(next);
offset = getTibDoc().insertDuff(offset, dd);
}
}
@ -1563,21 +1563,24 @@ public void paste(int offset) {
}
/**
* Converts the currently selected text from Extended Wylie to TibetanMachineWeb.
*/
public void toTibetanMachineWeb() {
* Converts the currently selected text from Roman transliteration to
* TibetanMachineWeb.
* @param fromACIP true if the selection is ACIP, false if it is EWTS
* */
public void toTibetanMachineWeb(boolean fromACIP) {
int start = getSelectionStart();
int end = getSelectionEnd();
toTibetanMachineWeb(start, end);
toTibetanMachineWeb(fromACIP, start, end);
}
/**
* Converts a stretch of text from Roman transliteration (ACIP or EWTS) to TibetanMachineWeb.
* @param fromACIP true if the selection is ACIP, false if it is EWTS
* @param start the begin point for the conversion
* @param end the end point for the conversion
*/
public void toTibetanMachineWeb(int start, int end) {
public void toTibetanMachineWeb(boolean fromACIP, int start, int end) {
if (start == end)
return;
@ -1599,17 +1602,28 @@ public void paste(int offset) {
if ((0 != TibetanMachineWeb.getTMWFontNumber(fontName)) || i==endPos.getOffset()) {
if (i != start) {
try {
DuffData[] duffdata = TibTextUtils.getTibetanMachineWeb(sb.toString());
getTibDoc().remove(start, i-start);
getTibDoc().insertDuff(start, duffdata);
}
catch (InvalidWylieException iwe) {
DuffData[] duffdata = null;
if (fromACIP) {
getTibDoc().remove(start, i-start);
TibTextUtils.insertTibetanMachineWebForACIP(sb.toString(), getTibDoc(), start);
}
else
duffdata = TibTextUtils.getTibetanMachineWebForEWTS(sb.toString());
if (!fromACIP) {
getTibDoc().remove(start, i-start);
getTibDoc().insertDuff(start, duffdata);
}
} catch (InvalidWylieException iwe) {
JOptionPane.showMessageDialog(this,
"The Wylie you are trying to convert is invalid, " +
"beginning from:\n " + iwe.getCulpritInContext() +
"\nThe culprit is probably the character '" +
iwe.getCulprit() + "'.");
return;
} catch (InvalidACIPException iae) {
JOptionPane.showMessageDialog(this,
"The ACIP you are trying to convert is invalid:\n" + iae.getMessage());
return;
}
}
start = i+1;

View file

@ -434,11 +434,19 @@ public class Jskad extends JPanel implements DocumentListener {
JMenuItem wylieTMWItem = new JMenuItem("Convert Wylie to Tibetan");
wylieTMWItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
toTibetan();
toTibetan(false);
}
});
convertSelectionMenu.add(wylieTMWItem);
JMenuItem ACIPTMWItem = new JMenuItem("Convert ACIP to Tibetan");
ACIPTMWItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
toTibetan(true);
}
});
convertSelectionMenu.add(ACIPTMWItem);
JMenu convertAllMenu = new JMenu("Convert All");
JMenuItem toTMItem = new JMenuItem("Convert Tibetan to TM"); // DLC FIXME: do it just in the selection?
@ -1122,9 +1130,9 @@ public class Jskad extends JPanel implements DocumentListener {
dp.paste(dp.getCaret().getDot());
}
private void toTibetan() {
private void toTibetan(boolean fromACIP) {
Jskad.this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
dp.toTibetanMachineWeb();
dp.toTibetanMachineWeb(fromACIP);
Jskad.this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
}

View file

@ -25,6 +25,8 @@ import javax.swing.text.rtf.RTFEditorKit;
import java.io.*;
import org.thdl.util.ThdlDebug;
import org.thdl.tib.text.ttt.ACIPTshegBarScanner;
import org.thdl.tib.text.ttt.ACIPConverter;
import org.thdl.tib.text.tshegbar.LegalTshegBar;
import org.thdl.tib.text.tshegbar.UnicodeConstants;
import org.thdl.tib.text.tshegbar.UnicodeUtils;
@ -310,6 +312,44 @@ public class TibTextUtils implements THDLWylieConstants {
public static final boolean[] weDoNotCareIfThereIsCorrespondingWylieOrNot
= new boolean[] { false };
/**
 * Converts a string of ACIP into TibetanMachineWeb and inserts that
 * into tdoc at offset loc.
 * <p>Scanner (lexical) errors abort the conversion with an
 * exception before tdoc is touched.  The boolean result of the
 * converter itself is not inspected here.
 * @param acip the ACIP you want to convert
 * @param tdoc the document in which to insert the TMW
 * @param loc the offset inside the document at which to insert the TMW
 * @throws InvalidACIPException if the ACIP is deemed invalid, i.e. if
 * it does not conform to the ACIP transcription rules (those in the
 * official document and the subtler rules pieced together by David
 * Chandler through study and private correspondence with Robert
 * Chilton) */
public static void insertTibetanMachineWebForACIP(String acip, TibetanDocument tdoc, int loc)
    throws InvalidACIPException
{
    StringBuffer errors = new StringBuffer();
    // NOTE(review): 500 is presumably the scanner's error cap
    // (maxErrors); the scanner returns null once the cap is hit.
    ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500);
    if (null == al || errors.length() > 0) {
        // Prefer the scanner's own diagnostics when it produced any.
        if (errors.length() > 0)
            throw new InvalidACIPException(errors.toString());
        else
            throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
    }
    String warningLevel = "Most";
    boolean colors = false;
    StringBuffer warnings = null;
    boolean putWarningsInOutput = false;
    // Compare string contents, not references: reference comparison
    // only works by accident when both operands are interned literals.
    if (!"None".equals(warningLevel)) {
        warnings = new StringBuffer();
        putWarningsInOutput = true;
    }
    try {
        ACIPConverter.convertToTMW(al, tdoc, errors, warnings,
                                   putWarningsInOutput, warningLevel, colors, loc);
    } catch (IOException e) {
        // No writer is involved when converting into a document, so
        // an IOException is unexpected; keep the cause for debugging.
        throw new Error("Can't happen: " + e, e);
    }
}
/**
* Converts a string of Extended Wylie into {@link DuffData DuffData}.
* @param wylie the Wylie you want to convert
@ -318,7 +358,7 @@ public class TibTextUtils implements THDLWylieConstants {
* @throws InvalidWylieException if the Wylie is deemed invalid,
* i.e. if it does not conform to the Extended Wylie standard
*/
public static DuffData[] getTibetanMachineWeb(String wylie) throws InvalidWylieException {
public static DuffData[] getTibetanMachineWebForEWTS(String wylie) throws InvalidWylieException {
List chars = new ArrayList();
DuffCode dc;
int start = 0;

View file

@ -226,30 +226,17 @@ public class TibetanDocument extends DefaultStyledDocument {
return insertDuff(tibetanFontSize, pos, glyphs, true, Color.BLACK);
}
/**
* Appends all DuffCodes in glyphs to the end of this document.
* @param glyphs the array of Tibetan data you want to insert
* @param color the color in which to insert, which is used if and only
* if {@link #colorsEnabled() colors are enabled}
*/
public void appendDuffCodes(DuffCode[] glyphs, Color color) {
// PERFORMANCE FIXME: this isn't so speedy, but it reuses
// existing code.
for (int i = 0; i < glyphs.length; i++) {
appendDuffCode(glyphs[i], color);
}
}
/**
* Inserts glyph into this document at position loc.
* @param loc the position at which to insert these glyphs
* @param glyph the Tibetan glyph you want to insert
* @param color the color in which to insert, which is used if and only
* if {@link #colorsEnabled() colors are enabled}
*/
public void appendDuffCode(DuffCode glyph, Color color) {
public void appendDuffCode(int loc, DuffCode glyph, Color color) {
// PERFORMANCE FIXME: this isn't so speedy, but it reuses
// existing code.
insertDuff(getLength(),
insertDuff(loc,
new DuffData[] { new DuffData(new String(new char[] { glyph.getCharacter() }),
glyph.getFontNum()) },
color);

View file

@ -26,11 +26,11 @@ public class TibetanHTML {
String next = tokenizer.nextToken();
if (next.equals("\t") || next.equals("\n")) {
buffer.append("<wbr/>");
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb("_")));
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS("_")));
buffer.append("<wbr/>");
}
else
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb(next)));
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS(next)));
}
return buffer.toString();
} catch (InvalidWylieException ive) {
@ -95,11 +95,11 @@ public class TibetanHTML {
String next = tokenizer.nextToken();
if (next.equals("\t") || next.equals("\n")) {
buffer.append("<wbr/>");
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb("_")));
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS("_")));
buffer.append("<wbr/>");
}
else
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb(next)));
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS(next)));
}
return buffer.toString();
} catch (InvalidWylieException ive) {
@ -163,11 +163,11 @@ public class TibetanHTML {
String next = tokenizer.nextToken();
if (next.equals("\t") || next.equals("\n")) {
buffer.append("<wbr>");
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb("_")));
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS("_")));
buffer.append("<wbr>");
}
else
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb(next)));
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS(next)));
}
return buffer.toString();
} catch (InvalidWylieException ive) {

View file

@ -148,22 +148,25 @@ public class ACIPConverter {
TibetanDocument tdoc = new TibetanDocument();
boolean rv
= convertToTMW(scan, tdoc, errors, warnings,
writeWarningsToResult, warningLevel, colors);
writeWarningsToResult, warningLevel, colors,
tdoc.getLength());
tdoc.writeRTFOutputStream(out);
return rv;
}
private static boolean convertToTMW(ArrayList scan,
TibetanDocument tdoc,
StringBuffer errors,
StringBuffer warnings,
boolean writeWarningsToResult,
String warningLevel,
boolean colors)
public static boolean convertToTMW(ArrayList scan,
TibetanDocument tdoc,
StringBuffer errors,
StringBuffer warnings,
boolean writeWarningsToResult,
String warningLevel,
boolean colors,
int loc)
throws IOException
{
return convertTo(false, scan, null, tdoc, errors, warnings,
writeWarningsToResult, warningLevel, colors);
writeWarningsToResult, warningLevel, colors,
loc, loc == tdoc.getLength());
}
/** Returns UTF-8 encoded Unicode. A bit indirect, so use this
@ -225,7 +228,7 @@ public class ACIPConverter {
throws IOException
{
return convertTo(true, scan, out, null, errors, warnings,
writeWarningsToOut, warningLevel, false);
writeWarningsToOut, warningLevel, false, -1, true);
}
private static boolean peekaheadFindsSpacesAndComma(ArrayList /* of TString */ scan,
@ -254,9 +257,12 @@ public class ACIPConverter {
StringBuffer warnings,
boolean writeWarningsToOut,
String warningLevel,
boolean colors)
boolean colors,
int tdocstart,
boolean isCleanDoc)
throws IOException
{
try {
int smallFontSize = -1;
int regularFontSize = -1;
if (null != tdoc) {
@ -297,7 +303,10 @@ public class ACIPConverter {
hasErrors = true;
String text = "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: " + s.getText() + "]";
if (null != writer) writer.write(text);
if (null != tdoc) tdoc.appendRoman(text, Color.RED);
if (null != tdoc) {
tdoc.appendRoman(tdocstart, text, Color.RED);
tdocstart += text.length();
}
} else if (stype == TString.TSHEG_BAR_ADORNMENT) {
if (lastGuyWasNonPunct) {
String err = "[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot yet convert " + s.getText() + " because the converter's author is unclear what the result should be.]";
@ -314,9 +323,11 @@ public class ACIPConverter {
= ACIPRules.getWylieForACIPOther(s.getText());
if (null == wylie) {
hasErrors = true;
tdoc.appendRoman(err, Color.RED);
tdoc.appendRoman(tdocstart, err, Color.RED);
tdocstart += err.length();
} else {
tdoc.appendDuffCode(TibetanMachineWeb.getGlyph(wylie),
tdoc.appendDuffCode(tdocstart++,
TibetanMachineWeb.getGlyph(wylie),
Color.BLACK);
}
}
@ -331,7 +342,10 @@ public class ACIPConverter {
if (writeWarningsToOut) {
String text = "[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: " + s.getText() + "]";
if (null != writer) writer.write(text);
if (null != tdoc) tdoc.appendRoman(text, Color.RED);
if (null != tdoc) {
tdoc.appendRoman(tdocstart, text, Color.RED);
tdocstart += text.length();
}
}
if (null != warnings) {
@ -348,7 +362,10 @@ public class ACIPConverter {
+ s.getText()
+ ((stype == TString.FOLIO_MARKER) ? "}" : ""));
if (null != writer) writer.write(text);
if (null != tdoc) tdoc.appendRoman(text, Color.BLACK);
if (null != tdoc) {
tdoc.appendRoman(tdocstart, text, Color.BLACK);
tdocstart += text.length();
}
} else {
String unicode = null;
Object[] duff = null;
@ -362,7 +379,11 @@ public class ACIPConverter {
hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS THESE ERRORS: " + acipError + "]";
if (null != writer) writer.write(errorMessage);
if (null != tdoc) tdoc.appendRoman(errorMessage, Color.RED);
if (null != tdoc) {
tdoc.appendRoman(tdocstart, errorMessage,
Color.RED);
tdocstart += errorMessage.length();
}
if (null != errors)
errors.append(errorMessage + "\n");
} else {
@ -373,7 +394,11 @@ public class ACIPConverter {
hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " IS ESSENTIALLY NOTHING.]";
if (null != writer) writer.write(errorMessage);
if (null != tdoc) tdoc.appendRoman(errorMessage, Color.RED);
if (null != tdoc) {
tdoc.appendRoman(tdocstart, errorMessage,
Color.RED);
tdocstart += errorMessage.length();
}
if (null != errors)
errors.append(errorMessage + "\n");
} else {
@ -384,7 +409,12 @@ public class ACIPConverter {
hasErrors = true;
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS NO LEGAL PARSES.]";
if (null != writer) writer.write(errorMessage);
if (null != tdoc) tdoc.appendRoman(errorMessage, Color.RED);
if (null != tdoc) {
tdoc.appendRoman(tdocstart,
errorMessage,
Color.RED);
tdocstart += errorMessage.length();
}
if (null != errors)
errors.append(errorMessage + "\n");
} else {
@ -420,7 +450,12 @@ public class ACIPConverter {
= ("[#WARNING CONVERTING ACIP DOCUMENT: "
+ warning + "]");
if (null != writer) writer.write(text);
if (null != tdoc) tdoc.appendRoman(text, Color.RED);
if (null != tdoc) {
tdoc.appendRoman(tdocstart,
text,
Color.RED);
tdocstart += text.length();
}
}
if (null != warnings) {
warnings.append(warning);
@ -494,7 +529,11 @@ public class ACIPConverter {
done = true;
}
if (null != tdoc) {
tdoc.appendRoman(" ", Color.BLACK);
String x = " ";
tdoc.appendRoman(tdocstart,
x,
Color.BLACK);
tdocstart += x.length();
continue;
}
// DLC AM I DOING THIS? By normal Tibetan & Dzongkha spelling, writing, and input rules
@ -511,7 +550,8 @@ public class ACIPConverter {
&& lpl.get(0).getLeft().equals("NG")) {
DuffCode tshegDuff = TibetanMachineWeb.getGlyph(" ");
if (null == tshegDuff) throw new Error("tsheg duff");
tdoc.appendDuffCode(tshegDuff, lastColor);
tdoc.appendDuffCode(tdocstart++,
tshegDuff, lastColor);
}
if (!done) {
@ -521,7 +561,9 @@ public class ACIPConverter {
|| s.getText().equals("\t")
|| s.getText().equals("\n")
|| s.getText().equals("\r\n")) {
tdoc.appendRoman(s.getText(), Color.BLACK);
tdoc.appendRoman(tdocstart, s.getText(),
Color.BLACK);
tdocstart += s.getText().length();
continue;
} else {
String wy = ACIPRules.getWylieForACIPOther(s.getText());
@ -555,14 +597,17 @@ public class ACIPConverter {
if (null != duff && 0 != duff.length) {
for (int j = 0; j < duff.length; j++) {
if (duff[j] instanceof DuffCode)
tdoc.appendDuffCode((DuffCode)duff[j],
tdoc.appendDuffCode(tdocstart++,
(DuffCode)duff[j],
color);
else {
hasErrors = true;
if (null != errors)
errors.append((String)duff[j] + "\n");
tdoc.appendRoman((String)duff[j],
tdoc.appendRoman(tdocstart,
(String)duff[j],
Color.RED);
tdocstart += ((String)duff[j]).length();
}
}
} else {
@ -579,6 +624,11 @@ public class ACIPConverter {
if (null != writer) {
writer.close();
}
if (isCleanDoc && null != tdoc && tdocstart != tdoc.getLength())
throw new Error("Oops -- we dropped something from the output! tdocstart++; and tdocstart+=xyz; are not being used correctly.");
return !hasErrors;
} catch (javax.swing.text.BadLocationException e) {
throw new IllegalArgumentException("tdocstart is no good: " + tdocstart);
}
}
}

View file

@ -154,7 +154,7 @@ public class ACIPTshegBarScanner {
al.add(new TString("Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
}
@ -173,7 +173,7 @@ public class ACIPTshegBarScanner {
al.add(new TString("Found a truly unmatched close bracket, " + s.substring(i, i+1),
TString.ERROR));
if (null != errors) {
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found a truly unmatched close bracket, ] or }.\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
@ -182,7 +182,7 @@ public class ACIPTshegBarScanner {
al.add(new TString("Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
startOfString = i+1;
@ -438,7 +438,7 @@ public class ACIPTshegBarScanner {
al.add(new TString("Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.",
TString.ERROR));
if (null != errors) {
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
@ -457,7 +457,7 @@ public class ACIPTshegBarScanner {
}
al.add(new TString("Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?",
TString.ERROR));
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
}
@ -512,7 +512,7 @@ public class ACIPTshegBarScanner {
al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
startOfString = i+numdigits+3;
@ -534,7 +534,7 @@ public class ACIPTshegBarScanner {
al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
startOfString = i+1; // DLC FIXME: skip over more?
@ -630,7 +630,7 @@ public class ACIPTshegBarScanner {
al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
startOfString = i+1;
@ -656,7 +656,7 @@ public class ACIPTshegBarScanner {
al.add(new TString("Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.",
TString.ERROR));
if (errors != null) {
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
@ -692,7 +692,7 @@ public class ACIPTshegBarScanner {
al.add(new TString("Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} else {
@ -706,7 +706,7 @@ public class ACIPTshegBarScanner {
al.add(new TString("Unexpected closing parenthesis, ), found.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Unexpected closing parenthesis, ), found.\n");
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
} else {
@ -851,19 +851,19 @@ public class ACIPTshegBarScanner {
al.add(new TString("Found an illegal, unprintable character.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal, unprintable character.\n");
} else if ('\\' == ch) {
al.add(new TString("Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n");
} else {
al.add(new TString("Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".",
TString.ERROR));
if (null != errors)
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
+ "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".\n");
}
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;

View file

@ -7203,28 +7203,28 @@ tstHelper("ZUR");
shelp("", "", "[]");
shelp("[DD]", "");
shelp("[",
"Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
"Offset 0: Found an illegal open bracket (in context, this is [). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
shelp("{",
"Offset 0 or maybe 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
"Offset 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
shelp("DD", "");
shelp("DD]",
"Offset 2 or maybe 2: Found a truly unmatched close bracket, ] or }.\nOffset 2 or maybe 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
"Offset 2: Found a truly unmatched close bracket, ] or }.\nOffset 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
shelp("///NYA", "Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
shelp("///NYA", "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
shelp("/NYA/", "");
shelp("[?][BP][LS][DD1][DD2][DDD][DR][# (<{A COMMENT)}>]", "");
shelp("[LS][# A [[[[[COMMENT][LS]",
"Offset 9 or maybe 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 10 or maybe 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 11 or maybe 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 12 or maybe 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 13 or maybe 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
"Offset 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+ "Offset 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
shelp("[ILLEGAL COMMENT]",
"Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16 or maybe 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
"Offset 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
shelp("(BSKYABS GRO)", "");
shelp("BSKYABS GRO)", "Offset 11 or maybe 11: Unexpected closing parenthesis, ), found.\n");
shelp("BSKYABS GRO)", "Offset 11: Unexpected closing parenthesis, ), found.\n");
shelp("BSKYABS GRO(", "Offset END: Unmatched open parenthesis, (, found.\n");
shelp("((NESTAGE))", "Offset 1 or maybe 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10 or maybe 10: Unexpected closing parenthesis, ), found.\n");
shelp("((NESTAGE))", "Offset 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10: Unexpected closing parenthesis, ), found.\n");
shelp("(BA)(PA)NYA(CA)", "");
shelp("NYAx", "");
shelp("NYA x", "");
@ -7253,9 +7253,9 @@ tstHelper("ZUR");
shelp("(NYA ", "Offset END: Unmatched open parenthesis, (, found.\n");
shelp("[*NYA ", "Offset END: Unmatched open bracket found. A correction does not terminate.\n");
shelp("?", "", "[QUESTION:{?}]");
shelp("KHAN~ BAR ", "Offset 4 or maybe 4: Found an illegal character, ~, with ordinal 126.\n");
shelp("KHAN~ BAR ", "Offset 4: Found an illegal character, ~, with ordinal 126.\n");
shelp("[* Correction with []]",
"Offset 5 or maybe 5: Found an illegal character, r, with ordinal 114.\nOffset 6 or maybe 6: Found an illegal character, r, with ordinal 114.\nOffset 7 or maybe 7: Found an illegal character, e, with ordinal 101.\nOffset 8 or maybe 8: Found an illegal character, c, with ordinal 99.\nOffset 14 or maybe 14: Found an illegal character, w, with ordinal 119.\nOffset 19 or maybe 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21 or maybe 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
"Offset 5: Found an illegal character, r, with ordinal 114.\nOffset 6: Found an illegal character, r, with ordinal 114.\nOffset 7: Found an illegal character, e, with ordinal 101.\nOffset 8: Found an illegal character, c, with ordinal 99.\nOffset 14: Found an illegal character, w, with ordinal 119.\nOffset 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
// DLC DOC: the line SDIG PA'I GROGS PO'I LAG TU SON PAR 'GYUR PA is followed by a blank line. Note that it's "PA", not "PA ", ending it. We autocorrect to the latter.
@ -7271,7 +7271,7 @@ tstHelper("ZUR");
uhelp(" 1\\ ", "\u0f0b\u0f21\u0f84\u0f0b");
}
shelp("K\\,",
"Offset 1 or maybe 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
"Offset 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}, TIBETAN_PUNCTUATION:{,}]");
@ -7315,14 +7315,14 @@ tstHelper("ZUR");
shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]");
shelp("@001 ", "", "[FOLIO_MARKER:{@001}, TIBETAN_PUNCTUATION:{ }]");
shelp("@19-20A",
"Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n",
"Offset 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n",
"[ERROR:{Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // DLC FIXME: yes it occurs in the kangyur.
shelp("@[7B]", "");
shelp("@012A.3KA",
"",
"[FOLIO_MARKER:{@012A.3}, TIBETAN_NON_PUNCTUATION:{KA}]");
shelp("@012A.34",
"Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n",
"Offset 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n",
"[ERROR:{Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.}, TIBETAN_NON_PUNCTUATION:{34}]");
shelp("@[07B]", "");
shelp("@[00007B]", "");
@ -7339,7 +7339,7 @@ tstHelper("ZUR");
shelp("{ DD }", "", "[DD:{{ DD }}]"); // TD3790E2.ACT
shelp("{ BP }", "", "[BP:{{ BP }}]"); // TD3790E2.ACT
shelp("//NYA\\\\",
"Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5 or maybe 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\nOffset 6 or maybe 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
"Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\nOffset 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
"[START_SLASH:{/}, ERROR:{Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}]");
}