Jskad has a new feature: Convert Selection from ACIP to Tibetan. It uses the ACIP converter to do its work.
Improved some error messages from the ACIP->Tibetan converter.
This commit is contained in:
parent
5ce84d4d9a
commit
4b1395e0ba
8 changed files with 193 additions and 94 deletions
|
@ -1549,7 +1549,7 @@ public void paste(int offset) {
|
|||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
} else {
|
||||
DuffData[] dd = TibTextUtils.getTibetanMachineWeb(next);
|
||||
DuffData[] dd = TibTextUtils.getTibetanMachineWebForEWTS(next);
|
||||
offset = getTibDoc().insertDuff(offset, dd);
|
||||
}
|
||||
}
|
||||
|
@ -1563,21 +1563,24 @@ public void paste(int offset) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Converts the currently selected text from Extended Wylie to TibetanMachineWeb.
|
||||
*/
|
||||
public void toTibetanMachineWeb() {
|
||||
* Converts the currently selected text from Roman transliteration to
|
||||
* TibetanMachineWeb.
|
||||
* @param fromACIP true if the selection is ACIP, false if it is EWTS
|
||||
* */
|
||||
public void toTibetanMachineWeb(boolean fromACIP) {
|
||||
int start = getSelectionStart();
|
||||
int end = getSelectionEnd();
|
||||
|
||||
toTibetanMachineWeb(start, end);
|
||||
toTibetanMachineWeb(fromACIP, start, end);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a stretch of text from Roman transliteration (ACIP or Extended Wylie) to TibetanMachineWeb.
|
||||
* @param fromACIP true if the selection is ACIP, false if it is EWTS
|
||||
* @param start the begin point for the conversion
|
||||
* @param end the end point for the conversion
|
||||
*/
|
||||
public void toTibetanMachineWeb(int start, int end) {
|
||||
public void toTibetanMachineWeb(boolean fromACIP, int start, int end) {
|
||||
if (start == end)
|
||||
return;
|
||||
|
||||
|
@ -1599,17 +1602,28 @@ public void paste(int offset) {
|
|||
if ((0 != TibetanMachineWeb.getTMWFontNumber(fontName)) || i==endPos.getOffset()) {
|
||||
if (i != start) {
|
||||
try {
|
||||
DuffData[] duffdata = TibTextUtils.getTibetanMachineWeb(sb.toString());
|
||||
getTibDoc().remove(start, i-start);
|
||||
getTibDoc().insertDuff(start, duffdata);
|
||||
}
|
||||
catch (InvalidWylieException iwe) {
|
||||
DuffData[] duffdata = null;
|
||||
if (fromACIP) {
|
||||
getTibDoc().remove(start, i-start);
|
||||
TibTextUtils.insertTibetanMachineWebForACIP(sb.toString(), getTibDoc(), start);
|
||||
}
|
||||
else
|
||||
duffdata = TibTextUtils.getTibetanMachineWebForEWTS(sb.toString());
|
||||
if (!fromACIP) {
|
||||
getTibDoc().remove(start, i-start);
|
||||
getTibDoc().insertDuff(start, duffdata);
|
||||
}
|
||||
} catch (InvalidWylieException iwe) {
|
||||
JOptionPane.showMessageDialog(this,
|
||||
"The Wylie you are trying to convert is invalid, " +
|
||||
"beginning from:\n " + iwe.getCulpritInContext() +
|
||||
"\nThe culprit is probably the character '" +
|
||||
iwe.getCulprit() + "'.");
|
||||
return;
|
||||
} catch (InvalidACIPException iae) {
|
||||
JOptionPane.showMessageDialog(this,
|
||||
"The ACIP you are trying to convert is invalid:\n" + iae.getMessage());
|
||||
return;
|
||||
}
|
||||
}
|
||||
start = i+1;
|
||||
|
|
|
@ -434,11 +434,19 @@ public class Jskad extends JPanel implements DocumentListener {
|
|||
JMenuItem wylieTMWItem = new JMenuItem("Convert Wylie to Tibetan");
|
||||
wylieTMWItem.addActionListener(new ThdlActionListener() {
|
||||
public void theRealActionPerformed(ActionEvent e) {
|
||||
toTibetan();
|
||||
toTibetan(false);
|
||||
}
|
||||
});
|
||||
convertSelectionMenu.add(wylieTMWItem);
|
||||
|
||||
JMenuItem ACIPTMWItem = new JMenuItem("Convert ACIP to Tibetan");
|
||||
ACIPTMWItem.addActionListener(new ThdlActionListener() {
|
||||
public void theRealActionPerformed(ActionEvent e) {
|
||||
toTibetan(true);
|
||||
}
|
||||
});
|
||||
convertSelectionMenu.add(ACIPTMWItem);
|
||||
|
||||
JMenu convertAllMenu = new JMenu("Convert All");
|
||||
|
||||
JMenuItem toTMItem = new JMenuItem("Convert Tibetan to TM"); // DLC FIXME: do it just in the selection?
|
||||
|
@ -1122,9 +1130,9 @@ public class Jskad extends JPanel implements DocumentListener {
|
|||
dp.paste(dp.getCaret().getDot());
|
||||
}
|
||||
|
||||
private void toTibetan() {
|
||||
private void toTibetan(boolean fromACIP) {
|
||||
Jskad.this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
|
||||
dp.toTibetanMachineWeb();
|
||||
dp.toTibetanMachineWeb(fromACIP);
|
||||
Jskad.this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,8 @@ import javax.swing.text.rtf.RTFEditorKit;
|
|||
import java.io.*;
|
||||
|
||||
import org.thdl.util.ThdlDebug;
|
||||
import org.thdl.tib.text.ttt.ACIPTshegBarScanner;
|
||||
import org.thdl.tib.text.ttt.ACIPConverter;
|
||||
import org.thdl.tib.text.tshegbar.LegalTshegBar;
|
||||
import org.thdl.tib.text.tshegbar.UnicodeConstants;
|
||||
import org.thdl.tib.text.tshegbar.UnicodeUtils;
|
||||
|
@ -310,6 +312,44 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
public static final boolean[] weDoNotCareIfThereIsCorrespondingWylieOrNot
|
||||
= new boolean[] { false };
|
||||
|
||||
/**
|
||||
* Converts a string of ACIP into TibetanMachineWeb and inserts that
|
||||
* into tdoc at offset loc.
|
||||
* @param acip the ACIP you want to convert
|
||||
* @param tdoc the document in which to insert the TMW
|
||||
* @param loc the offset inside the document at which to insert the TMW
|
||||
* @throws InvalidACIPException if the ACIP is deemed invalid, i.e. if
|
||||
* it does not conform to the ACIP transcription rules (those in the
|
||||
* official document and the subtler rules pieced together by David
|
||||
* Chandler through study and private correspondence with Robert
|
||||
* Chilton) */
|
||||
public static void insertTibetanMachineWebForACIP(String acip, TibetanDocument tdoc, int loc)
|
||||
throws InvalidACIPException
|
||||
{
|
||||
StringBuffer errors = new StringBuffer();
|
||||
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500);
|
||||
if (null == al || errors.length() > 0) {
|
||||
if (errors.length() > 0)
|
||||
throw new InvalidACIPException(errors.toString());
|
||||
else
|
||||
throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
|
||||
}
|
||||
String warningLevel = "Most";
|
||||
boolean colors = false;
|
||||
StringBuffer warnings = null;
|
||||
boolean putWarningsInOutput = false;
|
||||
if ("None" != warningLevel) {
|
||||
warnings = new StringBuffer();
|
||||
putWarningsInOutput = true;
|
||||
}
|
||||
try {
|
||||
ACIPConverter.convertToTMW(al, tdoc, errors, warnings,
|
||||
putWarningsInOutput, warningLevel, colors, loc);
|
||||
} catch (IOException e) {
|
||||
throw new Error("Can't happen: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a string of Extended Wylie into {@link DuffData DuffData}.
|
||||
* @param wylie the Wylie you want to convert
|
||||
|
@ -318,7 +358,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
* @throws InvalidWylieException if the Wylie is deemed invalid,
|
||||
* i.e. if it does not conform to the Extended Wylie standard
|
||||
*/
|
||||
public static DuffData[] getTibetanMachineWeb(String wylie) throws InvalidWylieException {
|
||||
public static DuffData[] getTibetanMachineWebForEWTS(String wylie) throws InvalidWylieException {
|
||||
List chars = new ArrayList();
|
||||
DuffCode dc;
|
||||
int start = 0;
|
||||
|
|
|
@ -226,30 +226,17 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
return insertDuff(tibetanFontSize, pos, glyphs, true, Color.BLACK);
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends all DuffCodes in glyphs to the end of this document.
|
||||
* @param glyphs the array of Tibetan data you want to insert
|
||||
* @param color the color in which to insert, which is used if and only
|
||||
* if {@link #colorsEnabled() colors are enabled}
|
||||
*/
|
||||
public void appendDuffCodes(DuffCode[] glyphs, Color color) {
|
||||
// PERFORMANCE FIXME: this isn't so speedy, but it reuses
|
||||
// existing code.
|
||||
for (int i = 0; i < glyphs.length; i++) {
|
||||
appendDuffCode(glyphs[i], color);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Inserts glyph into this document at position loc.
|
||||
* @param loc the position at which to insert the glyph
|
||||
* @param glyph the Tibetan glyph you want to insert
|
||||
* @param color the color in which to insert, which is used if and only
|
||||
* if {@link #colorsEnabled() colors are enabled}
|
||||
*/
|
||||
public void appendDuffCode(DuffCode glyph, Color color) {
|
||||
public void appendDuffCode(int loc, DuffCode glyph, Color color) {
|
||||
// PERFORMANCE FIXME: this isn't so speedy, but it reuses
|
||||
// existing code.
|
||||
insertDuff(getLength(),
|
||||
insertDuff(loc,
|
||||
new DuffData[] { new DuffData(new String(new char[] { glyph.getCharacter() }),
|
||||
glyph.getFontNum()) },
|
||||
color);
|
||||
|
|
|
@ -26,11 +26,11 @@ public class TibetanHTML {
|
|||
String next = tokenizer.nextToken();
|
||||
if (next.equals("\t") || next.equals("\n")) {
|
||||
buffer.append("<wbr/>");
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb("_")));
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS("_")));
|
||||
buffer.append("<wbr/>");
|
||||
}
|
||||
else
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb(next)));
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS(next)));
|
||||
}
|
||||
return buffer.toString();
|
||||
} catch (InvalidWylieException ive) {
|
||||
|
@ -95,11 +95,11 @@ public class TibetanHTML {
|
|||
String next = tokenizer.nextToken();
|
||||
if (next.equals("\t") || next.equals("\n")) {
|
||||
buffer.append("<wbr/>");
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb("_")));
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS("_")));
|
||||
buffer.append("<wbr/>");
|
||||
}
|
||||
else
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb(next)));
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS(next)));
|
||||
}
|
||||
return buffer.toString();
|
||||
} catch (InvalidWylieException ive) {
|
||||
|
@ -163,11 +163,11 @@ public class TibetanHTML {
|
|||
String next = tokenizer.nextToken();
|
||||
if (next.equals("\t") || next.equals("\n")) {
|
||||
buffer.append("<wbr>");
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb("_")));
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS("_")));
|
||||
buffer.append("<wbr>");
|
||||
}
|
||||
else
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb(next)));
|
||||
buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS(next)));
|
||||
}
|
||||
return buffer.toString();
|
||||
} catch (InvalidWylieException ive) {
|
||||
|
|
|
@ -148,22 +148,25 @@ public class ACIPConverter {
|
|||
TibetanDocument tdoc = new TibetanDocument();
|
||||
boolean rv
|
||||
= convertToTMW(scan, tdoc, errors, warnings,
|
||||
writeWarningsToResult, warningLevel, colors);
|
||||
writeWarningsToResult, warningLevel, colors,
|
||||
tdoc.getLength());
|
||||
tdoc.writeRTFOutputStream(out);
|
||||
return rv;
|
||||
}
|
||||
|
||||
private static boolean convertToTMW(ArrayList scan,
|
||||
TibetanDocument tdoc,
|
||||
StringBuffer errors,
|
||||
StringBuffer warnings,
|
||||
boolean writeWarningsToResult,
|
||||
String warningLevel,
|
||||
boolean colors)
|
||||
public static boolean convertToTMW(ArrayList scan,
|
||||
TibetanDocument tdoc,
|
||||
StringBuffer errors,
|
||||
StringBuffer warnings,
|
||||
boolean writeWarningsToResult,
|
||||
String warningLevel,
|
||||
boolean colors,
|
||||
int loc)
|
||||
throws IOException
|
||||
{
|
||||
return convertTo(false, scan, null, tdoc, errors, warnings,
|
||||
writeWarningsToResult, warningLevel, colors);
|
||||
writeWarningsToResult, warningLevel, colors,
|
||||
loc, loc == tdoc.getLength());
|
||||
}
|
||||
|
||||
/** Returns UTF-8 encoded Unicode. A bit indirect, so use this
|
||||
|
@ -225,7 +228,7 @@ public class ACIPConverter {
|
|||
throws IOException
|
||||
{
|
||||
return convertTo(true, scan, out, null, errors, warnings,
|
||||
writeWarningsToOut, warningLevel, false);
|
||||
writeWarningsToOut, warningLevel, false, -1, true);
|
||||
}
|
||||
|
||||
private static boolean peekaheadFindsSpacesAndComma(ArrayList /* of TString */ scan,
|
||||
|
@ -254,9 +257,12 @@ public class ACIPConverter {
|
|||
StringBuffer warnings,
|
||||
boolean writeWarningsToOut,
|
||||
String warningLevel,
|
||||
boolean colors)
|
||||
boolean colors,
|
||||
int tdocstart,
|
||||
boolean isCleanDoc)
|
||||
throws IOException
|
||||
{
|
||||
try {
|
||||
int smallFontSize = -1;
|
||||
int regularFontSize = -1;
|
||||
if (null != tdoc) {
|
||||
|
@ -297,7 +303,10 @@ public class ACIPConverter {
|
|||
hasErrors = true;
|
||||
String text = "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: " + s.getText() + "]";
|
||||
if (null != writer) writer.write(text);
|
||||
if (null != tdoc) tdoc.appendRoman(text, Color.RED);
|
||||
if (null != tdoc) {
|
||||
tdoc.appendRoman(tdocstart, text, Color.RED);
|
||||
tdocstart += text.length();
|
||||
}
|
||||
} else if (stype == TString.TSHEG_BAR_ADORNMENT) {
|
||||
if (lastGuyWasNonPunct) {
|
||||
String err = "[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot yet convert " + s.getText() + " because the converter's author is unclear what the result should be.]";
|
||||
|
@ -314,9 +323,11 @@ public class ACIPConverter {
|
|||
= ACIPRules.getWylieForACIPOther(s.getText());
|
||||
if (null == wylie) {
|
||||
hasErrors = true;
|
||||
tdoc.appendRoman(err, Color.RED);
|
||||
tdoc.appendRoman(tdocstart, err, Color.RED);
|
||||
tdocstart += err.length();
|
||||
} else {
|
||||
tdoc.appendDuffCode(TibetanMachineWeb.getGlyph(wylie),
|
||||
tdoc.appendDuffCode(tdocstart++,
|
||||
TibetanMachineWeb.getGlyph(wylie),
|
||||
Color.BLACK);
|
||||
}
|
||||
}
|
||||
|
@ -331,7 +342,10 @@ public class ACIPConverter {
|
|||
if (writeWarningsToOut) {
|
||||
String text = "[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: " + s.getText() + "]";
|
||||
if (null != writer) writer.write(text);
|
||||
if (null != tdoc) tdoc.appendRoman(text, Color.RED);
|
||||
if (null != tdoc) {
|
||||
tdoc.appendRoman(tdocstart, text, Color.RED);
|
||||
tdocstart += text.length();
|
||||
}
|
||||
}
|
||||
|
||||
if (null != warnings) {
|
||||
|
@ -348,7 +362,10 @@ public class ACIPConverter {
|
|||
+ s.getText()
|
||||
+ ((stype == TString.FOLIO_MARKER) ? "}" : ""));
|
||||
if (null != writer) writer.write(text);
|
||||
if (null != tdoc) tdoc.appendRoman(text, Color.BLACK);
|
||||
if (null != tdoc) {
|
||||
tdoc.appendRoman(tdocstart, text, Color.BLACK);
|
||||
tdocstart += text.length();
|
||||
}
|
||||
} else {
|
||||
String unicode = null;
|
||||
Object[] duff = null;
|
||||
|
@ -362,7 +379,11 @@ public class ACIPConverter {
|
|||
hasErrors = true;
|
||||
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS THESE ERRORS: " + acipError + "]";
|
||||
if (null != writer) writer.write(errorMessage);
|
||||
if (null != tdoc) tdoc.appendRoman(errorMessage, Color.RED);
|
||||
if (null != tdoc) {
|
||||
tdoc.appendRoman(tdocstart, errorMessage,
|
||||
Color.RED);
|
||||
tdocstart += errorMessage.length();
|
||||
}
|
||||
if (null != errors)
|
||||
errors.append(errorMessage + "\n");
|
||||
} else {
|
||||
|
@ -373,7 +394,11 @@ public class ACIPConverter {
|
|||
hasErrors = true;
|
||||
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " IS ESSENTIALLY NOTHING.]";
|
||||
if (null != writer) writer.write(errorMessage);
|
||||
if (null != tdoc) tdoc.appendRoman(errorMessage, Color.RED);
|
||||
if (null != tdoc) {
|
||||
tdoc.appendRoman(tdocstart, errorMessage,
|
||||
Color.RED);
|
||||
tdocstart += errorMessage.length();
|
||||
}
|
||||
if (null != errors)
|
||||
errors.append(errorMessage + "\n");
|
||||
} else {
|
||||
|
@ -384,7 +409,12 @@ public class ACIPConverter {
|
|||
hasErrors = true;
|
||||
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS NO LEGAL PARSES.]";
|
||||
if (null != writer) writer.write(errorMessage);
|
||||
if (null != tdoc) tdoc.appendRoman(errorMessage, Color.RED);
|
||||
if (null != tdoc) {
|
||||
tdoc.appendRoman(tdocstart,
|
||||
errorMessage,
|
||||
Color.RED);
|
||||
tdocstart += errorMessage.length();
|
||||
}
|
||||
if (null != errors)
|
||||
errors.append(errorMessage + "\n");
|
||||
} else {
|
||||
|
@ -420,7 +450,12 @@ public class ACIPConverter {
|
|||
= ("[#WARNING CONVERTING ACIP DOCUMENT: "
|
||||
+ warning + "]");
|
||||
if (null != writer) writer.write(text);
|
||||
if (null != tdoc) tdoc.appendRoman(text, Color.RED);
|
||||
if (null != tdoc) {
|
||||
tdoc.appendRoman(tdocstart,
|
||||
text,
|
||||
Color.RED);
|
||||
tdocstart += text.length();
|
||||
}
|
||||
}
|
||||
if (null != warnings) {
|
||||
warnings.append(warning);
|
||||
|
@ -494,7 +529,11 @@ public class ACIPConverter {
|
|||
done = true;
|
||||
}
|
||||
if (null != tdoc) {
|
||||
tdoc.appendRoman(" ", Color.BLACK);
|
||||
String x = " ";
|
||||
tdoc.appendRoman(tdocstart,
|
||||
x,
|
||||
Color.BLACK);
|
||||
tdocstart += x.length();
|
||||
continue;
|
||||
}
|
||||
// DLC AM I DOING THIS? By normal Tibetan & Dzongkha spelling, writing, and input rules
|
||||
|
@ -511,7 +550,8 @@ public class ACIPConverter {
|
|||
&& lpl.get(0).getLeft().equals("NG")) {
|
||||
DuffCode tshegDuff = TibetanMachineWeb.getGlyph(" ");
|
||||
if (null == tshegDuff) throw new Error("tsheg duff");
|
||||
tdoc.appendDuffCode(tshegDuff, lastColor);
|
||||
tdoc.appendDuffCode(tdocstart++,
|
||||
tshegDuff, lastColor);
|
||||
}
|
||||
|
||||
if (!done) {
|
||||
|
@ -521,7 +561,9 @@ public class ACIPConverter {
|
|||
|| s.getText().equals("\t")
|
||||
|| s.getText().equals("\n")
|
||||
|| s.getText().equals("\r\n")) {
|
||||
tdoc.appendRoman(s.getText(), Color.BLACK);
|
||||
tdoc.appendRoman(tdocstart, s.getText(),
|
||||
Color.BLACK);
|
||||
tdocstart += s.getText().length();
|
||||
continue;
|
||||
} else {
|
||||
String wy = ACIPRules.getWylieForACIPOther(s.getText());
|
||||
|
@ -555,14 +597,17 @@ public class ACIPConverter {
|
|||
if (null != duff && 0 != duff.length) {
|
||||
for (int j = 0; j < duff.length; j++) {
|
||||
if (duff[j] instanceof DuffCode)
|
||||
tdoc.appendDuffCode((DuffCode)duff[j],
|
||||
tdoc.appendDuffCode(tdocstart++,
|
||||
(DuffCode)duff[j],
|
||||
color);
|
||||
else {
|
||||
hasErrors = true;
|
||||
if (null != errors)
|
||||
errors.append((String)duff[j] + "\n");
|
||||
tdoc.appendRoman((String)duff[j],
|
||||
tdoc.appendRoman(tdocstart,
|
||||
(String)duff[j],
|
||||
Color.RED);
|
||||
tdocstart += ((String)duff[j]).length();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -579,6 +624,11 @@ public class ACIPConverter {
|
|||
if (null != writer) {
|
||||
writer.close();
|
||||
}
|
||||
if (isCleanDoc && null != tdoc && tdocstart != tdoc.getLength())
|
||||
throw new Error("Oops -- we dropped something from the output! tdocstart++; and tdocstart+=xyz; are not being used correctly.");
|
||||
return !hasErrors;
|
||||
} catch (javax.swing.text.BadLocationException e) {
|
||||
throw new IllegalArgumentException("tdocstart is no good: " + tdocstart);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -154,7 +154,7 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new TString("Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
|
@ -173,7 +173,7 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new TString("Found a truly unmatched close bracket, " + s.substring(i, i+1),
|
||||
TString.ERROR));
|
||||
if (null != errors) {
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found a truly unmatched close bracket, ] or }.\n");
|
||||
}
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
|
@ -182,7 +182,7 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new TString("Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+1;
|
||||
|
@ -438,7 +438,7 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new TString("Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.",
|
||||
TString.ERROR));
|
||||
if (null != errors) {
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.\n");
|
||||
}
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
|
@ -457,7 +457,7 @@ public class ACIPTshegBarScanner {
|
|||
}
|
||||
al.add(new TString("Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?",
|
||||
TString.ERROR));
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
|
@ -512,7 +512,7 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+numdigits+3;
|
||||
|
@ -534,7 +534,7 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+1; // DLC FIXME: skip over more?
|
||||
|
@ -630,7 +630,7 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+1;
|
||||
|
@ -656,7 +656,7 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new TString("Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.",
|
||||
TString.ERROR));
|
||||
if (errors != null) {
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\n");
|
||||
}
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
|
@ -692,7 +692,7 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new TString("Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
} else {
|
||||
|
@ -706,7 +706,7 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new TString("Unexpected closing parenthesis, ), found.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Unexpected closing parenthesis, ), found.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
} else {
|
||||
|
@ -851,19 +851,19 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new TString("Found an illegal, unprintable character.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found an illegal, unprintable character.\n");
|
||||
} else if ('\\' == ch) {
|
||||
al.add(new TString("Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n");
|
||||
} else {
|
||||
al.add(new TString("Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".\n");
|
||||
}
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
|
|
|
@ -7203,28 +7203,28 @@ tstHelper("ZUR");
|
|||
shelp("", "", "[]");
|
||||
shelp("[DD]", "");
|
||||
shelp("[",
|
||||
"Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
|
||||
"Offset 0: Found an illegal open bracket (in context, this is [). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
|
||||
shelp("{",
|
||||
"Offset 0 or maybe 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
|
||||
"Offset 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
|
||||
shelp("DD", "");
|
||||
shelp("DD]",
|
||||
"Offset 2 or maybe 2: Found a truly unmatched close bracket, ] or }.\nOffset 2 or maybe 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
"Offset 2: Found a truly unmatched close bracket, ] or }.\nOffset 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
|
||||
shelp("///NYA", "Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
|
||||
shelp("///NYA", "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
|
||||
shelp("/NYA/", "");
|
||||
shelp("[?][BP][LS][DD1][DD2][DDD][DR][# (<{A COMMENT)}>]", "");
|
||||
shelp("[LS][# A [[[[[COMMENT][LS]",
|
||||
"Offset 9 or maybe 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 10 or maybe 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 11 or maybe 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 12 or maybe 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 13 or maybe 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
|
||||
"Offset 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
|
||||
shelp("[ILLEGAL COMMENT]",
|
||||
"Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16 or maybe 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
"Offset 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
shelp("(BSKYABS GRO)", "");
|
||||
shelp("BSKYABS GRO)", "Offset 11 or maybe 11: Unexpected closing parenthesis, ), found.\n");
|
||||
shelp("BSKYABS GRO)", "Offset 11: Unexpected closing parenthesis, ), found.\n");
|
||||
shelp("BSKYABS GRO(", "Offset END: Unmatched open parenthesis, (, found.\n");
|
||||
shelp("((NESTAGE))", "Offset 1 or maybe 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10 or maybe 10: Unexpected closing parenthesis, ), found.\n");
|
||||
shelp("((NESTAGE))", "Offset 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10: Unexpected closing parenthesis, ), found.\n");
|
||||
shelp("(BA)(PA)NYA(CA)", "");
|
||||
shelp("NYAx", "");
|
||||
shelp("NYA x", "");
|
||||
|
@ -7253,9 +7253,9 @@ tstHelper("ZUR");
|
|||
shelp("(NYA ", "Offset END: Unmatched open parenthesis, (, found.\n");
|
||||
shelp("[*NYA ", "Offset END: Unmatched open bracket found. A correction does not terminate.\n");
|
||||
shelp("?", "", "[QUESTION:{?}]");
|
||||
shelp("KHAN~ BAR ", "Offset 4 or maybe 4: Found an illegal character, ~, with ordinal 126.\n");
|
||||
shelp("KHAN~ BAR ", "Offset 4: Found an illegal character, ~, with ordinal 126.\n");
|
||||
shelp("[* Correction with []]",
|
||||
"Offset 5 or maybe 5: Found an illegal character, r, with ordinal 114.\nOffset 6 or maybe 6: Found an illegal character, r, with ordinal 114.\nOffset 7 or maybe 7: Found an illegal character, e, with ordinal 101.\nOffset 8 or maybe 8: Found an illegal character, c, with ordinal 99.\nOffset 14 or maybe 14: Found an illegal character, w, with ordinal 119.\nOffset 19 or maybe 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21 or maybe 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
"Offset 5: Found an illegal character, r, with ordinal 114.\nOffset 6: Found an illegal character, r, with ordinal 114.\nOffset 7: Found an illegal character, e, with ordinal 101.\nOffset 8: Found an illegal character, c, with ordinal 99.\nOffset 14: Found an illegal character, w, with ordinal 119.\nOffset 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
|
||||
// DLC DOC: the line SDIG PA'I GROGS PO'I LAG TU SON PAR 'GYUR PA is followed by a blank line. Note that it's "PA", not "PA ", ending it. We autocorrect to the latter.
|
||||
|
||||
|
@ -7271,7 +7271,7 @@ tstHelper("ZUR");
|
|||
uhelp(" 1\\ ", "\u0f0b\u0f21\u0f84\u0f0b");
|
||||
}
|
||||
shelp("K\\,",
|
||||
"Offset 1 or maybe 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
|
||||
"Offset 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
|
||||
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}, TIBETAN_PUNCTUATION:{,}]");
|
||||
|
||||
|
||||
|
@ -7315,14 +7315,14 @@ tstHelper("ZUR");
|
|||
shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]");
|
||||
shelp("@001 ", "", "[FOLIO_MARKER:{@001}, TIBETAN_PUNCTUATION:{ }]");
|
||||
shelp("@19-20A",
|
||||
"Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n",
|
||||
"Offset 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n",
|
||||
"[ERROR:{Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // DLC FIXME: yes it occurs in the kangyur.
|
||||
shelp("@[7B]", "");
|
||||
shelp("@012A.3KA",
|
||||
"",
|
||||
"[FOLIO_MARKER:{@012A.3}, TIBETAN_NON_PUNCTUATION:{KA}]");
|
||||
shelp("@012A.34",
|
||||
"Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n",
|
||||
"Offset 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n",
|
||||
"[ERROR:{Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.}, TIBETAN_NON_PUNCTUATION:{34}]");
|
||||
shelp("@[07B]", "");
|
||||
shelp("@[00007B]", "");
|
||||
|
@ -7339,7 +7339,7 @@ tstHelper("ZUR");
|
|||
shelp("{ DD }", "", "[DD:{{ DD }}]"); // TD3790E2.ACT
|
||||
shelp("{ BP }", "", "[BP:{{ BP }}]"); // TD3790E2.ACT
|
||||
shelp("//NYA\\\\",
|
||||
"Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5 or maybe 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\nOffset 6 or maybe 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
|
||||
"Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\nOffset 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
|
||||
"[START_SLASH:{/}, ERROR:{Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}]");
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue