diff --git a/source/org/thdl/tib/input/DuffPane.java b/source/org/thdl/tib/input/DuffPane.java index 7b65bcc..9cfec57 100644 --- a/source/org/thdl/tib/input/DuffPane.java +++ b/source/org/thdl/tib/input/DuffPane.java @@ -1549,7 +1549,7 @@ public void paste(int offset) { ThdlDebug.noteIffyCode(); } } else { - DuffData[] dd = TibTextUtils.getTibetanMachineWeb(next); + DuffData[] dd = TibTextUtils.getTibetanMachineWebForEWTS(next); offset = getTibDoc().insertDuff(offset, dd); } } @@ -1563,21 +1563,24 @@ public void paste(int offset) { } /** -* Converts the currently selected text from Extended Wylie to TibetanMachineWeb. -*/ - public void toTibetanMachineWeb() { +* Converts the currently selected text from Roman transliteration to +* TibetanMachineWeb. +* @param fromACIP true if the selection is ACIP, false if it is EWTS +* */ + public void toTibetanMachineWeb(boolean fromACIP) { int start = getSelectionStart(); int end = getSelectionEnd(); - toTibetanMachineWeb(start, end); + toTibetanMachineWeb(fromACIP, start, end); } /** * Converts a stretch of text from Extended Wylie to TibetanMachineWeb. 
+* @param fromACIP true if the selection is ACIP, false if it is EWTS * @param start the begin point for the conversion * @param end the end point for the conversion */ - public void toTibetanMachineWeb(int start, int end) { + public void toTibetanMachineWeb(boolean fromACIP, int start, int end) { if (start == end) return; @@ -1599,17 +1602,28 @@ public void paste(int offset) { if ((0 != TibetanMachineWeb.getTMWFontNumber(fontName)) || i==endPos.getOffset()) { if (i != start) { try { - DuffData[] duffdata = TibTextUtils.getTibetanMachineWeb(sb.toString()); - getTibDoc().remove(start, i-start); - getTibDoc().insertDuff(start, duffdata); - } - catch (InvalidWylieException iwe) { + DuffData[] duffdata = null; + if (fromACIP) { + getTibDoc().remove(start, i-start); + TibTextUtils.insertTibetanMachineWebForACIP(sb.toString(), getTibDoc(), start); + } + else + duffdata = TibTextUtils.getTibetanMachineWebForEWTS(sb.toString()); + if (!fromACIP) { + getTibDoc().remove(start, i-start); + getTibDoc().insertDuff(start, duffdata); + } + } catch (InvalidWylieException iwe) { JOptionPane.showMessageDialog(this, "The Wylie you are trying to convert is invalid, " + "beginning from:\n " + iwe.getCulpritInContext() + "\nThe culprit is probably the character '" + iwe.getCulprit() + "'."); return; + } catch (InvalidACIPException iae) { + JOptionPane.showMessageDialog(this, + "The ACIP you are trying to convert is invalid:\n" + iae.getMessage()); + return; } } start = i+1; diff --git a/source/org/thdl/tib/input/Jskad.java b/source/org/thdl/tib/input/Jskad.java index 8f04e89..a4c9b31 100644 --- a/source/org/thdl/tib/input/Jskad.java +++ b/source/org/thdl/tib/input/Jskad.java @@ -434,11 +434,19 @@ public class Jskad extends JPanel implements DocumentListener { JMenuItem wylieTMWItem = new JMenuItem("Convert Wylie to Tibetan"); wylieTMWItem.addActionListener(new ThdlActionListener() { public void theRealActionPerformed(ActionEvent e) { - toTibetan(); + toTibetan(false); } }); 
convertSelectionMenu.add(wylieTMWItem);
 
+        JMenuItem ACIPTMWItem = new JMenuItem("Convert ACIP to Tibetan");
+        ACIPTMWItem.addActionListener(new ThdlActionListener() {
+            public void theRealActionPerformed(ActionEvent e) {
+                toTibetan(true);
+            }
+        });
+        convertSelectionMenu.add(ACIPTMWItem);
+
         JMenu convertAllMenu = new JMenu("Convert All");
         JMenuItem toTMItem = new JMenuItem("Convert Tibetan to TM");
         // DLC FIXME: do it just in the selection?
@@ -1122,9 +1130,13 @@ public class Jskad extends JPanel implements DocumentListener {
         dp.paste(dp.getCaret().getDot());
     }
 
-    private void toTibetan() {
+    private void toTibetan(boolean fromACIP) {
         Jskad.this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
-        dp.toTibetanMachineWeb();
-        Jskad.this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
+        try {
+            dp.toTibetanMachineWeb(fromACIP);
+        } finally {
+            // Restore the cursor even if the conversion throws.
+            Jskad.this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
+        }
     }
 
diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java
index 695b093..dbb8f2e 100644
--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
@@ -25,6 +25,8 @@ import javax.swing.text.rtf.RTFEditorKit;
 import java.io.*;
 
 import org.thdl.util.ThdlDebug;
+import org.thdl.tib.text.ttt.ACIPTshegBarScanner;
+import org.thdl.tib.text.ttt.ACIPConverter;
 import org.thdl.tib.text.tshegbar.LegalTshegBar;
 import org.thdl.tib.text.tshegbar.UnicodeConstants;
 import org.thdl.tib.text.tshegbar.UnicodeUtils;
@@ -310,6 +312,44 @@ public class TibTextUtils implements THDLWylieConstants {
     public static final boolean[] weDoNotCareIfThereIsCorrespondingWylieOrNot
         = new boolean[] { false };
 
+/**
+* Converts a string of ACIP into TibetanMachineWeb and inserts that
+* into tdoc at offset loc.
+* @param acip the ACIP you want to convert
+* @param tdoc the document in which to insert the TMW
+* @param loc the offset inside the document at which to insert the TMW
+* @throws InvalidACIPException if the ACIP is deemed invalid, i.e. if
+* it does not conform to the ACIP transcription rules (those in the
+* official document and the subtler rules pieced together by David
+* Chandler through study and private correspondence with Robert
+* Chilton) */
+    public static void insertTibetanMachineWebForACIP(String acip, TibetanDocument tdoc, int loc)
+        throws InvalidACIPException
+    {
+        StringBuffer errors = new StringBuffer();
+        // 500 is presumably the scanner's cap on collected errors -- confirm against scan().
+        ArrayList al = ACIPTshegBarScanner.scan(acip, errors, 500);
+        // Reject bad input here, before tdoc has been modified at all.
+        if (errors.length() > 0)
+            throw new InvalidACIPException(errors.toString());
+        if (null == al)
+            throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
+        String warningLevel = "Most";
+        boolean colors = false;
+        // Compare Strings by value; != would compare references and only
+        // works when both operands happen to be interned literals.
+        boolean putWarningsInOutput = !"None".equals(warningLevel);
+        StringBuffer warnings = putWarningsInOutput ? new StringBuffer() : null;
+        try {
+            ACIPConverter.convertToTMW(al, tdoc, errors, warnings,
+                                       putWarningsInOutput, warningLevel, colors, loc);
+        } catch (IOException e) {
+            // convertToTMW hands the converter no output stream, so this is
+            // not expected; keep e as the cause in case it ever happens.
+            throw new Error("Can't happen: " + e, e);
+        }
+    }
+
 /**
 * Converts a string of Extended Wylie into {@link DuffData DuffData}.
 * @param wylie the Wylie you want to convert
@@ -318,7 +358,7 @@ public class TibTextUtils implements THDLWylieConstants {
 * @throws InvalidWylieException if the Wylie is deemed invalid,
 * i.e. 
if it does not conform to the Extended Wylie standard
 */
-    public static DuffData[] getTibetanMachineWeb(String wylie) throws InvalidWylieException {
+    public static DuffData[] getTibetanMachineWebForEWTS(String wylie) throws InvalidWylieException {
         List chars = new ArrayList();
         DuffCode dc;
         int start = 0;
diff --git a/source/org/thdl/tib/text/TibetanDocument.java b/source/org/thdl/tib/text/TibetanDocument.java
index 7eee74e..32a32da 100644
--- a/source/org/thdl/tib/text/TibetanDocument.java
+++ b/source/org/thdl/tib/text/TibetanDocument.java
@@ -226,30 +226,17 @@ public class TibetanDocument extends DefaultStyledDocument {
         return insertDuff(tibetanFontSize, pos, glyphs, true, Color.BLACK);
     }
 
-/**
-* Appends all DuffCodes in glyphs to the end of this document.
-* @param glyphs the array of Tibetan data you want to insert
-* @param color the color in which to insert, which is used if and only
-* if {@link #colorsEnabled() colors are enabled}
-*/
-    public void appendDuffCodes(DuffCode[] glyphs, Color color) {
-        // PERFORMANCE FIXME: this isn't so speedy, but it reuses
-        // existing code.
-        for (int i = 0; i < glyphs.length; i++) {
-            appendDuffCode(glyphs[i], color);
-        }
-    }
-
 /**
-* Appends glyph to the end of this document.
+* Inserts glyph into this document at position loc.
+* @param loc the position at which to insert this glyph
 * @param glyph the Tibetan glyph you want to insert
 * @param color the color in which to insert, which is used if and only
 * if {@link #colorsEnabled() colors are enabled}
 */
-    public void appendDuffCode(DuffCode glyph, Color color) {
+    public void appendDuffCode(int loc, DuffCode glyph, Color color) {
         // PERFORMANCE FIXME: this isn't so speedy, but it reuses
         // existing code. 
- insertDuff(getLength(), + insertDuff(loc, new DuffData[] { new DuffData(new String(new char[] { glyph.getCharacter() }), glyph.getFontNum()) }, color); diff --git a/source/org/thdl/tib/text/TibetanHTML.java b/source/org/thdl/tib/text/TibetanHTML.java index d4d64a3..f1b48e8 100644 --- a/source/org/thdl/tib/text/TibetanHTML.java +++ b/source/org/thdl/tib/text/TibetanHTML.java @@ -26,11 +26,11 @@ public class TibetanHTML { String next = tokenizer.nextToken(); if (next.equals("\t") || next.equals("\n")) { buffer.append(""); - buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb("_"))); + buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS("_"))); buffer.append(""); } else - buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb(next))); + buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS(next))); } return buffer.toString(); } catch (InvalidWylieException ive) { @@ -95,11 +95,11 @@ public class TibetanHTML { String next = tokenizer.nextToken(); if (next.equals("\t") || next.equals("\n")) { buffer.append(""); - buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb("_"))); + buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS("_"))); buffer.append(""); } else - buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb(next))); + buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS(next))); } return buffer.toString(); } catch (InvalidWylieException ive) { @@ -163,11 +163,11 @@ public class TibetanHTML { String next = tokenizer.nextToken(); if (next.equals("\t") || next.equals("\n")) { buffer.append(""); - buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb("_"))); + buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS("_"))); buffer.append(""); } else - buffer.append(getHTML(TibTextUtils.getTibetanMachineWeb(next))); + buffer.append(getHTML(TibTextUtils.getTibetanMachineWebForEWTS(next))); } return buffer.toString(); } catch (InvalidWylieException ive) { diff --git 
a/source/org/thdl/tib/text/ttt/ACIPConverter.java b/source/org/thdl/tib/text/ttt/ACIPConverter.java index 5e2bafa..fe9e048 100644 --- a/source/org/thdl/tib/text/ttt/ACIPConverter.java +++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java @@ -148,22 +148,25 @@ public class ACIPConverter { TibetanDocument tdoc = new TibetanDocument(); boolean rv = convertToTMW(scan, tdoc, errors, warnings, - writeWarningsToResult, warningLevel, colors); + writeWarningsToResult, warningLevel, colors, + tdoc.getLength()); tdoc.writeRTFOutputStream(out); return rv; } - private static boolean convertToTMW(ArrayList scan, - TibetanDocument tdoc, - StringBuffer errors, - StringBuffer warnings, - boolean writeWarningsToResult, - String warningLevel, - boolean colors) + public static boolean convertToTMW(ArrayList scan, + TibetanDocument tdoc, + StringBuffer errors, + StringBuffer warnings, + boolean writeWarningsToResult, + String warningLevel, + boolean colors, + int loc) throws IOException { return convertTo(false, scan, null, tdoc, errors, warnings, - writeWarningsToResult, warningLevel, colors); + writeWarningsToResult, warningLevel, colors, + loc, loc == tdoc.getLength()); } /** Returns UTF-8 encoded Unicode. 
A bit indirect, so use this @@ -225,7 +228,7 @@ public class ACIPConverter { throws IOException { return convertTo(true, scan, out, null, errors, warnings, - writeWarningsToOut, warningLevel, false); + writeWarningsToOut, warningLevel, false, -1, true); } private static boolean peekaheadFindsSpacesAndComma(ArrayList /* of TString */ scan, @@ -254,9 +257,12 @@ public class ACIPConverter { StringBuffer warnings, boolean writeWarningsToOut, String warningLevel, - boolean colors) + boolean colors, + int tdocstart, + boolean isCleanDoc) throws IOException { + try { int smallFontSize = -1; int regularFontSize = -1; if (null != tdoc) { @@ -297,7 +303,10 @@ public class ACIPConverter { hasErrors = true; String text = "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: " + s.getText() + "]"; if (null != writer) writer.write(text); - if (null != tdoc) tdoc.appendRoman(text, Color.RED); + if (null != tdoc) { + tdoc.appendRoman(tdocstart, text, Color.RED); + tdocstart += text.length(); + } } else if (stype == TString.TSHEG_BAR_ADORNMENT) { if (lastGuyWasNonPunct) { String err = "[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot yet convert " + s.getText() + " because the converter's author is unclear what the result should be.]"; @@ -314,9 +323,11 @@ public class ACIPConverter { = ACIPRules.getWylieForACIPOther(s.getText()); if (null == wylie) { hasErrors = true; - tdoc.appendRoman(err, Color.RED); + tdoc.appendRoman(tdocstart, err, Color.RED); + tdocstart += err.length(); } else { - tdoc.appendDuffCode(TibetanMachineWeb.getGlyph(wylie), + tdoc.appendDuffCode(tdocstart++, + TibetanMachineWeb.getGlyph(wylie), Color.BLACK); } } @@ -331,7 +342,10 @@ public class ACIPConverter { if (writeWarningsToOut) { String text = "[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: " + s.getText() + "]"; if (null != writer) writer.write(text); - if (null != tdoc) tdoc.appendRoman(text, Color.RED); + if (null != tdoc) { + tdoc.appendRoman(tdocstart, text, Color.RED); + tdocstart += 
text.length(); + } } if (null != warnings) { @@ -348,7 +362,10 @@ public class ACIPConverter { + s.getText() + ((stype == TString.FOLIO_MARKER) ? "}" : "")); if (null != writer) writer.write(text); - if (null != tdoc) tdoc.appendRoman(text, Color.BLACK); + if (null != tdoc) { + tdoc.appendRoman(tdocstart, text, Color.BLACK); + tdocstart += text.length(); + } } else { String unicode = null; Object[] duff = null; @@ -362,7 +379,11 @@ public class ACIPConverter { hasErrors = true; String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS THESE ERRORS: " + acipError + "]"; if (null != writer) writer.write(errorMessage); - if (null != tdoc) tdoc.appendRoman(errorMessage, Color.RED); + if (null != tdoc) { + tdoc.appendRoman(tdocstart, errorMessage, + Color.RED); + tdocstart += errorMessage.length(); + } if (null != errors) errors.append(errorMessage + "\n"); } else { @@ -373,7 +394,11 @@ public class ACIPConverter { hasErrors = true; String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " IS ESSENTIALLY NOTHING.]"; if (null != writer) writer.write(errorMessage); - if (null != tdoc) tdoc.appendRoman(errorMessage, Color.RED); + if (null != tdoc) { + tdoc.appendRoman(tdocstart, errorMessage, + Color.RED); + tdocstart += errorMessage.length(); + } if (null != errors) errors.append(errorMessage + "\n"); } else { @@ -384,7 +409,12 @@ public class ACIPConverter { hasErrors = true; String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS NO LEGAL PARSES.]"; if (null != writer) writer.write(errorMessage); - if (null != tdoc) tdoc.appendRoman(errorMessage, Color.RED); + if (null != tdoc) { + tdoc.appendRoman(tdocstart, + errorMessage, + Color.RED); + tdocstart += errorMessage.length(); + } if (null != errors) errors.append(errorMessage + "\n"); } else { @@ -420,7 +450,12 @@ public class ACIPConverter { = ("[#WARNING CONVERTING 
ACIP DOCUMENT: " + warning + "]"); if (null != writer) writer.write(text); - if (null != tdoc) tdoc.appendRoman(text, Color.RED); + if (null != tdoc) { + tdoc.appendRoman(tdocstart, + text, + Color.RED); + tdocstart += text.length(); + } } if (null != warnings) { warnings.append(warning); @@ -494,7 +529,11 @@ public class ACIPConverter { done = true; } if (null != tdoc) { - tdoc.appendRoman(" ", Color.BLACK); + String x = " "; + tdoc.appendRoman(tdocstart, + x, + Color.BLACK); + tdocstart += x.length(); continue; } // DLC AM I DOING THIS? By normal Tibetan & Dzongkha spelling, writing, and input rules @@ -511,7 +550,8 @@ public class ACIPConverter { && lpl.get(0).getLeft().equals("NG")) { DuffCode tshegDuff = TibetanMachineWeb.getGlyph(" "); if (null == tshegDuff) throw new Error("tsheg duff"); - tdoc.appendDuffCode(tshegDuff, lastColor); + tdoc.appendDuffCode(tdocstart++, + tshegDuff, lastColor); } if (!done) { @@ -521,7 +561,9 @@ public class ACIPConverter { || s.getText().equals("\t") || s.getText().equals("\n") || s.getText().equals("\r\n")) { - tdoc.appendRoman(s.getText(), Color.BLACK); + tdoc.appendRoman(tdocstart, s.getText(), + Color.BLACK); + tdocstart += s.getText().length(); continue; } else { String wy = ACIPRules.getWylieForACIPOther(s.getText()); @@ -555,14 +597,17 @@ public class ACIPConverter { if (null != duff && 0 != duff.length) { for (int j = 0; j < duff.length; j++) { if (duff[j] instanceof DuffCode) - tdoc.appendDuffCode((DuffCode)duff[j], + tdoc.appendDuffCode(tdocstart++, + (DuffCode)duff[j], color); else { hasErrors = true; if (null != errors) errors.append((String)duff[j] + "\n"); - tdoc.appendRoman((String)duff[j], + tdoc.appendRoman(tdocstart, + (String)duff[j], Color.RED); + tdocstart += ((String)duff[j]).length(); } } } else { @@ -579,6 +624,11 @@ public class ACIPConverter { if (null != writer) { writer.close(); } + if (isCleanDoc && null != tdoc && tdocstart != tdoc.getLength()) + throw new Error("Oops -- we dropped something from 
the output! tdocstart++; and tdocstart+=xyz; are not being used correctly.");
         return !hasErrors;
+        } catch (javax.swing.text.BadLocationException e) {
+            throw new IllegalArgumentException("tdocstart is no good: " + tdocstart);
+        }
     }
 }
diff --git a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
index 9d9acc8..e370d40 100644
--- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
+++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
@@ -154,7 +154,7 @@ public class ACIPTshegBarScanner {
                         al.add(new TString("Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n",
                                            TString.ERROR));
                         if (null != errors)
-                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
+                            errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
                                           + "Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
                         if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                     }
@@ -173,7 +173,7 @@ public class ACIPTshegBarScanner {
                         al.add(new TString("Found a truly unmatched close bracket, " + s.substring(i, i+1),
                                            TString.ERROR));
                         if (null != errors) {
-                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
+                            errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
                                           + "Found a truly unmatched close bracket, ] or }.\n");
                         }
                         if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
@@ -182,7 +182,7 @@ public class ACIPTshegBarScanner {
                     al.add(new TString("Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.",
                                        TString.ERROR));
                     if (null != errors)
-                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
+                        errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
                                       + "Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
                     if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                     startOfString = i+1;
@@ -438,7 +438,7 @@ public class ACIPTshegBarScanner {
                         al.add(new TString("Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.",
                                            TString.ERROR));
                         if (null != errors) {
-                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
+                            errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
                                           + "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.\n");
                         }
                         if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
@@ -457,7 +457,9 @@ public class ACIPTshegBarScanner {
                     }
                     al.add(new TString("Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?",
                                        TString.ERROR));
-                    errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
+                    // errors may be null, as every other report site allows for; guard this one too.
+                    if (null != errors)
+                        errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
                                   + "Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\n");
                     if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                 }
@@ -512,7 +512,7 @@ public class ACIPTshegBarScanner {
                         al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). 
This folio marker has a period, '.', at the end of it, which is illegal.", TString.ERROR)); if (null != errors) - errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": " + errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": " + "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.\n"); if (maxErrors >= 0 && ++numErrors >= maxErrors) return null; startOfString = i+numdigits+3; @@ -534,7 +534,7 @@ public class ACIPTshegBarScanner { al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.", TString.ERROR)); if (null != errors) - errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": " + errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": " + "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.\n"); if (maxErrors >= 0 && ++numErrors >= maxErrors) return null; startOfString = i+1; // DLC FIXME: skip over more? @@ -630,7 +630,7 @@ public class ACIPTshegBarScanner { al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.", TString.ERROR)); if (null != errors) - errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": " + errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": " + "Found an illegal at sign, @ (in context, this is " + inContext + "). 
@012B is an example of a legal folio marker.\n"); if (maxErrors >= 0 && ++numErrors >= maxErrors) return null; startOfString = i+1; @@ -656,7 +656,7 @@ public class ACIPTshegBarScanner { al.add(new TString("Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.", TString.ERROR)); if (errors != null) { - errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": " + errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": " + "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\n"); } if (maxErrors >= 0 && ++numErrors >= maxErrors) return null; @@ -692,7 +692,7 @@ public class ACIPTshegBarScanner { al.add(new TString("Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.", TString.ERROR)); if (null != errors) - errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": " + errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": " + "Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\n"); if (maxErrors >= 0 && ++numErrors >= maxErrors) return null; } else { @@ -706,7 +706,7 @@ public class ACIPTshegBarScanner { al.add(new TString("Unexpected closing parenthesis, ), found.", TString.ERROR)); if (null != errors) - errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": " + errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": " + "Unexpected closing parenthesis, ), found.\n"); if (maxErrors >= 0 && ++numErrors >= maxErrors) return null; } else { @@ -851,19 +851,19 @@ public class ACIPTshegBarScanner { al.add(new TString("Found an illegal, unprintable character.", TString.ERROR)); if (null != errors) - errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": " + errors.append("Offset " + i + ((numNewlines == 0) ? 
"" : (" or maybe " + (i-numNewlines))) + ": " + "Found an illegal, unprintable character.\n"); } else if ('\\' == ch) { al.add(new TString("Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.", TString.ERROR)); if (null != errors) - errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": " + errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": " + "Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n"); } else { al.add(new TString("Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".", TString.ERROR)); if (null != errors) - errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": " + errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": " + "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".\n"); } if (maxErrors >= 0 && ++numErrors >= maxErrors) return null; diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index b0e2ca5..df8d864 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -7203,28 +7203,28 @@ tstHelper("ZUR"); shelp("", "", "[]"); shelp("[DD]", ""); shelp("[", - "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n"); + "Offset 0: Found an illegal open bracket (in context, this is [). 
Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n"); shelp("{", - "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n"); + "Offset 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n"); shelp("DD", ""); shelp("DD]", - "Offset 2 or maybe 2: Found a truly unmatched close bracket, ] or }.\nOffset 2 or maybe 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); + "Offset 2: Found a truly unmatched close bracket, ] or }.\nOffset 2: Found a closing bracket without a matching open bracket. 
Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); - shelp("///NYA", "Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n"); + shelp("///NYA", "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n"); shelp("/NYA/", ""); shelp("[?][BP][LS][DD1][DD2][DDD][DR][# (<{A COMMENT)}>]", ""); shelp("[LS][# A [[[[[COMMENT][LS]", - "Offset 9 or maybe 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" - + "Offset 10 or maybe 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" - + "Offset 11 or maybe 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" - + "Offset 12 or maybe 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" - + "Offset 13 or maybe 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"); + "Offset 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" + + "Offset 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" + + "Offset 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" + + "Offset 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n" + + "Offset 13: Found an open bracket within a [#COMMENT]-style comment. 
Brackets may not appear in comments.\n"); shelp("[ILLEGAL COMMENT]", - "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16 or maybe 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); + "Offset 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); shelp("(BSKYABS GRO)", ""); - shelp("BSKYABS GRO)", "Offset 11 or maybe 11: Unexpected closing parenthesis, ), found.\n"); + shelp("BSKYABS GRO)", "Offset 11: Unexpected closing parenthesis, ), found.\n"); shelp("BSKYABS GRO(", "Offset END: Unmatched open parenthesis, (, found.\n"); - shelp("((NESTAGE))", "Offset 1 or maybe 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10 or maybe 10: Unexpected closing parenthesis, ), found.\n"); + shelp("((NESTAGE))", "Offset 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10: Unexpected closing parenthesis, ), found.\n"); shelp("(BA)(PA)NYA(CA)", ""); shelp("NYAx", ""); shelp("NYA x", ""); @@ -7253,9 +7253,9 @@ tstHelper("ZUR"); shelp("(NYA ", "Offset END: Unmatched open parenthesis, (, found.\n"); shelp("[*NYA ", "Offset END: Unmatched open bracket found. 
A correction does not terminate.\n"); shelp("?", "", "[QUESTION:{?}]"); - shelp("KHAN~ BAR ", "Offset 4 or maybe 4: Found an illegal character, ~, with ordinal 126.\n"); + shelp("KHAN~ BAR ", "Offset 4: Found an illegal character, ~, with ordinal 126.\n"); shelp("[* Correction with []]", - "Offset 5 or maybe 5: Found an illegal character, r, with ordinal 114.\nOffset 6 or maybe 6: Found an illegal character, r, with ordinal 114.\nOffset 7 or maybe 7: Found an illegal character, e, with ordinal 101.\nOffset 8 or maybe 8: Found an illegal character, c, with ordinal 99.\nOffset 14 or maybe 14: Found an illegal character, w, with ordinal 119.\nOffset 19 or maybe 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21 or maybe 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); + "Offset 5: Found an illegal character, r, with ordinal 114.\nOffset 6: Found an illegal character, r, with ordinal 114.\nOffset 7: Found an illegal character, e, with ordinal 101.\nOffset 8: Found an illegal character, c, with ordinal 99.\nOffset 14: Found an illegal character, w, with ordinal 119.\nOffset 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); // DLC DOC: the line SDIG PA'I GROGS PO'I LAG TU SON PAR 'GYUR PA is followed by a blank line. Note that it's "PA", not "PA ", ending it. We autocorrect to the latter. 
@@ -7271,7 +7271,7 @@ tstHelper("ZUR"); uhelp(" 1\\ ", "\u0f0b\u0f21\u0f84\u0f0b"); } shelp("K\\,", - "Offset 1 or maybe 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n", + "Offset 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n", "[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}, TIBETAN_PUNCTUATION:{,}]"); @@ -7315,14 +7315,14 @@ tstHelper("ZUR"); shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]"); shelp("@001 ", "", "[FOLIO_MARKER:{@001}, TIBETAN_PUNCTUATION:{ }]"); shelp("@19-20A", - "Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n", + "Offset 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n", "[ERROR:{Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // DLC FIXME: yes it occurs in the kangyur. shelp("@[7B]", ""); shelp("@012A.3KA", "", "[FOLIO_MARKER:{@012A.3}, TIBETAN_NON_PUNCTUATION:{KA}]"); shelp("@012A.34", - "Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n", + "Offset 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n", "[ERROR:{Found an illegal at sign, @ (in context, this is @012A.34). 
This folio marker has a period, '.', at the end of it, which is illegal.}, TIBETAN_NON_PUNCTUATION:{34}]"); shelp("@[07B]", ""); shelp("@[00007B]", ""); @@ -7339,7 +7339,7 @@ tstHelper("ZUR"); shelp("{ DD }", "", "[DD:{{ DD }}]"); // TD3790E2.ACT shelp("{ BP }", "", "[BP:{{ BP }}]"); // TD3790E2.ACT shelp("//NYA\\\\", - "Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5 or maybe 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\nOffset 6 or maybe 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n", + "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\nOffset 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n", "[START_SLASH:{/}, ERROR:{Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}]"); }