Andres found that "THAG PA" caused a NullPointerException. That's fixed.
Renamed ACIPString to TString -- we'll use this for EWTS and ACIP both. TMW->ACIP for TMW9.61 should work now.
This commit is contained in:
parent
c8927b827c
commit
ee50291ed4
4 changed files with 207 additions and 205 deletions
|
@ -31,7 +31,7 @@ import org.thdl.tib.text.DuffCode;
|
|||
|
||||
/**
|
||||
* This class is able to convert an ACIP file into Tibetan Machine Web
|
||||
* and an ACIP file into TMW. ACIP->Unicode should yield the same
|
||||
* and an ACIP file into Unicode. ACIP->Unicode should yield the same
|
||||
* results as ACIP->TMW followed by TMW->Unicode (FIXME: test it!)
|
||||
* @author David Chandler
|
||||
*/
|
||||
|
@ -225,15 +225,15 @@ public class ACIPConverter {
|
|||
writeWarningsToOut, warningLevel, false);
|
||||
}
|
||||
|
||||
private static boolean peekaheadFindsSpacesAndComma(ArrayList /* of ACIPString */ scan,
|
||||
private static boolean peekaheadFindsSpacesAndComma(ArrayList /* of TString */ scan,
|
||||
int pos) {
|
||||
int sz = scan.size();
|
||||
while (pos < sz) {
|
||||
ACIPString s = (ACIPString)scan.get(pos++);
|
||||
if (s.getType() == ACIPString.TIBETAN_PUNCTUATION && s.getText().equals(" ")) {
|
||||
TString s = (TString)scan.get(pos++);
|
||||
if (s.getType() == TString.TIBETAN_PUNCTUATION && s.getText().equals(" ")) {
|
||||
// keep going
|
||||
} else {
|
||||
if (s.getType() == ACIPString.TIBETAN_PUNCTUATION && s.getText().equals(",")) {
|
||||
if (s.getType() == TString.TIBETAN_PUNCTUATION && s.getText().equals(",")) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
@ -286,16 +286,16 @@ public class ACIPConverter {
|
|||
Color lastColor = Color.BLACK;
|
||||
Color color = Color.BLACK;
|
||||
for (int i = 0; i < sz; i++) {
|
||||
ACIPString s = (ACIPString)scan.get(i);
|
||||
TString s = (TString)scan.get(i);
|
||||
int stype = s.getType();
|
||||
if (stype == ACIPString.ERROR) {
|
||||
if (stype == TString.ERROR) {
|
||||
lastGuyWasNonPunct = false;
|
||||
lastGuy = null;
|
||||
hasErrors = true;
|
||||
String text = "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: " + s.getText() + "]";
|
||||
if (null != writer) writer.write(text);
|
||||
if (null != tdoc) tdoc.appendRoman(text, Color.RED);
|
||||
} else if (stype == ACIPString.TSHEG_BAR_ADORNMENT) {
|
||||
} else if (stype == TString.TSHEG_BAR_ADORNMENT) {
|
||||
if (lastGuyWasNonPunct) {
|
||||
String err = "[#ERROR CONVERTING ACIP DOCUMENT: This converter cannot yet convert " + s.getText() + " because the converter's author is unclear what the result should be.]";
|
||||
if (null != writer) {
|
||||
|
@ -322,7 +322,7 @@ public class ACIPConverter {
|
|||
}
|
||||
lastGuyWasNonPunct = true; // this stuff is not really punctuation
|
||||
lastGuy = null;
|
||||
} else if (stype == ACIPString.WARNING) {
|
||||
} else if (stype == TString.WARNING) {
|
||||
lastGuyWasNonPunct = false;
|
||||
lastGuy = null;
|
||||
if (writeWarningsToOut) {
|
||||
|
@ -341,15 +341,15 @@ public class ACIPConverter {
|
|||
lastGuyWasNonPunct = false;
|
||||
lastGuy = null;
|
||||
String text
|
||||
= (((stype == ACIPString.FOLIO_MARKER) ? "{" : "")
|
||||
= (((stype == TString.FOLIO_MARKER) ? "{" : "")
|
||||
+ s.getText()
|
||||
+ ((stype == ACIPString.FOLIO_MARKER) ? "}" : ""));
|
||||
+ ((stype == TString.FOLIO_MARKER) ? "}" : ""));
|
||||
if (null != writer) writer.write(text);
|
||||
if (null != tdoc) tdoc.appendRoman(text, Color.BLACK);
|
||||
} else {
|
||||
String unicode = null;
|
||||
DuffCode[] duff = null;
|
||||
if (stype == ACIPString.TIBETAN_NON_PUNCTUATION) {
|
||||
if (stype == TString.TIBETAN_NON_PUNCTUATION) {
|
||||
lastGuyWasNonPunct = true;
|
||||
TPairList pl = TPairListFactory.breakACIPIntoChunks(s.getText());
|
||||
String acipError;
|
||||
|
@ -424,13 +424,13 @@ public class ACIPConverter {
|
|||
}
|
||||
} else {
|
||||
color = Color.BLACK;
|
||||
if (stype == ACIPString.START_SLASH) {
|
||||
if (stype == TString.START_SLASH) {
|
||||
if (null != writer) unicode = "\u0F3C";
|
||||
if (null != tdoc) duff = new DuffCode[] { TibetanMachineWeb.getGlyph("(") };
|
||||
} else if (stype == ACIPString.END_SLASH) {
|
||||
} else if (stype == TString.END_SLASH) {
|
||||
if (null != writer) unicode = "\u0F3D";
|
||||
if (null != tdoc) duff = new DuffCode[] { TibetanMachineWeb.getGlyph(")") };
|
||||
} else if (stype == ACIPString.TIBETAN_PUNCTUATION) {
|
||||
} else if (stype == TString.TIBETAN_PUNCTUATION) {
|
||||
// For ACIP, tshegs are used as both
|
||||
// tshegs and whitespace. We treat a
|
||||
// space as a tsheg if and only if it
|
||||
|
@ -452,7 +452,8 @@ public class ACIPConverter {
|
|||
// space.
|
||||
&& ((lpl.get(0).getLeft().equals("G")
|
||||
|| lpl.get(0).getLeft().equals("K"))
|
||||
&& (lpl.get(0).getRight().indexOf('U') < 0))
|
||||
&& (null == lpl.get(0).getRight()
|
||||
|| lpl.get(0).getRight().indexOf('U') < 0))
|
||||
&&
|
||||
// it's (G . anything)
|
||||
// followed by some number of
|
||||
|
@ -500,12 +501,12 @@ public class ACIPConverter {
|
|||
}
|
||||
}
|
||||
}
|
||||
} else if (stype == ACIPString.START_PAREN) {
|
||||
} else if (stype == TString.START_PAREN) {
|
||||
if (null != tdoc) {
|
||||
tdoc.setTibetanFontSize(smallFontSize);
|
||||
}
|
||||
continue;
|
||||
} else if (stype == ACIPString.END_PAREN) {
|
||||
} else if (stype == TString.END_PAREN) {
|
||||
if (null != tdoc) {
|
||||
tdoc.setTibetanFontSize(regularFontSize);
|
||||
}
|
||||
|
|
|
@ -174,6 +174,7 @@ public class ACIPRules {
|
|||
if (null == wylieToACIP) {
|
||||
wylieToACIP = new HashMap(75);
|
||||
wylieToACIP.put("_", " "); // oddball.
|
||||
wylieToACIP.put("o'i", "O'I"); // oddball for TMW9.61.
|
||||
}
|
||||
wylieToACIP.put(EWTS, ACIP);
|
||||
}
|
||||
|
|
|
@ -70,7 +70,7 @@ public class ACIPTshegBarScanner {
|
|||
|
||||
/** Scans an ACIP file with path fname into tsheg bars. If errors
|
||||
* is non-null, error messages will be appended to it. Returns a
|
||||
* list of ACIPStrings that is the scan. <p>FIXME: not so
|
||||
* list of TStrings that is the scan. <p>FIXME: not so
|
||||
* efficient; copies the whole file into memory first.
|
||||
* @throws IOException if we cannot read in the ACIP input file */
|
||||
public static ArrayList scanFile(String fname, StringBuffer errors, int maxErrors)
|
||||
|
@ -83,7 +83,7 @@ public class ACIPTshegBarScanner {
|
|||
/** Scans a stream of ACIP into tsheg bars. If errors is
|
||||
* non-null, error messages will be appended to it. You can
|
||||
* recover both errors and warnings (modulo offset information)
|
||||
* from the result, though. Returns a list of ACIPStrings that
|
||||
* from the result, though. Returns a list of TStrings that
|
||||
* is the scan, or null if more than maxErrors occur. <p>FIXME:
|
||||
* not so efficient; copies the whole file into memory first.
|
||||
* @throws IOException if we cannot read the whole ACIP stream */
|
||||
|
@ -104,7 +104,7 @@ public class ACIPTshegBarScanner {
|
|||
return scan(s.toString(), errors, maxErrors);
|
||||
}
|
||||
|
||||
/** Returns a list of {@link ACIPString ACIPStrings} corresponding
|
||||
/** Returns a list of {@link TString TStrings} corresponding
|
||||
* to s, possibly the empty list (when the empty string is the
|
||||
* input). Each String is either a Latin comment, some Latin
|
||||
* text, a tsheg bar (minus the tsheg or shad or whatever), a
|
||||
|
@ -112,16 +112,16 @@ public class ACIPTshegBarScanner {
|
|||
*
|
||||
* <p>This not only scans; it finds all the errors and warnings a
|
||||
* parser would too, like "NYA x" and "(" and ")" and "/NYA" etc.
|
||||
* It puts those in as ACIPStrings with type {@link
|
||||
* ACIPString#ERROR} or {@link ACIPString#WARNING}, and also, if
|
||||
* It puts those in as TStrings with type {@link
|
||||
* TString#ERROR} or {@link TString#WARNING}, and also, if
|
||||
* errors is non-null, appends helpful messages to errors, each
|
||||
* followed by a '\n'.
|
||||
* @param s the ACIP text
|
||||
* @param errors if non-null, the buffer to which to append error
|
||||
* messages (DLC FIXME: kludge, just get this info by scanning
|
||||
* the result for ACIPString.ERROR (and maybe ACIPString.WARNING,
|
||||
* the result for TString.ERROR (and maybe TString.WARNING,
|
||||
* if you care about warnings), but then we'd have to put the
|
||||
* Offset info in the ACIPString)
|
||||
* Offset info in the TString)
|
||||
* @param maxErrors if nonnegative, then scanning will stop when
|
||||
* more than maxErrors errors occur. In this event, null is
|
||||
* returned.
|
||||
|
@ -138,7 +138,7 @@ public class ACIPTshegBarScanner {
|
|||
|
||||
boolean waitingForMatchingIllegalClose = false;
|
||||
int sl = s.length();
|
||||
int currentType = ACIPString.ERROR;
|
||||
int currentType = TString.ERROR;
|
||||
int startOfString = 0;
|
||||
Stack bracketTypeStack = new Stack();
|
||||
int startSlashIndex = -1;
|
||||
|
@ -149,10 +149,10 @@ public class ACIPTshegBarScanner {
|
|||
char ch;
|
||||
ch = s.charAt(i);
|
||||
if (ch == '\n') ++numNewlines;
|
||||
if (ACIPString.COMMENT == currentType && ch != ']') {
|
||||
if (TString.COMMENT == currentType && ch != ']') {
|
||||
if ('[' == ch) {
|
||||
al.add(new ACIPString("Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
|
||||
|
@ -166,12 +166,12 @@ public class ACIPTshegBarScanner {
|
|||
if (bracketTypeStack.empty()) {
|
||||
// Error.
|
||||
if (startOfString < i) {
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
al.add(new TString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
}
|
||||
if (!waitingForMatchingIllegalClose) {
|
||||
al.add(new ACIPString("Found a truly unmatched close bracket, " + s.substring(i, i+1),
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found a truly unmatched close bracket, " + s.substring(i, i+1),
|
||||
TString.ERROR));
|
||||
if (null != errors) {
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found a truly unmatched close bracket, ] or }.\n");
|
||||
|
@ -179,19 +179,19 @@ public class ACIPTshegBarScanner {
|
|||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
waitingForMatchingIllegalClose = false;
|
||||
al.add(new ACIPString("Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
} else {
|
||||
int stackTop = ((Integer)bracketTypeStack.pop()).intValue();
|
||||
|
||||
int end = startOfString;
|
||||
if (ACIPString.CORRECTION_START == stackTop) {
|
||||
if (TString.CORRECTION_START == stackTop) {
|
||||
|
||||
// This definitely indicates a new token.
|
||||
char prevCh = s.charAt(i-1);
|
||||
|
@ -200,19 +200,19 @@ public class ACIPTshegBarScanner {
|
|||
else
|
||||
end = i;
|
||||
if (startOfString < end) {
|
||||
al.add(new ACIPString(s.substring(startOfString, end),
|
||||
currentType));
|
||||
al.add(new TString(s.substring(startOfString, end),
|
||||
currentType));
|
||||
}
|
||||
|
||||
if ('?' != prevCh) {
|
||||
currentType = ACIPString.PROBABLE_CORRECTION;
|
||||
currentType = TString.PROBABLE_CORRECTION;
|
||||
} else {
|
||||
currentType = ACIPString.POSSIBLE_CORRECTION;
|
||||
currentType = TString.POSSIBLE_CORRECTION;
|
||||
}
|
||||
}
|
||||
al.add(new ACIPString(s.substring(end, i+1), currentType));
|
||||
al.add(new TString(s.substring(end, i+1), currentType));
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
}
|
||||
break; // end ']','}' case
|
||||
|
||||
|
@ -222,10 +222,10 @@ public class ACIPTshegBarScanner {
|
|||
case '[':
|
||||
// This definitely indicates a new token.
|
||||
if (startOfString < i) {
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
al.add(new TString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
startOfString = i;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
}
|
||||
String thingy = null;
|
||||
|
||||
|
@ -233,57 +233,57 @@ public class ACIPTshegBarScanner {
|
|||
&& (s.substring(i, i + "[DD]".length()).equals("[DD]")
|
||||
|| s.substring(i, i + "[DD]".length()).equals("{DD}"))) {
|
||||
thingy = "[DD]";
|
||||
currentType = ACIPString.DD;
|
||||
currentType = TString.DD;
|
||||
} else if (i + "[DD1]".length() <= sl
|
||||
&& (s.substring(i, i + "[DD1]".length()).equals("[DD1]")
|
||||
|| s.substring(i, i + "[DD1]".length()).equals("{DD1}"))) {
|
||||
thingy = "[DD1]";
|
||||
currentType = ACIPString.DD;
|
||||
currentType = TString.DD;
|
||||
} else if (i + "[DD2]".length() <= sl
|
||||
&& (s.substring(i, i + "[DD2]".length()).equals("[DD2]")
|
||||
|| s.substring(i, i + "[DD2]".length()).equals("{DD2}"))) {
|
||||
thingy = "[DD2]";
|
||||
currentType = ACIPString.DD;
|
||||
currentType = TString.DD;
|
||||
} else if (i + "[DDD]".length() <= sl
|
||||
&& (s.substring(i, i + "[DDD]".length()).equals("[DDD]")
|
||||
|| s.substring(i, i + "[DDD]".length()).equals("{DDD}"))) {
|
||||
thingy = "[DDD]";
|
||||
currentType = ACIPString.DD;
|
||||
currentType = TString.DD;
|
||||
} else if (i + "[DR]".length() <= sl
|
||||
&& (s.substring(i, i + "[DR]".length()).equals("[DR]")
|
||||
|| s.substring(i, i + "[DR]".length()).equals("{DR}"))) {
|
||||
thingy = "[DR]";
|
||||
currentType = ACIPString.DR;
|
||||
currentType = TString.DR;
|
||||
} else if (i + "[LS]".length() <= sl
|
||||
&& (s.substring(i, i + "[LS]".length()).equals("[LS]")
|
||||
|| s.substring(i, i + "[LS]".length()).equals("{LS}"))) {
|
||||
thingy = "[LS]";
|
||||
currentType = ACIPString.LS;
|
||||
currentType = TString.LS;
|
||||
} else if (i + "[BP]".length() <= sl
|
||||
&& (s.substring(i, i + "[BP]".length()).equals("[BP]")
|
||||
|| s.substring(i, i + "[BP]".length()).equals("{BP}"))) {
|
||||
thingy = "[BP]";
|
||||
currentType = ACIPString.BP;
|
||||
currentType = TString.BP;
|
||||
} else if (i + "[BLANK PAGE]".length() <= sl
|
||||
&& (s.substring(i, i + "[BLANK PAGE]".length()).equals("[BLANK PAGE]")
|
||||
|| s.substring(i, i + "[BLANK PAGE]".length()).equals("{BLANK PAGE}"))) {
|
||||
thingy = "[BLANK PAGE]";
|
||||
currentType = ACIPString.BP;
|
||||
currentType = TString.BP;
|
||||
} else if (i + "[ BP ]".length() <= sl
|
||||
&& (s.substring(i, i + "[ BP ]".length()).equals("[ BP ]")
|
||||
|| s.substring(i, i + "[ BP ]".length()).equals("{ BP }"))) {
|
||||
thingy = "{ BP }"; // found in TD3790E2.ACT
|
||||
currentType = ACIPString.BP;
|
||||
currentType = TString.BP;
|
||||
} else if (i + "[ DD ]".length() <= sl
|
||||
&& (s.substring(i, i + "[ DD ]".length()).equals("[ DD ]")
|
||||
|| s.substring(i, i + "[ DD ]".length()).equals("{ DD }"))) {
|
||||
thingy = "{ DD }"; // found in TD3790E2.ACT
|
||||
currentType = ACIPString.DD;
|
||||
currentType = TString.DD;
|
||||
} else if (i + "[?]".length() <= sl
|
||||
&& (s.substring(i, i + "[?]".length()).equals("[?]")
|
||||
|| s.substring(i, i + "[?]".length()).equals("{?}"))) {
|
||||
thingy = "[?]";
|
||||
currentType = ACIPString.QUESTION;
|
||||
currentType = TString.QUESTION;
|
||||
} else {
|
||||
// We see comments appear not as [#COMMENT], but
|
||||
// as [COMMENT] sometimes. We make special cases
|
||||
|
@ -329,8 +329,8 @@ public class ACIPTshegBarScanner {
|
|||
if (i + 2 + englishComments[ec].length() <= sl
|
||||
&& (s.substring(i, i + 2 + englishComments[ec].length()).equals("[" + englishComments[ec] + "]")
|
||||
|| s.substring(i, i + 2 + englishComments[ec].length()).equals("[" + englishComments[ec] + "]"))) {
|
||||
al.add(new ACIPString("[#" + englishComments[ec] + "]",
|
||||
ACIPString.COMMENT));
|
||||
al.add(new TString("[#" + englishComments[ec] + "]",
|
||||
TString.COMMENT));
|
||||
startOfString = i + 2 + englishComments[ec].length();
|
||||
i = startOfString - 1;
|
||||
foundOne = true;
|
||||
|
@ -386,16 +386,16 @@ public class ACIPTshegBarScanner {
|
|||
= s.substring(begin, realEnd);
|
||||
for (int ec = 0; ec < englishCorrections.length; ec++) {
|
||||
if (interestingSubstring.startsWith(englishCorrections[ec])) {
|
||||
al.add(new ACIPString(s.substring(i, i+2),
|
||||
ACIPString.CORRECTION_START));
|
||||
al.add(new ACIPString(s.substring(i+2, realEnd),
|
||||
ACIPString.LATIN));
|
||||
al.add(new TString(s.substring(i, i+2),
|
||||
TString.CORRECTION_START));
|
||||
al.add(new TString(s.substring(i+2, realEnd),
|
||||
TString.LATIN));
|
||||
if (s.charAt(end - 1) == '?') {
|
||||
al.add(new ACIPString(s.substring(end-1, end+1),
|
||||
ACIPString.POSSIBLE_CORRECTION));
|
||||
al.add(new TString(s.substring(end-1, end+1),
|
||||
TString.POSSIBLE_CORRECTION));
|
||||
} else {
|
||||
al.add(new ACIPString(s.substring(end, end+1),
|
||||
ACIPString.PROBABLE_CORRECTION));
|
||||
al.add(new TString(s.substring(end, end+1),
|
||||
TString.PROBABLE_CORRECTION));
|
||||
}
|
||||
foundOne = true;
|
||||
startOfString = end+1;
|
||||
|
@ -409,24 +409,24 @@ public class ACIPTshegBarScanner {
|
|||
break;
|
||||
}
|
||||
if (null != thingy) {
|
||||
al.add(new ACIPString(thingy,
|
||||
currentType));
|
||||
al.add(new TString(thingy,
|
||||
currentType));
|
||||
startOfString = i + thingy.length();
|
||||
i = startOfString - 1;
|
||||
} else {
|
||||
if (i + 1 < sl) {
|
||||
char nextCh = s.charAt(i+1);
|
||||
if ('*' == nextCh) {
|
||||
currentType = ACIPString.CORRECTION_START;
|
||||
currentType = TString.CORRECTION_START;
|
||||
bracketTypeStack.push(new Integer(currentType));
|
||||
al.add(new ACIPString(s.substring(i, i+2),
|
||||
ACIPString.CORRECTION_START));
|
||||
currentType = ACIPString.ERROR;
|
||||
al.add(new TString(s.substring(i, i+2),
|
||||
TString.CORRECTION_START));
|
||||
currentType = TString.ERROR;
|
||||
startOfString = i+2;
|
||||
i = startOfString - 1;
|
||||
break;
|
||||
} else if ('#' == nextCh) {
|
||||
currentType = ACIPString.COMMENT;
|
||||
currentType = TString.COMMENT;
|
||||
bracketTypeStack.push(new Integer(currentType));
|
||||
break;
|
||||
}
|
||||
|
@ -435,8 +435,8 @@ public class ACIPTshegBarScanner {
|
|||
// WITHOUT # MARKS]. Though "... [" could cause
|
||||
// this too.
|
||||
if (waitingForMatchingIllegalClose) {
|
||||
al.add(new ACIPString("Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.",
|
||||
TString.ERROR));
|
||||
if (null != errors) {
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.\n");
|
||||
|
@ -455,24 +455,24 @@ public class ACIPTshegBarScanner {
|
|||
inContext = inContext + "...";
|
||||
}
|
||||
}
|
||||
al.add(new ACIPString("Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?",
|
||||
TString.ERROR));
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
startOfString = i + 1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
}
|
||||
break; // end '[','{' case
|
||||
|
||||
case '@':
|
||||
// This definitely indicates a new token.
|
||||
if (startOfString < i) {
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
al.add(new TString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
startOfString = i;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
}
|
||||
|
||||
// We look for {@N{AB}, @NN{AB}, ..., @NNNNNN{AB}},
|
||||
|
@ -509,15 +509,15 @@ public class ACIPTshegBarScanner {
|
|||
inContext = inContext + "...";
|
||||
}
|
||||
}
|
||||
al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+numdigits+3;
|
||||
i = startOfString - 1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
break;
|
||||
}
|
||||
if (i+numdigits+4 < sl && (s.charAt(i+numdigits+4) == '.' || s.charAt(i+numdigits+4) == 'A' || s.charAt(i+numdigits+4) == 'B' || s.charAt(i+numdigits+4) == 'a' || s.charAt(i+numdigits+4) == 'b' || isNumeric(s.charAt(i+numdigits+4)))) {
|
||||
|
@ -531,25 +531,25 @@ public class ACIPTshegBarScanner {
|
|||
inContext = inContext + "...";
|
||||
}
|
||||
}
|
||||
al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+1; // DLC FIXME: skip over more?
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
break;
|
||||
}
|
||||
extra = 4;
|
||||
} else {
|
||||
extra = 2;
|
||||
}
|
||||
al.add(new ACIPString(s.substring(i, i+numdigits+extra),
|
||||
ACIPString.FOLIO_MARKER));
|
||||
al.add(new TString(s.substring(i, i+numdigits+extra),
|
||||
TString.FOLIO_MARKER));
|
||||
startOfString = i+numdigits+extra;
|
||||
i = startOfString - 1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -565,11 +565,11 @@ public class ACIPTshegBarScanner {
|
|||
}
|
||||
}
|
||||
if (allAreNumeric) {
|
||||
al.add(new ACIPString(s.substring(i, i+numdigits+2),
|
||||
ACIPString.FOLIO_MARKER));
|
||||
al.add(new TString(s.substring(i, i+numdigits+2),
|
||||
TString.FOLIO_MARKER));
|
||||
startOfString = i+numdigits+2;
|
||||
i = startOfString - 1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -586,11 +586,11 @@ public class ACIPTshegBarScanner {
|
|||
}
|
||||
}
|
||||
if (allAreNumeric) {
|
||||
al.add(new ACIPString(s.substring(i, i+numdigits+4),
|
||||
ACIPString.FOLIO_MARKER));
|
||||
al.add(new TString(s.substring(i, i+numdigits+4),
|
||||
TString.FOLIO_MARKER));
|
||||
startOfString = i+numdigits+4;
|
||||
i = startOfString - 1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -607,11 +607,11 @@ public class ACIPTshegBarScanner {
|
|||
}
|
||||
}
|
||||
if (allAreNumeric) {
|
||||
al.add(new ACIPString(s.substring(i, i+numdigits+1),
|
||||
ACIPString.FOLIO_MARKER));
|
||||
al.add(new TString(s.substring(i, i+numdigits+1),
|
||||
TString.FOLIO_MARKER));
|
||||
startOfString = i+numdigits+1;
|
||||
i = startOfString - 1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -627,24 +627,24 @@ public class ACIPTshegBarScanner {
|
|||
inContext = inContext + "...";
|
||||
}
|
||||
}
|
||||
al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
}
|
||||
break; // end '@' case
|
||||
|
||||
case '/':
|
||||
// This definitely indicates a new token.
|
||||
if (startOfString < i) {
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
al.add(new TString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
startOfString = i;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
}
|
||||
|
||||
if (startSlashIndex >= 0) {
|
||||
|
@ -653,25 +653,25 @@ public class ACIPTshegBarScanner {
|
|||
* it means /NYA/. We warn about // for this
|
||||
* reason. \\ causes a tsheg-bar error (DLC
|
||||
* FIXME: verify this is so). */
|
||||
al.add(new ACIPString("Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.",
|
||||
TString.ERROR));
|
||||
if (errors != null) {
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\n");
|
||||
}
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.END_SLASH));
|
||||
al.add(new TString(s.substring(i, i+1),
|
||||
TString.END_SLASH));
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
startSlashIndex = -1;
|
||||
} else {
|
||||
startSlashIndex = i;
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.START_SLASH));
|
||||
al.add(new TString(s.substring(i, i+1),
|
||||
TString.START_SLASH));
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
}
|
||||
break; // end '/' case
|
||||
|
||||
|
@ -679,42 +679,42 @@ public class ACIPTshegBarScanner {
|
|||
case ')':
|
||||
// This definitely indicates a new token.
|
||||
if (startOfString < i) {
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
al.add(new TString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
startOfString = i;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
}
|
||||
|
||||
// We do not support nesting like (NYA (BA)).
|
||||
|
||||
if (startParenIndex >= 0) {
|
||||
if (ch == '(') {
|
||||
al.add(new ACIPString("Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
} else {
|
||||
al.add(new ACIPString(s.substring(i, i+1), ACIPString.END_PAREN));
|
||||
al.add(new TString(s.substring(i, i+1), TString.END_PAREN));
|
||||
startParenIndex = -1;
|
||||
}
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
} else {
|
||||
if (ch == ')') {
|
||||
al.add(new ACIPString("Unexpected closing parenthesis, ), found.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Unexpected closing parenthesis, ), found.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Unexpected closing parenthesis, ), found.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
} else {
|
||||
startParenIndex = i;
|
||||
al.add(new ACIPString(s.substring(i, i+1), ACIPString.START_PAREN));
|
||||
al.add(new TString(s.substring(i, i+1), TString.START_PAREN));
|
||||
}
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
}
|
||||
break; // end '(',')' case
|
||||
|
||||
|
@ -723,13 +723,13 @@ public class ACIPTshegBarScanner {
|
|||
|| (s.charAt(i+1) != ']' && s.charAt(i+1) != '}')) {
|
||||
// The tsheg bar ends here; new token.
|
||||
if (startOfString < i) {
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
al.add(new TString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
}
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.QUESTION));
|
||||
al.add(new TString(s.substring(i, i+1),
|
||||
TString.QUESTION));
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
} // else this is [*TR'A ?] or the like.
|
||||
break; // end '?' case
|
||||
|
||||
|
@ -737,23 +737,23 @@ public class ACIPTshegBarScanner {
|
|||
case '.':
|
||||
// This definitely indicates a new token.
|
||||
if (startOfString < i) {
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
al.add(new TString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
startOfString = i;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
}
|
||||
// . is used for a non-breaking tsheg, such as in
|
||||
// {NGO.,} and {....,DAM}. We give a warning unless ,
|
||||
// or ., or [A-Za-z] follows '.'.
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.TIBETAN_PUNCTUATION));
|
||||
al.add(new TString(s.substring(i, i+1),
|
||||
TString.TIBETAN_PUNCTUATION));
|
||||
if (!(i + 1 < sl
|
||||
&& (s.charAt(i+1) == '.' || s.charAt(i+1) == ','
|
||||
|| (s.charAt(i+1) == '\r' || s.charAt(i+1) == '\n')
|
||||
|| (s.charAt(i+1) >= 'a' && s.charAt(i+1) <= 'z')
|
||||
|| (s.charAt(i+1) >= 'A' && s.charAt(i+1) <= 'Z')))) {
|
||||
al.add(new ACIPString("A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".",
|
||||
ACIPString.WARNING));
|
||||
al.add(new TString("A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".",
|
||||
TString.WARNING));
|
||||
}
|
||||
startOfString = i+1;
|
||||
break; // end '.' case
|
||||
|
@ -775,11 +775,11 @@ public class ACIPTshegBarScanner {
|
|||
boolean legalTshegBarAdornment = false;
|
||||
// The tsheg bar ends here; new token.
|
||||
if (startOfString < i) {
|
||||
if (currentType == ACIPString.TIBETAN_NON_PUNCTUATION
|
||||
if (currentType == TString.TIBETAN_NON_PUNCTUATION
|
||||
&& isTshegBarAdornment(ch))
|
||||
legalTshegBarAdornment = true;
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
al.add(new TString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
}
|
||||
|
||||
// Insert a tsheg if necessary. ACIP files aren't
|
||||
|
@ -788,22 +788,22 @@ public class ACIPTshegBarScanner {
|
|||
if (('\r' == ch
|
||||
|| ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r'))
|
||||
&& !al.isEmpty()
|
||||
&& (((ACIPString)al.get(al.size() - 1)).getType() == ACIPString.TIBETAN_NON_PUNCTUATION
|
||||
|| ((ACIPString)al.get(al.size() - 1)).getType() == ACIPString.TSHEG_BAR_ADORNMENT)) {
|
||||
al.add(new ACIPString(" ", ACIPString.TIBETAN_PUNCTUATION));
|
||||
&& (((TString)al.get(al.size() - 1)).getType() == TString.TIBETAN_NON_PUNCTUATION
|
||||
|| ((TString)al.get(al.size() - 1)).getType() == TString.TSHEG_BAR_ADORNMENT)) {
|
||||
al.add(new TString(" ", TString.TIBETAN_PUNCTUATION));
|
||||
}
|
||||
|
||||
// "DANG,\nLHAG" is really "DANG, LHAG". But always? Not if you have "MDO,\n\nKA...".
|
||||
if (('\r' == ch
|
||||
|| ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r'))
|
||||
&& !al.isEmpty()
|
||||
&& (((ACIPString)al.get(al.size() - 1)).getType() == ACIPString.TIBETAN_PUNCTUATION
|
||||
|| ((ACIPString)al.get(al.size() - 1)).getType() == ACIPString.TSHEG_BAR_ADORNMENT)
|
||||
&& ((ACIPString)al.get(al.size() - 1)).getText().equals(",")
|
||||
&& (((TString)al.get(al.size() - 1)).getType() == TString.TIBETAN_PUNCTUATION
|
||||
|| ((TString)al.get(al.size() - 1)).getType() == TString.TSHEG_BAR_ADORNMENT)
|
||||
&& ((TString)al.get(al.size() - 1)).getText().equals(",")
|
||||
&& s.charAt(i-1) == ','
|
||||
&& (i + (('\r' == ch) ? 2 : 1) < sl
|
||||
&& (s.charAt(i+(('\r' == ch) ? 2 : 1)) != ch))) {
|
||||
al.add(new ACIPString(" ", ACIPString.TIBETAN_PUNCTUATION));
|
||||
al.add(new TString(" ", TString.TIBETAN_PUNCTUATION));
|
||||
}
|
||||
|
||||
// Don't add in a "\r\n" or "\n" unless there's a
|
||||
|
@ -816,24 +816,24 @@ public class ACIPTshegBarScanner {
|
|||
|| ('\n' == ch && i >= 1 && s.charAt(i-1) == '\n')))) {
|
||||
for (int h = 0; h < (realNewline ? 2 : 1); h++) {
|
||||
if (isTshegBarAdornment(ch) && !legalTshegBarAdornment) {
|
||||
al.add(new ACIPString("The ACIP " + ch + " must be glued to the end of a tsheg bar, but this one was not",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("The ACIP " + ch + " must be glued to the end of a tsheg bar, but this one was not",
|
||||
TString.ERROR));
|
||||
} else {
|
||||
al.add(new ACIPString(rn ? s.substring(i - 1, i+1) : s.substring(i, i+1),
|
||||
(legalTshegBarAdornment
|
||||
? ACIPString.TSHEG_BAR_ADORNMENT
|
||||
: ACIPString.TIBETAN_PUNCTUATION)));
|
||||
al.add(new TString(rn ? s.substring(i - 1, i+1) : s.substring(i, i+1),
|
||||
(legalTshegBarAdornment
|
||||
? TString.TSHEG_BAR_ADORNMENT
|
||||
: TString.TIBETAN_PUNCTUATION)));
|
||||
}
|
||||
}
|
||||
}
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
break; // end TIBETAN_PUNCTUATION case
|
||||
|
||||
default:
|
||||
if (!bracketTypeStack.empty()) {
|
||||
int stackTop = ((Integer)bracketTypeStack.peek()).intValue();
|
||||
if (ACIPString.CORRECTION_START == stackTop && '?' == ch) {
|
||||
if (TString.CORRECTION_START == stackTop && '?' == ch) {
|
||||
// allow it through...
|
||||
break;
|
||||
}
|
||||
|
@ -844,46 +844,46 @@ public class ACIPTshegBarScanner {
|
|||
break;
|
||||
if (!(isNumeric(ch) || isAlpha(ch))) {
|
||||
if (startOfString < i) {
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
al.add(new TString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
}
|
||||
if ((int)ch == 65533) {
|
||||
al.add(new ACIPString("Found an illegal, unprintable character.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found an illegal, unprintable character.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal, unprintable character.\n");
|
||||
} else if ('\\' == ch) {
|
||||
al.add(new ACIPString("Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n");
|
||||
} else {
|
||||
al.add(new ACIPString("Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".\n");
|
||||
}
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
currentType = TString.ERROR;
|
||||
} else {
|
||||
// Continue through the loop.
|
||||
if (ACIPString.ERROR == currentType)
|
||||
currentType = ACIPString.TIBETAN_NON_PUNCTUATION;
|
||||
if (TString.ERROR == currentType)
|
||||
currentType = TString.TIBETAN_NON_PUNCTUATION;
|
||||
}
|
||||
break; // end default case
|
||||
}
|
||||
}
|
||||
if (startOfString < sl) {
|
||||
al.add(new ACIPString(s.substring(startOfString, sl),
|
||||
currentType));
|
||||
al.add(new TString(s.substring(startOfString, sl),
|
||||
currentType));
|
||||
}
|
||||
if (waitingForMatchingIllegalClose) {
|
||||
al.add(new ACIPString("UNEXPECTED END OF INPUT",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("UNEXPECTED END OF INPUT",
|
||||
TString.ERROR));
|
||||
if (null != errors) {
|
||||
errors.append("Offset END: "
|
||||
+ "Truly unmatched open bracket found.\n");
|
||||
|
@ -891,25 +891,25 @@ public class ACIPTshegBarScanner {
|
|||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
if (!bracketTypeStack.empty()) {
|
||||
al.add(new ACIPString("Unmatched open bracket found. A " + ((ACIPString.COMMENT == currentType) ? "comment" : "correction") + " does not terminate.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Unmatched open bracket found. A " + ((TString.COMMENT == currentType) ? "comment" : "correction") + " does not terminate.",
|
||||
TString.ERROR));
|
||||
if (null != errors) {
|
||||
errors.append("Offset END: "
|
||||
+ "Unmatched open bracket found. A " + ((ACIPString.COMMENT == currentType) ? "comment" : "correction") + " does not terminate.\n");
|
||||
+ "Unmatched open bracket found. A " + ((TString.COMMENT == currentType) ? "comment" : "correction") + " does not terminate.\n");
|
||||
}
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
if (startSlashIndex >= 0) {
|
||||
al.add(new ACIPString("Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset END: "
|
||||
+ "Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
if (startParenIndex >= 0) {
|
||||
al.add(new ACIPString("Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis.",
|
||||
ACIPString.ERROR));
|
||||
al.add(new TString("Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset END: "
|
||||
+ "Unmatched open parenthesis, (, found.\n");
|
||||
|
|
|
@ -19,18 +19,18 @@ Contributor(s): ______________________________________.
|
|||
package org.thdl.tib.text.ttt;
|
||||
|
||||
/**
|
||||
* An ACIPString is some Latin text and a type, the type stating
|
||||
* whether said text is Latin (usually English) or transliteration of
|
||||
* Tibetan and which particular kind. Scanning errors are also encoded
|
||||
* as ACIPStrings using a special type.
|
||||
* A TString is some Latin text and a type, the type stating whether
|
||||
* said text is Latin (usually English) or transliteration of Tibetan,
|
||||
* which transliteration system (ACIP or EWTS), and which particular
|
||||
* kind. Scanning errors are also encoded as TStrings using a special
|
||||
* type.
|
||||
*
|
||||
* @author David Chandler
|
||||
*/
|
||||
public class ACIPString {
|
||||
* @author David Chandler */
|
||||
public class TString {
|
||||
private int type;
|
||||
private String text;
|
||||
|
||||
/** Returns true if and only if an ACIPString with type type is to
|
||||
/** Returns true if and only if a TString with type type is to
|
||||
* be converted to Latin, not Tibetan, text. */
|
||||
public static boolean isLatin(int type) {
|
||||
return (type != TIBETAN_NON_PUNCTUATION
|
||||
|
@ -42,45 +42,45 @@ public class ACIPString {
|
|||
&& type != END_SLASH);
|
||||
}
|
||||
|
||||
/** For [#COMMENTS] */
|
||||
/** For ACIP [#COMMENTS] and EWTS (DLC FIXME) */
|
||||
public static final int COMMENT = 0;
|
||||
/** For Folio markers like @012B */
|
||||
/** For Folio markers like @012B in ACIP */
|
||||
public static final int FOLIO_MARKER = 1;
|
||||
/** For Latin letters and numbers etc. [*LINE BREAK?] uses this,
|
||||
* for example. */
|
||||
* for example. Or in EWTS, \f uses this. */
|
||||
public static final int LATIN = 2;
|
||||
/** For Tibetan letters and numbers etc. */
|
||||
public static final int TIBETAN_NON_PUNCTUATION = 3;
|
||||
/** For tshegs, whitespace and the like, but not combining
|
||||
* punctutation like %, o, :, m, and x */
|
||||
* punctuation like ACIP %, o, :, m, and x */
|
||||
public static final int TIBETAN_PUNCTUATION = 4;
|
||||
/** For the start of a [*probable correction] or [*possible correction?] */
|
||||
/** For the start of a [*probable correction] or [*possible correction?] in ACIP */
|
||||
public static final int CORRECTION_START = 5;
|
||||
/** Denotes the end of a [*probable correction] */
|
||||
/** Denotes the end of a [*probable correction] in ACIP */
|
||||
public static final int PROBABLE_CORRECTION = 6;
|
||||
/** Denotes the end of a [*possible correction?] */
|
||||
/** Denotes the end of a [*possible correction?] in ACIP*/
|
||||
public static final int POSSIBLE_CORRECTION = 7;
|
||||
/** For [BP] -- blank page */
|
||||
/** For [BP] -- blank page in ACIP*/
|
||||
public static final int BP = 8;
|
||||
/** For [LS] -- Lanycha script on page */
|
||||
/** For [LS] -- Lanycha script on page in ACIP*/
|
||||
public static final int LS = 9;
|
||||
/** For [DR] -- picture (without caption) on page */
|
||||
/** For [DR] -- picture (without caption) on page in ACIP*/
|
||||
public static final int DR = 10;
|
||||
/** For [DD], [DDD], [DD1], [DD2], et cetera -- picture with caption on page */
|
||||
/** For [DD], [DDD], [DD1], [DD2], et cetera -- picture with caption on page in ACIP */
|
||||
public static final int DD = 11;
|
||||
/** For [?] */
|
||||
/** For [?] in ACIP */
|
||||
public static final int QUESTION = 12;
|
||||
/** For the first / in /NYA/ */
|
||||
/** For the first / in /NYA/ in ACIP */
|
||||
public static final int START_SLASH = 13;
|
||||
/** For the last / in /NYA/ */
|
||||
/** For the last / in /NYA/ in ACIP */
|
||||
public static final int END_SLASH = 14;
|
||||
/** For the opening ( in (NYA) */
|
||||
/** For the opening ( in (NYA) in ACIP */
|
||||
public static final int START_PAREN = 15;
|
||||
/** For the closing ) in (NYA) */
|
||||
/** For the closing ) in (NYA) in ACIP */
|
||||
public static final int END_PAREN = 16;
|
||||
/** For things that may not be legal syntax, such as {KA . KHA} */
|
||||
public static final int WARNING = 17;
|
||||
/** For ACIP %, o, and x */
|
||||
/** For ACIP %, o, and x or EWTS (DLC FIXME) */
|
||||
public static final int TSHEG_BAR_ADORNMENT = 18;
|
||||
/** For things that are not legal syntax, such as a file that
|
||||
* contains just "[# HALF A COMMEN" */
|
||||
|
@ -112,11 +112,11 @@ public class ACIPString {
|
|||
}
|
||||
|
||||
/** Don't instantiate me. */
|
||||
private ACIPString() { }
|
||||
private TString() { }
|
||||
|
||||
/** Creates a new ACIPString with source text <i>text</i> and type
|
||||
/** Creates a new TString with source text <i>text</i> and type
|
||||
* <i>type</i> being a characterization like {@link #DD}. */
|
||||
public ACIPString(String text, int type) {
|
||||
public TString(String text, int type) {
|
||||
setType(type);
|
||||
setText(text);
|
||||
}
|
Loading…
Reference in a new issue