I've improved the ACIP tsheg bar scanner to handle a lot of illegal
constructions that occur in practice.
This commit is contained in:
parent
2a57439516
commit
0b91ed0beb
1 changed files with 266 additions and 35 deletions
|
@ -33,6 +33,41 @@ import org.thdl.util.ThdlDebug;
|
|||
* @author David Chandler
|
||||
*/
|
||||
public class ACIPTshegBarScanner {
|
||||
// DLC DOC
|
||||
public static void main(String[] args) throws IOException {
|
||||
if (args.length != 1) {
|
||||
System.out.println("Bad args! Need just the ACIP file's path.");
|
||||
System.exit(1);
|
||||
}
|
||||
StringBuffer errors = new StringBuffer();
|
||||
ArrayList al = scanFile(args[0], errors);
|
||||
|
||||
if (errors.length() > 0) {
|
||||
System.out.println("Errors scanning ACIP input file: ");
|
||||
System.out.println(errors);
|
||||
System.out.println("Exiting; please fix input file and try again.");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
System.out.println("Good scan!");
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
// DLC DOC
|
||||
// DLC FIXME: not so efficient; copies the whole file into memory first
|
||||
public static ArrayList scanFile(String fname, StringBuffer errors) throws IOException {
|
||||
StringBuffer s = new StringBuffer();
|
||||
char ch[] = new char[8192];
|
||||
BufferedReader in
|
||||
= new BufferedReader(new InputStreamReader(new FileInputStream(fname))); // DLC FIXME: specify encoding.
|
||||
|
||||
int amt;
|
||||
while (-1 != (amt = in.read(ch))) {
|
||||
s.append(ch, 0, amt);
|
||||
}
|
||||
return scan(s.toString(), errors);
|
||||
}
|
||||
|
||||
/** Returns a list of {@link ACIPString ACIPStrings} corresponding
|
||||
* to s, possibly the empty list (when the empty string is the
|
||||
* input). Each String is either a Latin comment, some Latin
|
||||
|
@ -41,9 +76,13 @@ public class ACIPTshegBarScanner {
|
|||
*
|
||||
* <p>This not only scans; it finds all the errors a parser would
|
||||
* too, like "NYA x" and "(" and ")" and "/NYA" etc. It puts
|
||||
* those in as ACIPStrings with type {@link ACIPString#ERROR}.
|
||||
* those in as ACIPStrings with type {@link ACIPString#ERROR},
|
||||
* and also, if errors is non-null, appends helpful messages to
|
||||
* errors, each followed by a '\n'. There is at least one case
|
||||
* where no ERROR ACIPString will appear but errors will be
|
||||
* modified.
|
||||
*/
|
||||
public static ArrayList scan(String s) {
|
||||
public static ArrayList scan(String s, StringBuffer errors) {
|
||||
|
||||
// the size depends on whether it's mostly Tibetan or mostly
|
||||
// Latin and a number of other factors. This is meant to be
|
||||
|
@ -60,9 +99,18 @@ public class ACIPTshegBarScanner {
|
|||
if (i < startOfString) throw new Error("bad reset");
|
||||
char ch;
|
||||
ch = s.charAt(i);
|
||||
if (ACIPString.COMMENT == currentType && ch != ']')
|
||||
if (ACIPString.COMMENT == currentType && ch != ']') {
|
||||
if ('[' == ch) {
|
||||
al.add(new ACIPString("Found an open square bracket, [, within a [#COMMENT]-style comment. Square brackets may not appear in comments.\n",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
+ "Found an open square bracket, [, within a [#COMMENT]-style comment. Square brackets may not appear in comments.\n");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
switch (ch) {
|
||||
case '}':
|
||||
case ']':
|
||||
if (bracketTypeStack.empty()) {
|
||||
// Error.
|
||||
|
@ -71,6 +119,9 @@ public class ACIPTshegBarScanner {
|
|||
currentType));
|
||||
}
|
||||
al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
+ "Found a closing square bracket, ], without a matching open square bracket, [. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
} else {
|
||||
|
@ -89,8 +140,11 @@ public class ACIPTshegBarScanner {
|
|||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
}
|
||||
break;
|
||||
break; // end ']','}' case
|
||||
|
||||
case '{': // NOTE WELL: KX0016I.ACT, KD0095M.ACT, and a
|
||||
// host of other ACIP files use {} brackets like
|
||||
// [] brackets. I treat both the same.
|
||||
case '[':
|
||||
// This definitely indicates a new token.
|
||||
if (startOfString < i) {
|
||||
|
@ -102,37 +156,107 @@ public class ACIPTshegBarScanner {
|
|||
String thingy = null;
|
||||
|
||||
if (i + "[DD]".length() <= sl
|
||||
&& s.substring(i, i + "[DD]".length()).equals("[DD]")) {
|
||||
&& (s.substring(i, i + "[DD]".length()).equals("[DD]")
|
||||
|| s.substring(i, i + "[DD]".length()).equals("{DD}"))) {
|
||||
thingy = "[DD]";
|
||||
currentType = ACIPString.DD;
|
||||
} else if (i + "[DD1]".length() <= sl
|
||||
&& s.substring(i, i + "[DD1]".length()).equals("[DD1]")) {
|
||||
&& (s.substring(i, i + "[DD1]".length()).equals("[DD1]")
|
||||
|| s.substring(i, i + "[DD1]".length()).equals("{DD1}"))) {
|
||||
thingy = "[DD1]";
|
||||
currentType = ACIPString.DD;
|
||||
} else if (i + "[DD2]".length() <= sl
|
||||
&& s.substring(i, i + "[DD2]".length()).equals("[DD2]")) {
|
||||
&& (s.substring(i, i + "[DD2]".length()).equals("[DD2]")
|
||||
|| s.substring(i, i + "[DD2]".length()).equals("{DD2}"))) {
|
||||
thingy = "[DD2]";
|
||||
currentType = ACIPString.DD;
|
||||
} else if (i + "[DDD]".length() <= sl
|
||||
&& s.substring(i, i + "[DDD]".length()).equals("[DDD]")) {
|
||||
&& (s.substring(i, i + "[DDD]".length()).equals("[DDD]")
|
||||
|| s.substring(i, i + "[DDD]".length()).equals("{DDD}"))) {
|
||||
thingy = "[DDD]";
|
||||
currentType = ACIPString.DD;
|
||||
} else if (i + "[DR]".length() <= sl
|
||||
&& s.substring(i, i + "[DR]".length()).equals("[DR]")) {
|
||||
&& (s.substring(i, i + "[DR]".length()).equals("[DR]")
|
||||
|| s.substring(i, i + "[DR]".length()).equals("{DR}"))) {
|
||||
thingy = "[DR]";
|
||||
currentType = ACIPString.DR;
|
||||
} else if (i + "[LS]".length() <= sl
|
||||
&& s.substring(i, i + "[LS]".length()).equals("[LS]")) {
|
||||
&& (s.substring(i, i + "[LS]".length()).equals("[LS]")
|
||||
|| s.substring(i, i + "[LS]".length()).equals("{LS}"))) {
|
||||
thingy = "[LS]";
|
||||
currentType = ACIPString.LS;
|
||||
} else if (i + "[BP]".length() <= sl
|
||||
&& s.substring(i, i + "[BP]".length()).equals("[BP]")) {
|
||||
&& (s.substring(i, i + "[BP]".length()).equals("[BP]")
|
||||
|| s.substring(i, i + "[BP]".length()).equals("{BP}"))) {
|
||||
thingy = "[BP]";
|
||||
currentType = ACIPString.BP;
|
||||
} else if (i + "[ BP ]".length() <= sl
|
||||
&& (s.substring(i, i + "[ BP ]".length()).equals("[ BP ]")
|
||||
|| s.substring(i, i + "[ BP ]".length()).equals("{ BP }"))) {
|
||||
thingy = "{ BP }"; // found in TD3790E2.ACT
|
||||
currentType = ACIPString.BP;
|
||||
} else if (i + "[ DD ]".length() <= sl
|
||||
&& (s.substring(i, i + "[ DD ]".length()).equals("[ DD ]")
|
||||
|| s.substring(i, i + "[ DD ]".length()).equals("{ DD }"))) {
|
||||
thingy = "{ DD }"; // found in TD3790E2.ACT
|
||||
currentType = ACIPString.DD;
|
||||
} else if (i + "[?]".length() <= sl
|
||||
&& s.substring(i, i + "[?]".length()).equals("[?]")) {
|
||||
&& (s.substring(i, i + "[?]".length()).equals("[?]")
|
||||
|| s.substring(i, i + "[?]".length()).equals("{?}"))) {
|
||||
thingy = "[?]";
|
||||
currentType = ACIPString.QUESTION;
|
||||
} else {
|
||||
// We see comments appear not as [#COMMENT], but
|
||||
// as [COMMENT] sometimes. We make special cases
|
||||
// for some English comments. DLC FIXME: put
|
||||
// these in a config file.
|
||||
|
||||
String[] englishComments = new String[] {
|
||||
"FIRST", "SECOND", // S5274I.ACT
|
||||
"Additional verses added by Khen Rinpoche here are", // S0216M.ACT
|
||||
"ADDENDUM: The text of", // S0216M.ACT
|
||||
"END OF ADDENDUM", // S0216M.ACT
|
||||
"Some of the verses added here by Khen Rinpoche include:", // S0216M.ACT
|
||||
"Note that, in the second verse, the {YUL LJONG} was orignally {GANG LJONG},\nand is now recited this way since the ceremony is not only taking place in Tibet.", // S0216M.ACT
|
||||
"Note that, in the second verse, the {YUL LJONG} was orignally {GANG LJONG},\r\nand is now recited this way since the ceremony is not only taking place in Tibet.", // S0216M.ACT
|
||||
"text missing", // S6954E1.ACT
|
||||
"INCOMPLETE", // TD3817I.INC
|
||||
"MISSING PAGE", // S0935m.act
|
||||
"MISSING FOLIO", // S0975I.INC
|
||||
"UNCLEAR LINE", // S0839D1I.INC
|
||||
"THE FOLLOWING TEXT HAS INCOMPLETE SECTIONS, WHICH ARE ON ORDER", // SE6260A.INC
|
||||
"@DATA INCOMPLETE HERE", // SE6260A.INC
|
||||
"@DATA MISSING HERE", // SE6260A.INC
|
||||
"DATA INCOMPLETE HERE", // TD4226I2.INC
|
||||
"DATA MISSING HERE", // just being consistent
|
||||
"FOLLOWING SECTION WAS NOT AVAILABLE WHEN THIS EDITION WAS\nPRINTED, AND IS SUPPLIED FROM ANOTHER, PROBABLY THE ORIGINAL:", // S0018N.ACT
|
||||
"FOLLOWING SECTION WAS NOT AVAILABLE WHEN THIS EDITION WAS\r\nPRINTED, AND IS SUPPLIED FROM ANOTHER, PROBABLY THE ORIGINAL:", // S0018N.ACT
|
||||
"THESE PAGE NUMBERS RESERVED IN THIS EDITION FOR PAGES\nMISSING FROM ORIGINAL ON WHICH IT WAS BASED", // S0018N.ACT
|
||||
"THESE PAGE NUMBERS RESERVED IN THIS EDITION FOR PAGES\r\nMISSING FROM ORIGINAL ON WHICH IT WAS BASED", // S0018N.ACT
|
||||
"PAGE NUMBERS RESERVED FROM THIS EDITION FOR MISSING\nSECTION SUPPLIED BY PRECEDING", // S0018N.ACT
|
||||
"PAGE NUMBERS RESERVED FROM THIS EDITION FOR MISSING\r\nSECTION SUPPLIED BY PRECEDING", // S0018N.ACT
|
||||
"SW: OK", // S0057M.ACT
|
||||
"m:ok", // S0057M.ACT
|
||||
"A FIRST ONE\nMISSING HERE?", // S0057M.ACT
|
||||
"A FIRST ONE\r\nMISSING HERE?", // S0057M.ACT
|
||||
"THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT", // S0195A1.INC
|
||||
"THE INITIAL PART OF THIS TEXT WAS INPUT BY THE SERA MEY LIBRARY IN\r\nTIBETAN FONT AND NEEDS TO BE REDONE BY DOUBLE INPUT", // S0195A1.INC
|
||||
};
|
||||
boolean foundOne = false;
|
||||
for (int ec = 0; ec < englishComments.length; ec++) {
|
||||
if (i + 2 + englishComments[ec].length() <= sl
|
||||
&& (s.substring(i, i + 2 + englishComments[ec].length()).equals("[" + englishComments[ec] + "]")
|
||||
|| s.substring(i, i + 2 + englishComments[ec].length()).equals("[" + englishComments[ec] + "]"))) {
|
||||
al.add(new ACIPString("[#" + englishComments[ec] + "]",
|
||||
ACIPString.COMMENT));
|
||||
startOfString = i + 2 + englishComments[ec].length();
|
||||
i = startOfString - 1;
|
||||
foundOne = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (foundOne)
|
||||
break;
|
||||
}
|
||||
if (null != thingy) {
|
||||
al.add(new ACIPString(thingy,
|
||||
|
@ -157,10 +281,18 @@ public class ACIPTshegBarScanner {
|
|||
// "... [" could cause this too.
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.ERROR));
|
||||
if (null != errors) {
|
||||
String inContext = s.substring(i, i+Math.min(sl-i, 10));
|
||||
if (sl-i > 10) {
|
||||
inContext = inContext + "...";
|
||||
}
|
||||
errors.append("Offset " + i + ": "
|
||||
+ "Found an illegal open square bracket, [ (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open square bracket?\n");
|
||||
}
|
||||
startOfString = i + 1;
|
||||
currentType = ACIPString.ERROR;
|
||||
}
|
||||
break; // end '[' case
|
||||
break; // end '[','{' case
|
||||
|
||||
case '@':
|
||||
// This definitely indicates a new token.
|
||||
|
@ -185,8 +317,31 @@ public class ACIPTshegBarScanner {
|
|||
}
|
||||
}
|
||||
if (allAreNumeric) {
|
||||
al.add(new ACIPString(s.substring(i, i+numdigits+2), ACIPString.FOLIO_MARKER));
|
||||
al.add(new ACIPString(s.substring(i, i+numdigits+2),
|
||||
ACIPString.FOLIO_MARKER));
|
||||
startOfString = i+numdigits+2;
|
||||
i = startOfString - 1;
|
||||
currentType = ACIPString.ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// System.out.println("DLC NOW HERE xxx y:" + (i+numdigits+3 < sl) + " z:" + s.charAt(i+1) + s.charAt(i+numdigits+2) + s.charAt(i+numdigits+3));
|
||||
|
||||
if (i+numdigits+3 < sl
|
||||
&& s.charAt(i+1) == '[' && s.charAt(i+numdigits+3) == ']'
|
||||
&& (s.charAt(i+numdigits+2) == 'A' || s.charAt(i+numdigits+2) == 'B')) {
|
||||
boolean allAreNumeric = true;
|
||||
for (int k = 1; k <= numdigits; k++) {
|
||||
if (!isNumeric(s.charAt(i+1+k))) {
|
||||
allAreNumeric = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (allAreNumeric) {
|
||||
al.add(new ACIPString(s.substring(i, i+numdigits+4),
|
||||
ACIPString.FOLIO_MARKER));
|
||||
startOfString = i+numdigits+4;
|
||||
i = startOfString - 1;
|
||||
currentType = ACIPString.ERROR;
|
||||
break;
|
||||
}
|
||||
|
@ -194,6 +349,9 @@ public class ACIPTshegBarScanner {
|
|||
}
|
||||
if (startOfString == i) {
|
||||
al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
+ "Found an illegal at sign, @. @012B is an example of a legal folio marker.\n");
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
}
|
||||
|
@ -209,13 +367,15 @@ public class ACIPTshegBarScanner {
|
|||
}
|
||||
|
||||
if (startSlashIndex >= 0) {
|
||||
al.add(new ACIPString(s.substring(i, i+1), ACIPString.END_SLASH));
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.END_SLASH));
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
startSlashIndex = -1;
|
||||
} else {
|
||||
startSlashIndex = i;
|
||||
al.add(new ACIPString(s.substring(i, i+1), ACIPString.START_SLASH));
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.START_SLASH));
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
}
|
||||
|
@ -234,26 +394,68 @@ public class ACIPTshegBarScanner {
|
|||
// DLC support nesting like (NYA (BA))?
|
||||
|
||||
if (startParenIndex >= 0) {
|
||||
if (ch == '(')
|
||||
if (ch == '(') {
|
||||
al.add(new ACIPString("Nesting of parentheses () is not allowed", ACIPString.ERROR));
|
||||
else {
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
+ "Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\n");
|
||||
} else {
|
||||
al.add(new ACIPString(s.substring(i, i+1), ACIPString.END_PAREN));
|
||||
startParenIndex = -1;
|
||||
}
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
} else {
|
||||
if (ch == ')')
|
||||
if (ch == ')') {
|
||||
al.add(new ACIPString("Unexpected closing parenthesis )", ACIPString.ERROR));
|
||||
else {
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
+ "Unexpected closing parenthesis, ), found.\n");
|
||||
} else {
|
||||
startParenIndex = i;
|
||||
al.add(new ACIPString(s.substring(i, i+1), ACIPString.START_PAREN));
|
||||
}
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
}
|
||||
break; // end '/' case
|
||||
break; // end '(',')' case
|
||||
|
||||
case '?':
|
||||
if (bracketTypeStack.empty()) {
|
||||
// The tsheg bar ends here; new token.
|
||||
if (startOfString < i) {
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
}
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.QUESTION));
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
} // else this is [*TR'A ?] or the like.
|
||||
break; // end '?' case
|
||||
|
||||
|
||||
case '.':
|
||||
// This definitely indicates a new token.
|
||||
if (startOfString < i) {
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
startOfString = i;
|
||||
currentType = ACIPString.ERROR;
|
||||
}
|
||||
// . is used for a non-breaking tsheg, such as in {NGO.,} and {....,DAM}. We give an error unless , or . follows '.'.
|
||||
if (i + 1 < sl && (s.charAt(i+1) == '.' || s.charAt(i+1) == ',')) {
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.TIBETAN_PUNCTUATION));
|
||||
} else {
|
||||
al.add(new ACIPString("A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\".",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
+ "A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\".\n");
|
||||
|
||||
}
|
||||
break; // end '.' case
|
||||
|
||||
// Classic tsheg bar enders:
|
||||
case ' ':
|
||||
|
@ -277,6 +479,13 @@ public class ACIPTshegBarScanner {
|
|||
break; // end TIBETAN_PUNCTUATION case
|
||||
|
||||
default:
|
||||
if (!bracketTypeStack.empty()) {
|
||||
int stackTop = ((Integer)bracketTypeStack.peek()).intValue();
|
||||
if (ACIPString.CORRECTION_START == stackTop && '?' == ch) {
|
||||
// allow it through...
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!(isNumeric(ch) || isAlpha(ch))) {
|
||||
if (startOfString < i) {
|
||||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
|
@ -284,6 +493,9 @@ public class ACIPTshegBarScanner {
|
|||
}
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
+ "Found an illegal character, " + ch + "\n");
|
||||
startOfString = i+1;
|
||||
currentType = ACIPString.ERROR;
|
||||
} else {
|
||||
|
@ -297,22 +509,37 @@ public class ACIPTshegBarScanner {
|
|||
if (startOfString < sl) {
|
||||
al.add(new ACIPString(s.substring(startOfString, sl),
|
||||
currentType));
|
||||
if (!bracketTypeStack.empty()) {
|
||||
al.add(new ACIPString("UNEXPECTED END OF INPUT",
|
||||
ACIPString.ERROR));
|
||||
}
|
||||
if (startSlashIndex >= 0) {
|
||||
al.add(new ACIPString("Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.",
|
||||
ACIPString.ERROR));
|
||||
}
|
||||
if (startParenIndex >= 0) {
|
||||
al.add(new ACIPString("Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis.",
|
||||
ACIPString.ERROR));
|
||||
}
|
||||
if (!bracketTypeStack.empty()) {
|
||||
al.add(new ACIPString("UNEXPECTED END OF INPUT",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors) {
|
||||
if (ACIPString.COMMENT == currentType) {
|
||||
errors.append("Offset END: "
|
||||
+ "Unmatched open square bracket, [, found. A comment does not terminate.\n");
|
||||
} else {
|
||||
errors.append("Offset END: "
|
||||
+ "Unmatched open square bracket, [, found. A correction does not terminate.\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (startSlashIndex >= 0) {
|
||||
al.add(new ACIPString("Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset END: "
|
||||
+ "Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
|
||||
}
|
||||
if (startParenIndex >= 0) {
|
||||
al.add(new ACIPString("Parentheses are supposed to occur in pairs, but the input had an unmatched parenthesis.",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset END: "
|
||||
+ "Unmatched open parenthesis, (, found.\n");
|
||||
}
|
||||
return al;
|
||||
}
|
||||
|
||||
|
||||
/** See implementation. */
|
||||
private static boolean isNumeric(char ch) {
|
||||
return ch >= '0' && ch <= '9';
|
||||
|
@ -320,12 +547,16 @@ public class ACIPTshegBarScanner {
|
|||
|
||||
/** See implementation. */
|
||||
private static boolean isAlpha(char ch) {
|
||||
return ch == '\''
|
||||
return ch == '\'' // 23rd consonant
|
||||
|
||||
// combining punctuation:
|
||||
// combining punctuation, vowels:
|
||||
|| ch == '%'
|
||||
|| ch == 'o'
|
||||
|| ch == 'x'
|
||||
|| ch == ':'
|
||||
|
||||
|| ch == '-'
|
||||
|| ch == '+'
|
||||
|
||||
|| (ch >= 'A' && ch <= 'Z')
|
||||
|| (ch >= 'a' && ch <= 'z');
|
||||
|
|
Loading…
Reference in a new issue