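I'm now stricter about accepting alphabetic characters. F, Q, X, and most
lowercase letters (a, b, c, e, ...) do not belong in ACIP, so the scanner rejects them; only the few lowercase letters that ACIP actually uses (such as i, o, m, x, t, h, d, n, s) are still accepted. This should make it even easier to distinguish automatically between Tibetan and English texts.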
This commit is contained in:
parent
39451d8879
commit
245aac4911
2 changed files with 10 additions and 4 deletions
|
@@ -845,6 +845,7 @@ public class ACIPTshegBarScanner {
|
|||
// combining punctuation, vowels:
|
||||
|| ch == '%'
|
||||
|| ch == 'o'
|
||||
|| ch == 'm'
|
||||
|| ch == 'x'
|
||||
|| ch == ':'
|
||||
|| ch == '^'
|
||||
|
@@ -852,8 +853,13 @@ public class ACIPTshegBarScanner {
|
|||
|
||||
|| ch == '-'
|
||||
|| ch == '+'
|
||||
|
||||
|| (ch >= 'A' && ch <= 'Z')
|
||||
|| (ch >= 'a' && ch <= 'z');
|
||||
|| ((ch >= 'A' && ch <= 'Z') && ch != 'X' && ch != 'Q' && ch != 'F')
|
||||
|| ch == 'i'
|
||||
|| ch == 't'
|
||||
|| ch == 'h'
|
||||
|| ch == 'd'
|
||||
|| ch == 'n'
|
||||
|| ch == 's'
|
||||
|| ch == 'h';
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue