2002-09-30 03:10:00 +00:00
|
|
|
/*
|
|
|
|
The contents of this file are subject to the THDL Open Community License
|
|
|
|
Version 1.0 (the "License"); you may not use this file except in compliance
|
|
|
|
with the License. You may obtain a copy of the License on the THDL web site
|
|
|
|
(http://www.thdl.org/).
|
|
|
|
|
|
|
|
Software distributed under the License is distributed on an "AS IS" basis,
|
|
|
|
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
|
|
|
License for the specific terms governing rights and limitations under the
|
|
|
|
License.
|
|
|
|
|
|
|
|
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
2003-03-31 00:33:50 +00:00
|
|
|
Library (THDL). Portions created by the THDL are Copyright 2001-2003 THDL.
|
2002-09-30 03:10:00 +00:00
|
|
|
All Rights Reserved.
|
|
|
|
|
|
|
|
Contributor(s): ______________________________________.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package org.thdl.tib.text;
|
|
|
|
|
|
|
|
import java.util.*;
|
|
|
|
import java.net.URL;
|
|
|
|
import java.io.*;
|
|
|
|
import java.lang.*;
|
|
|
|
import java.awt.Font;
|
|
|
|
import java.awt.event.KeyEvent;
|
|
|
|
import javax.swing.text.*;
|
|
|
|
import java.awt.font.*;
|
|
|
|
|
2002-10-28 03:17:28 +00:00
|
|
|
import org.thdl.util.ThdlDebug;
|
Fixes bug 624133, "Input freezes after impossible character". Try 'shsM' in
ACIP or 'ShSm' in Extended Wylie to see the new behavior.
We use a trie to store valid input sequences. In the future, we could use
the same trie as a replacement for the more inefficient HashSets we use to
store characters, vowels, and punctuation. For example, we'd use
'validInputSequences.put("K", new Pair("consonant", "k"))' when reading
in the ACIP keyboard's description of the first consonant of the Tibetan
alphabet in 'TibetanKeyboard.java'.
Note that the current trie implementation is only useful for 7- or 8-bit
transcription systems, and works best for tries with low average depth, which
describes a transcription system's trie very well. If you used arbitrary
Unicode in your keyboard, you'd need a different trie implementation.
Improved the optional keyboard input mode status messages.
2002-11-02 18:44:24 +00:00
|
|
|
import org.thdl.util.Trie;
|
2002-11-18 16:12:25 +00:00
|
|
|
import org.thdl.util.ThdlOptions;
|
2002-10-28 03:17:28 +00:00
|
|
|
|
2002-09-30 03:10:00 +00:00
|
|
|
/**
|
|
|
|
* Interfaces between Extended Wylie and the TibetanMachineWeb fonts.
|
2002-11-18 16:12:25 +00:00
|
|
|
* To do this this must first read the code table, which lives in
|
|
|
|
* "tibwn.ini", and which must be found in the same directory as this
|
|
|
|
* class. Note that WylieWord has its own copy of this file, so edit
|
|
|
|
* both or neither.
|
|
|
|
*
|
|
|
|
* <p>In addition, this class optionally loads the TibetanMachineWeb
|
|
|
|
* fonts manually via {@link #readInFontFiles()}.
|
2002-09-30 03:10:00 +00:00
|
|
|
* @author Edward Garrett, Tibetan and Himalayan Digital Library
|
|
|
|
* @version 1.0
|
|
|
|
*/
|
2003-03-30 21:49:55 +00:00
|
|
|
// FIXME: for speed, make either this class, its methods, or both, final?
|
2003-03-31 00:33:50 +00:00
|
|
|
public class TibetanMachineWeb implements THDLWylieConstants {
|
Fixes bug 624133, "Input freezes after impossible character". Try 'shsM' in
ACIP or 'ShSm' in Extended Wylie to see the new behavior.
We use a trie to store valid input sequences. In the future, we could use
the same trie as a replacement for the more inefficient HashSets we use to
store characters, vowels, and punctuation. For example, we'd use
'validInputSequences.put("K", new Pair("consonant", "k"))' when reading
in the ACIP keyboard's description of the first consonant of the Tibetan
alphabet in 'TibetanKeyboard.java'.
Note that the current trie implementation is only useful for 7- or 8-bit
transcription systems, and works best for tries with low average depth, which
describes a transcription system's trie very well. If you used arbitrary
Unicode in your keyboard, you'd need a different trie implementation.
Improved the optional keyboard input mode status messages.
2002-11-02 18:44:24 +00:00
|
|
|
/** This addresses bug 624133, "Input freezes after impossible
|
|
|
|
* character". The input sequences that are valid in Extended
|
|
|
|
* Wylie. For example, "Sh" will be in this container, but "S"
|
|
|
|
* will not be. */
|
|
|
|
private static Trie validInputSequences = new Trie();
|
|
|
|
|
|
|
|
/** needed because a Trie cannot have a null value associated with
|
|
|
|
* a key */
|
|
|
|
private final static String anyOldObjectWillDo
|
|
|
|
= "this placeholder is useful for debugging; we need a nonnull Object anyway";
|
|
|
|
|
2002-09-30 03:10:00 +00:00
|
|
|
private static boolean hasReadData = false;
|
|
|
|
private static TibetanKeyboard keyboard = null;
|
|
|
|
private static Set charSet = null;
|
|
|
|
private static Set vowelSet = null;
|
|
|
|
private static Set puncSet = null;
|
2003-03-30 02:31:16 +00:00
|
|
|
private static Set topSet = null;
|
2002-09-30 03:10:00 +00:00
|
|
|
private static Set leftSet = null;
|
|
|
|
private static Set rightSet = null;
|
|
|
|
private static Set farRightSet = null;
|
|
|
|
private static Map tibHash = new HashMap();
|
|
|
|
private static Map binduMap = new HashMap();
|
2003-01-05 05:57:44 +00:00
|
|
|
private static String[][] toHashKey = new String[11][95]; //note: toHashKey[0][..] is not used
|
2002-09-30 03:10:00 +00:00
|
|
|
private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32];
|
|
|
|
private static String fileName = "tibwn.ini";
|
|
|
|
private static final String DELIMITER = "~";
|
|
|
|
private static Set top_vowels;
|
|
|
|
private static SimpleAttributeSet[] webFontAttributeSet = new SimpleAttributeSet[11];
|
|
|
|
private static boolean hasDisambiguatingKey; //to disambiguate gy and g.y=
|
|
|
|
private static char disambiguating_key;
|
|
|
|
private static boolean hasSanskritStackingKey; //for stacking Sanskrit
|
|
|
|
private static boolean hasTibetanStackingKey; //for stacking Tibetan
|
|
|
|
private static boolean isStackingMedial; //ie g+y, not +gy
|
|
|
|
private static char stacking_key;
|
|
|
|
private static boolean isAChenRequiredBeforeVowel;
|
|
|
|
private static boolean isAChungConsonant;
|
|
|
|
private static boolean hasAVowel;
|
|
|
|
private static String aVowel;
|
|
|
|
|
2003-01-05 05:57:44 +00:00
|
|
|
// We use .intern() explicitly here so the code is easier to
|
|
|
|
// understand, but all string literals are interned.
|
2002-09-30 03:10:00 +00:00
|
|
|
public static final String[] tmFontNames = {
|
|
|
|
null,
|
2003-01-05 05:57:44 +00:00
|
|
|
"TibetanMachine".intern(),
|
|
|
|
"TibetanMachineSkt1".intern(),
|
|
|
|
"TibetanMachineSkt2".intern(),
|
|
|
|
"TibetanMachineSkt3".intern(),
|
|
|
|
"TibetanMachineSkt4".intern()
|
2002-09-30 03:10:00 +00:00
|
|
|
};
|
|
|
|
public static final String[] tmwFontNames = {
|
|
|
|
null,
|
2003-01-05 05:57:44 +00:00
|
|
|
"TibetanMachineWeb".intern(),
|
|
|
|
"TibetanMachineWeb1".intern(),
|
|
|
|
"TibetanMachineWeb2".intern(),
|
|
|
|
"TibetanMachineWeb3".intern(),
|
|
|
|
"TibetanMachineWeb4".intern(),
|
|
|
|
"TibetanMachineWeb5".intern(),
|
|
|
|
"TibetanMachineWeb6".intern(),
|
|
|
|
"TibetanMachineWeb7".intern(),
|
|
|
|
"TibetanMachineWeb8".intern(),
|
|
|
|
"TibetanMachineWeb9".intern()
|
2002-09-30 03:10:00 +00:00
|
|
|
};
|
|
|
|
/**
|
|
|
|
* represents where in an array of DuffCodes you
|
|
|
|
* find the TibetanMachine equivalence of a glyph
|
|
|
|
*/
|
|
|
|
public static final int TM = 0;
|
|
|
|
/**
|
|
|
|
* represents where in an array of DuffCodes you
|
|
|
|
* find the reduced character equivalent of a TMW glyph
|
|
|
|
*/
|
|
|
|
public static final int REDUCED_C = 1;
|
|
|
|
/**
|
|
|
|
* represents where in an array of DuffCodes you
|
|
|
|
* find the TibetanMachineWeb glyph
|
|
|
|
*/
|
|
|
|
public static final int TMW = 2;
|
|
|
|
/**
|
|
|
|
* represents where in an array of DuffCodes you
|
|
|
|
* find the gigu value for a given glyph
|
|
|
|
*/
|
|
|
|
public static final int VOWEL_i = 3;
|
|
|
|
/**
|
|
|
|
* represents where in an array of DuffCodes you
|
|
|
|
* find the zhebju value for a given glyph
|
|
|
|
*/
|
|
|
|
public static final int VOWEL_u = 4;
|
|
|
|
/**
|
|
|
|
* represents where in an array of DuffCodes you
|
|
|
|
* find the drengbu value for a given glyph
|
|
|
|
*/
|
|
|
|
public static final int VOWEL_e = 5;
|
|
|
|
/**
|
|
|
|
* represents where in an array of DuffCodes you
|
|
|
|
* find the naro value for a given glyph
|
|
|
|
*/
|
|
|
|
public static final int VOWEL_o = 6;
|
|
|
|
/**
|
|
|
|
* represents where in an array of DuffCodes you
|
|
|
|
* find the achung value for a given glyph
|
|
|
|
*/
|
|
|
|
public static final int VOWEL_A = 7;
|
|
|
|
/**
|
|
|
|
* represents where in an array of DuffCodes you
|
|
|
|
* find the achung + zhebju value for a given glyph
|
|
|
|
*/
|
|
|
|
public static final int VOWEL_U = 8;
|
|
|
|
/**
|
|
|
|
* represents where in an array of DuffCodes you
|
|
|
|
* find the Unicode equivalence of a given glyph
|
|
|
|
*/
|
|
|
|
public static final int UNICODE = 9;
|
|
|
|
/**
|
|
|
|
* represents where in an array of DuffCodes you
|
|
|
|
* find the half height equivalence of a given glyph
|
|
|
|
*/
|
|
|
|
public static final int HALF_C = 10;
|
|
|
|
|
2003-03-30 02:31:16 +00:00
|
|
|
/** head letters, superscribed letters */
|
|
|
|
private static final String tops = "r,s,l";
|
|
|
|
/** prefixes */
|
2002-09-30 03:10:00 +00:00
|
|
|
private static final String lefts = "g,d,b,m,'";
|
2003-03-30 02:31:16 +00:00
|
|
|
/** suffixes */
|
2002-09-30 03:10:00 +00:00
|
|
|
private static final String rights = "g,ng,d,n,b,m,r,l,s,',T";
|
2003-03-30 16:13:00 +00:00
|
|
|
/** postsuffixes. nga was here in the past, according to Edward,
|
|
|
|
* to handle cases like ya'ng. pa'am wasn't considered, but had
|
|
|
|
* it been, ma probably would've gone here too. We now handle
|
|
|
|
* 'am, 'ang, etc. specially, so now this set is now just the
|
|
|
|
* postsuffixes.
|
|
|
|
*/
|
|
|
|
private static final String farrights = "d,s";
|
2002-09-30 03:10:00 +00:00
|
|
|
|
|
|
|
static {
|
|
|
|
readData();
|
|
|
|
|
2003-04-13 01:17:10 +00:00
|
|
|
/* Initialize to Extended Wylie keyboard. The preferences
|
|
|
|
* mechanism will switch this to the preferred keyboard. */
|
|
|
|
setKeyboard(keyboard);
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
|
2002-11-18 16:12:25 +00:00
|
|
|
/** Assumes that the TMW font files are resources associated with
|
|
|
|
* this class and loads those font files.
|
|
|
|
* @throws Error if that assumption does not hold */
|
|
|
|
private static void readInFontFiles() {
|
|
|
|
/* Note the leading slashes on these paths: */
|
|
|
|
readInFontFile("/Fonts/TibetanMachineWeb/timwn.ttf");
|
|
|
|
readInFontFile("/Fonts/TibetanMachineWeb/timwn1.ttf");
|
|
|
|
readInFontFile("/Fonts/TibetanMachineWeb/timwn2.ttf");
|
|
|
|
readInFontFile("/Fonts/TibetanMachineWeb/timwn3.ttf");
|
|
|
|
readInFontFile("/Fonts/TibetanMachineWeb/timwn4.ttf");
|
|
|
|
readInFontFile("/Fonts/TibetanMachineWeb/timwn5.ttf");
|
|
|
|
readInFontFile("/Fonts/TibetanMachineWeb/timwn6.ttf");
|
|
|
|
readInFontFile("/Fonts/TibetanMachineWeb/timwn7.ttf");
|
|
|
|
readInFontFile("/Fonts/TibetanMachineWeb/timwn8.ttf");
|
|
|
|
readInFontFile("/Fonts/TibetanMachineWeb/timwn9.ttf");
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Assumes that the TMW font file at the given path is a resource
|
|
|
|
* associated with this class and loads that font file.
|
|
|
|
* @param path a path within the JAR containing this class file
|
|
|
|
* @throws Error if that assumption does not hold */
|
|
|
|
private static void readInFontFile(String path) {
|
2003-01-05 05:57:44 +00:00
|
|
|
|
|
|
|
// Note that the TM and TMW fonts do not have hanging
|
|
|
|
// baselines. They have Roman baselines. Tony Duff said this
|
|
|
|
// is subtly necessary and that only an OpenType font can
|
|
|
|
// support baselines properly.
|
|
|
|
|
2002-11-18 16:12:25 +00:00
|
|
|
try {
|
|
|
|
InputStream is = TibetanMachineWeb.class.getResourceAsStream(path);
|
|
|
|
if (null == is) {
|
|
|
|
throw new Error("You selected the optional behavior of loading the TibetanMachineWeb font family manually, but the resource "
|
|
|
|
+ path + " could not be found.");
|
|
|
|
}
|
|
|
|
Font.createFont(Font.TRUETYPE_FONT, is);
|
|
|
|
} catch( Exception e ) {
|
|
|
|
e.printStackTrace();
|
|
|
|
ThdlDebug.noteIffyCode();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-10-13 19:13:59 +00:00
|
|
|
/**
|
2002-09-30 03:10:00 +00:00
|
|
|
* This method reads the data file ("tibwn.ini"), constructs
|
|
|
|
* the character, punctuation, and vowel lists, as well as
|
|
|
|
* performing other acts of initialization.
|
|
|
|
*/
|
|
|
|
private static void readData() {
|
2002-11-18 16:12:25 +00:00
|
|
|
if (!ThdlOptions.getBooleanOption("thdl.rely.on.system.tmw.fonts")) {
|
|
|
|
readInFontFiles();
|
|
|
|
}
|
|
|
|
|
2002-09-30 03:10:00 +00:00
|
|
|
webFontAttributeSet[0] = null;
|
|
|
|
for (int i=1; i<webFontAttributeSet.length; i++) {
|
|
|
|
webFontAttributeSet[i] = new SimpleAttributeSet();
|
|
|
|
StyleConstants.setFontFamily(webFontAttributeSet[i],tmwFontNames[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
StringTokenizer sTok;
|
2003-03-30 02:31:16 +00:00
|
|
|
topSet = new HashSet();
|
2002-09-30 03:10:00 +00:00
|
|
|
leftSet = new HashSet();
|
|
|
|
rightSet = new HashSet();
|
|
|
|
farRightSet = new HashSet();
|
|
|
|
|
2003-03-30 02:31:16 +00:00
|
|
|
sTok = new StringTokenizer(tops, ",");
|
|
|
|
while (sTok.hasMoreTokens())
|
|
|
|
topSet.add(sTok.nextToken());
|
|
|
|
|
2002-09-30 03:10:00 +00:00
|
|
|
sTok = new StringTokenizer(lefts, ",");
|
|
|
|
while (sTok.hasMoreTokens())
|
|
|
|
leftSet.add(sTok.nextToken());
|
|
|
|
|
|
|
|
sTok = new StringTokenizer(rights, ",");
|
|
|
|
while (sTok.hasMoreTokens())
|
|
|
|
rightSet.add(sTok.nextToken());
|
|
|
|
|
|
|
|
sTok = new StringTokenizer(farrights, ",");
|
|
|
|
while (sTok.hasMoreTokens())
|
|
|
|
farRightSet.add(sTok.nextToken());
|
|
|
|
|
|
|
|
top_vowels = new HashSet();
|
|
|
|
top_vowels.add(TibetanMachineWeb.i_VOWEL);
|
|
|
|
top_vowels.add(TibetanMachineWeb.e_VOWEL);
|
|
|
|
top_vowels.add(TibetanMachineWeb.o_VOWEL);
|
|
|
|
top_vowels.add(TibetanMachineWeb.ai_VOWEL);
|
|
|
|
top_vowels.add(TibetanMachineWeb.au_VOWEL);
|
|
|
|
top_vowels.add(TibetanMachineWeb.reverse_i_VOWEL);
|
|
|
|
|
|
|
|
try {
|
|
|
|
URL url = TibetanMachineWeb.class.getResource(fileName);
|
2002-10-04 04:37:32 +00:00
|
|
|
if (url == null) {
|
|
|
|
System.err.println("Cannot find " + fileName + "; aborting.");
|
|
|
|
System.exit(1);
|
|
|
|
}
|
2002-09-30 03:10:00 +00:00
|
|
|
InputStreamReader isr = new InputStreamReader(url.openStream());
|
|
|
|
BufferedReader in = new BufferedReader(isr);
|
|
|
|
|
|
|
|
System.out.println("reading "+fileName);
|
|
|
|
String line;
|
|
|
|
boolean hashOn = false;
|
2002-11-02 16:01:40 +00:00
|
|
|
boolean isSanskrit = false; //FIXME: this is never read.
|
2002-09-30 03:10:00 +00:00
|
|
|
boolean ignore = false;
|
|
|
|
|
|
|
|
while ((line = in.readLine()) != null) {
|
|
|
|
if (line.startsWith("<?")) { //line is command
|
|
|
|
if (line.equalsIgnoreCase("<?Consonants?>")) {
|
|
|
|
isSanskrit = false;
|
|
|
|
hashOn = false;
|
|
|
|
line = in.readLine();
|
|
|
|
charSet = new HashSet();
|
|
|
|
StringTokenizer st = new StringTokenizer(line,",");
|
Fixes bug 624133, "Input freezes after impossible character". Try 'shsM' in
ACIP or 'ShSm' in Extended Wylie to see the new behavior.
We use a trie to store valid input sequences. In the future, we could use
the same trie as a replacement for the more inefficient HashSets we use to
store characters, vowels, and punctuation. For example, we'd use
'validInputSequences.put("K", new Pair("consonant", "k"))' when reading
in the ACIP keyboard's description of the first consonant of the Tibetan
alphabet in 'TibetanKeyboard.java'.
Note that the current trie implementation is only useful for 7- or 8-bit
transcription systems, and works best for tries with low average depth, which
describes a transcription system's trie very well. If you used arbitrary
Unicode in your keyboard, you'd need a different trie implementation.
Improved the optional keyboard input mode status messages.
2002-11-02 18:44:24 +00:00
|
|
|
while (st.hasMoreTokens()) {
|
|
|
|
String ntk;
|
|
|
|
charSet.add(ntk = st.nextToken());
|
|
|
|
validInputSequences.put(ntk, anyOldObjectWillDo);
|
|
|
|
}
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
else if (line.equalsIgnoreCase("<?Vowels?>")) {
|
|
|
|
isSanskrit = false;
|
|
|
|
hashOn = false;
|
|
|
|
line = in.readLine();
|
|
|
|
vowelSet = new HashSet();
|
|
|
|
StringTokenizer st = new StringTokenizer(line,",");
|
Fixes bug 624133, "Input freezes after impossible character". Try 'shsM' in
ACIP or 'ShSm' in Extended Wylie to see the new behavior.
We use a trie to store valid input sequences. In the future, we could use
the same trie as a replacement for the more inefficient HashSets we use to
store characters, vowels, and punctuation. For example, we'd use
'validInputSequences.put("K", new Pair("consonant", "k"))' when reading
in the ACIP keyboard's description of the first consonant of the Tibetan
alphabet in 'TibetanKeyboard.java'.
Note that the current trie implementation is only useful for 7- or 8-bit
transcription systems, and works best for tries with low average depth, which
describes a transcription system's trie very well. If you used arbitrary
Unicode in your keyboard, you'd need a different trie implementation.
Improved the optional keyboard input mode status messages.
2002-11-02 18:44:24 +00:00
|
|
|
while (st.hasMoreTokens()) {
|
|
|
|
String ntk;
|
|
|
|
vowelSet.add(ntk = st.nextToken());
|
|
|
|
validInputSequences.put(ntk, anyOldObjectWillDo);
|
|
|
|
}
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
else if (line.equalsIgnoreCase("<?Other?>")) {
|
|
|
|
isSanskrit = false;
|
|
|
|
hashOn = false;
|
|
|
|
line = in.readLine();
|
|
|
|
puncSet = new HashSet();
|
|
|
|
StringTokenizer st = new StringTokenizer(line,",");
|
Fixes bug 624133, "Input freezes after impossible character". Try 'shsM' in
ACIP or 'ShSm' in Extended Wylie to see the new behavior.
We use a trie to store valid input sequences. In the future, we could use
the same trie as a replacement for the more inefficient HashSets we use to
store characters, vowels, and punctuation. For example, we'd use
'validInputSequences.put("K", new Pair("consonant", "k"))' when reading
in the ACIP keyboard's description of the first consonant of the Tibetan
alphabet in 'TibetanKeyboard.java'.
Note that the current trie implementation is only useful for 7- or 8-bit
transcription systems, and works best for tries with low average depth, which
describes a transcription system's trie very well. If you used arbitrary
Unicode in your keyboard, you'd need a different trie implementation.
Improved the optional keyboard input mode status messages.
2002-11-02 18:44:24 +00:00
|
|
|
while (st.hasMoreTokens()) {
|
|
|
|
String ntk;
|
|
|
|
puncSet.add(ntk = st.nextToken());
|
|
|
|
validInputSequences.put(ntk, anyOldObjectWillDo);
|
|
|
|
}
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
else if (line.equalsIgnoreCase("<?Input:Punctuation?>")
|
|
|
|
|| line.equalsIgnoreCase("<?Input:Vowels?>")
|
|
|
|
|| line.equalsIgnoreCase("<?Input:Tibetan?>")) {
|
|
|
|
isSanskrit = false;
|
|
|
|
hashOn = true;
|
|
|
|
ignore = false;
|
|
|
|
}
|
|
|
|
else if (line.equalsIgnoreCase("<?Input:Sanskrit?>")) {
|
|
|
|
isSanskrit = true;
|
|
|
|
hashOn = true;
|
|
|
|
ignore = false;
|
|
|
|
}
|
|
|
|
else if (line.equalsIgnoreCase("<?ToWylie?>")) {
|
|
|
|
isSanskrit = false;
|
|
|
|
hashOn = false;
|
|
|
|
ignore = false;
|
|
|
|
}
|
|
|
|
else if (line.equalsIgnoreCase("<?Ignore?>"))
|
|
|
|
ignore = true;
|
|
|
|
}
|
|
|
|
else if (line.startsWith("//")) //comment
|
|
|
|
;
|
|
|
|
else if (line.equals("")) //empty string
|
|
|
|
;
|
|
|
|
else if (!ignore) {
|
|
|
|
StringTokenizer st = new StringTokenizer(line,DELIMITER,true);
|
|
|
|
|
|
|
|
String wylie = new String();
|
|
|
|
DuffCode[] duffCodes = new DuffCode[11];
|
|
|
|
|
|
|
|
int k = 0;
|
|
|
|
|
|
|
|
while (st.hasMoreTokens()) {
|
|
|
|
String val = st.nextToken();
|
|
|
|
|
|
|
|
if (val.equals(DELIMITER))
|
|
|
|
k++;
|
|
|
|
|
|
|
|
else if (!val.equals("")) {
|
|
|
|
switch (k) {
|
|
|
|
case 0: //wylie key
|
|
|
|
wylie = val;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
duffCodes[TM] = new DuffCode(val,false);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 2: //reduced-size character if there is one
|
|
|
|
duffCodes[REDUCED_C] = new DuffCode(val,true);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 3: //TibetanMachineWeb code
|
|
|
|
duffCodes[k-1] = new DuffCode(val,true);
|
|
|
|
TMtoTMW[duffCodes[TM].fontNum-1][duffCodes[TM].charNum-32] = duffCodes[TMW];
|
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
case 5:
|
|
|
|
case 6:
|
|
|
|
case 7:
|
|
|
|
case 8:
|
|
|
|
case 9:
|
|
|
|
duffCodes[k-1] = new DuffCode(val,true);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 10: //Unicode: ignore for now
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 11: //half-height character if there is one
|
|
|
|
duffCodes[HALF_C] = new DuffCode(val,true);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 12: //special bindu-value if vowel+bindu are one glyph
|
|
|
|
DuffCode binduCode = new DuffCode(val,true);
|
|
|
|
binduMap.put(duffCodes[TMW],binduCode);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hashOn)
|
|
|
|
tibHash.put(wylie,duffCodes);
|
|
|
|
|
|
|
|
int font = duffCodes[2].fontNum;
|
|
|
|
int code = duffCodes[2].charNum-32;
|
|
|
|
toHashKey[font][code] = wylie;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
catch (IOException e) {
|
|
|
|
System.out.println("file Disappeared");
|
|
|
|
}
|
|
|
|
|
|
|
|
hasReadData = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* (Re-)sets the keyboard.
|
|
|
|
* @param kb the keyboard to be installed. If null, then the
|
|
|
|
* Extended Wylie keyboard is installed
|
|
|
|
* @return true if the keyboard was successfully set, false
|
|
|
|
* if there was an error
|
|
|
|
*/
|
|
|
|
public static boolean setKeyboard(TibetanKeyboard kb) {
|
|
|
|
keyboard = kb;
|
|
|
|
|
2003-01-05 05:57:44 +00:00
|
|
|
if (currentKeyboardIsExtendedWylie()) { //wylie keyboard
|
2002-09-30 03:10:00 +00:00
|
|
|
hasDisambiguatingKey = true;
|
|
|
|
disambiguating_key = WYLIE_DISAMBIGUATING_KEY;
|
|
|
|
hasSanskritStackingKey = true;
|
|
|
|
hasTibetanStackingKey = false;
|
|
|
|
isStackingMedial = true;
|
|
|
|
stacking_key = WYLIE_SANSKRIT_STACKING_KEY;
|
|
|
|
isAChenRequiredBeforeVowel = false;
|
|
|
|
isAChungConsonant = false;
|
|
|
|
hasAVowel = true;
|
|
|
|
aVowel = WYLIE_aVOWEL;
|
Fixes bug 624133, "Input freezes after impossible character". Try 'shsM' in
ACIP or 'ShSm' in Extended Wylie to see the new behavior.
We use a trie to store valid input sequences. In the future, we could use
the same trie as a replacement for the more inefficient HashSets we use to
store characters, vowels, and punctuation. For example, we'd use
'validInputSequences.put("K", new Pair("consonant", "k"))' when reading
in the ACIP keyboard's description of the first consonant of the Tibetan
alphabet in 'TibetanKeyboard.java'.
Note that the current trie implementation is only useful for 7- or 8-bit
transcription systems, and works best for tries with low average depth, which
describes a transcription system's trie very well. If you used arbitrary
Unicode in your keyboard, you'd need a different trie implementation.
Improved the optional keyboard input mode status messages.
2002-11-02 18:44:24 +00:00
|
|
|
if (!vowelSet.contains(WYLIE_aVOWEL)) {
|
2002-09-30 03:10:00 +00:00
|
|
|
vowelSet.add(WYLIE_aVOWEL);
|
Fixes bug 624133, "Input freezes after impossible character". Try 'shsM' in
ACIP or 'ShSm' in Extended Wylie to see the new behavior.
We use a trie to store valid input sequences. In the future, we could use
the same trie as a replacement for the more inefficient HashSets we use to
store characters, vowels, and punctuation. For example, we'd use
'validInputSequences.put("K", new Pair("consonant", "k"))' when reading
in the ACIP keyboard's description of the first consonant of the Tibetan
alphabet in 'TibetanKeyboard.java'.
Note that the current trie implementation is only useful for 7- or 8-bit
transcription systems, and works best for tries with low average depth, which
describes a transcription system's trie very well. If you used arbitrary
Unicode in your keyboard, you'd need a different trie implementation.
Improved the optional keyboard input mode status messages.
2002-11-02 18:44:24 +00:00
|
|
|
validInputSequences.put(WYLIE_aVOWEL, anyOldObjectWillDo);
|
|
|
|
}
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
hasDisambiguatingKey = keyboard.hasDisambiguatingKey();
|
|
|
|
if (hasDisambiguatingKey)
|
|
|
|
disambiguating_key = keyboard.getDisambiguatingKey();
|
|
|
|
|
|
|
|
hasSanskritStackingKey = keyboard.hasSanskritStackingKey();
|
|
|
|
hasTibetanStackingKey = keyboard.hasTibetanStackingKey();
|
|
|
|
if (hasSanskritStackingKey || hasTibetanStackingKey) {
|
|
|
|
isStackingMedial = keyboard.isStackingMedial();
|
|
|
|
stacking_key = keyboard.getStackingKey();
|
|
|
|
}
|
|
|
|
|
|
|
|
isAChenRequiredBeforeVowel = keyboard.isAChenRequiredBeforeVowel();
|
|
|
|
isAChungConsonant = keyboard.isAChungConsonant();
|
|
|
|
hasAVowel = keyboard.hasAVowel();
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* (Re-)sets the keyboard.
|
|
|
|
* @param url the URL of the keyboard to be installed.
|
|
|
|
* If null, then the Extended Wylie keyboard is
|
|
|
|
* installed
|
|
|
|
* @return true if the keyboard was successfully set, false
|
|
|
|
* if there was an error
|
|
|
|
*/
|
|
|
|
public static boolean setKeyboard(URL url) {
|
|
|
|
try {
|
2002-10-28 03:17:28 +00:00
|
|
|
TibetanKeyboard kb = new TibetanKeyboard(url);
|
2002-09-30 03:10:00 +00:00
|
|
|
if (setKeyboard(kb))
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
catch (TibetanKeyboard.InvalidKeyboardException ike) {
|
2002-10-28 03:17:28 +00:00
|
|
|
System.out.println("can't create the keyboard associated with " + url);
|
|
|
|
ThdlDebug.noteIffyCode();
|
2002-09-30 03:10:00 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the AttributeSet for the given TibetanMachineWeb font.
|
|
|
|
* This information is required in order to be able to put styled
|
|
|
|
* text into {@link TibetanDocument TibetanDocument}.
|
|
|
|
* @param font the number of the TibetanMachineWeb font for which
|
|
|
|
* you want the SimpleAttributeSet: TibetanMachineWeb = 1,
|
|
|
|
* TibetanMachineWeb1 = 2, TibetanMachineWeb = 3, etc. up to 10
|
|
|
|
* @return a SimpleAttributeSet for the given font - that is,
|
|
|
|
* a way of encoding the font itself
|
|
|
|
*/
|
|
|
|
public static SimpleAttributeSet getAttributeSet(int font) {
|
|
|
|
if (font > -1 && font < webFontAttributeSet.length)
|
|
|
|
return webFontAttributeSet[font];
|
|
|
|
else
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Says whether or not the character is formatting.
|
|
|
|
* @param c the character to be checked
|
|
|
|
* @return true if c is formatting (TAB or
|
|
|
|
* ENTER), false if not
|
|
|
|
*/
|
|
|
|
public static boolean isFormatting(char c) {
|
|
|
|
if (c < 32 || c > 126)
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
/*
|
|
|
|
if ( c == KeyEvent.VK_TAB
|
|
|
|
|| c == KeyEvent.VK_ENTER)
|
|
|
|
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks to see if the passed string
|
|
|
|
* is a character in the installed keyboard.
|
|
|
|
*
|
|
|
|
* @param s the string you want to check
|
|
|
|
* @return true if s is a character in the current keyboard,
|
|
|
|
* false if not
|
|
|
|
*/
|
|
|
|
public static boolean isChar(String s) {
|
2003-01-05 05:57:44 +00:00
|
|
|
if (currentKeyboardIsExtendedWylie()) {
|
2002-09-30 03:10:00 +00:00
|
|
|
if (charSet.contains(s))
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
if (keyboard.isChar(s))
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks to see if the passed string
|
|
|
|
* is a character in Extended Wylie.
|
|
|
|
* @param s the string to be checked
|
|
|
|
* @return true if s is a character in
|
|
|
|
* Extended Wylie transliteration, false if not
|
|
|
|
*/
|
|
|
|
public static boolean isWylieChar(String s) {
|
2003-03-30 02:31:16 +00:00
|
|
|
return charSet.contains(s);
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks to see if the passed string
|
|
|
|
* is punctuation in the installed keyboard.
|
|
|
|
* @param s the string you want to check
|
|
|
|
* @return true if s is punctuation in the current
|
|
|
|
* keyboard, false if not
|
|
|
|
*/
|
|
|
|
public static boolean isPunc(String s) {
|
2003-03-30 02:31:16 +00:00
|
|
|
if (currentKeyboardIsExtendedWylie())
|
|
|
|
return puncSet.contains(s);
|
2002-09-30 03:10:00 +00:00
|
|
|
else
|
2003-03-30 02:31:16 +00:00
|
|
|
return keyboard.isPunc(s);
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* This method checks to see if the passed string
|
|
|
|
* is punctuation in Extended Wylie.
|
|
|
|
* @param s the string to be checked
|
|
|
|
* @return true if s is punctuation in
|
|
|
|
* Extended Wylie transliteration, false if not
|
|
|
|
*/
|
|
|
|
public static boolean isWyliePunc(String s) {
|
2003-03-30 02:31:16 +00:00
|
|
|
return puncSet.contains(s);
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks to see if the passed string
|
|
|
|
* is a vowel in the installed keyboard.
|
|
|
|
* @param s the string you want to check
|
|
|
|
* @return true if s is a vowel in the current
|
|
|
|
* keyboard, false if not
|
|
|
|
*/
|
|
|
|
public static boolean isVowel(String s) {
|
2003-03-30 02:31:16 +00:00
|
|
|
if (currentKeyboardIsExtendedWylie())
|
|
|
|
return vowelSet.contains(s);
|
2002-09-30 03:10:00 +00:00
|
|
|
else
|
2003-03-30 02:31:16 +00:00
|
|
|
return keyboard.isVowel(s);
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
|
2003-03-30 21:49:55 +00:00
|
|
|
/**
|
|
|
|
* Checks to see if the concatenation of x and y is ambiguous in
|
|
|
|
* Extended Wylie. gya and g.ya, bla and b.la, and bra and b.ra are
|
|
|
|
* the only syntactically legal ambigous fellows, as stacks like blha,
|
|
|
|
* blda, brla, brkya, brgya, brka, etc. are unambiguous.
|
|
|
|
* @param x the prefix
|
|
|
|
* @param y the root stack
|
|
|
|
* @return true if x + y is ambiguous in the Extended Wylie
|
|
|
|
* transliteration, false if not
|
|
|
|
*/
|
|
|
|
public static boolean isAmbiguousWylie(String x, String y) {
|
2003-04-08 04:56:40 +00:00
|
|
|
// What about ambiguity between wa-zur and wa? dwa vs. d.wa, e.g.?
|
|
|
|
// Doesn't matter, because that's illegal. wa doesn't take any
|
|
|
|
// prefixes.
|
|
|
|
|
2003-03-30 21:49:55 +00:00
|
|
|
return (("g".equals(x) && "y".equals(y))
|
|
|
|
|| ("b".equals(x) && "l".equals(y))
|
|
|
|
|| ("b".equals(x) && "r".equals(y)));
|
|
|
|
}
|
|
|
|
|
2002-09-30 03:10:00 +00:00
|
|
|
/**
|
|
|
|
* Checks to see if the passed string
|
|
|
|
* is a vowel in Extended Wylie.
|
|
|
|
* @param s the string to be checked
|
|
|
|
* @return true if s is a vowel in
|
|
|
|
* Extended Wylie transliteration, false if not
|
|
|
|
*/
|
|
|
|
public static boolean isWylieVowel(String s) {
|
2003-03-30 02:31:16 +00:00
|
|
|
return vowelSet.contains(s);
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true iff this Wylie is valid as a leftmost character in a
|
|
|
|
* Tibetan syllable. For example, in the syllable 'brgyad', 'b' is the
|
2003-03-30 02:31:16 +00:00
|
|
|
* leftmost character. Valid leftmost characters include g, d, b, ',
|
|
|
|
* and m.
|
2002-09-30 03:10:00 +00:00
|
|
|
* @param s the (Wylie) string to be checked
|
|
|
|
* @return true if s is a possible leftmost character in a Tibetan
|
|
|
|
* syllable, false if not. */
|
|
|
|
public static boolean isWylieLeft(String s) {
|
2003-03-30 02:31:16 +00:00
|
|
|
return leftSet.contains(s);
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true iff this Wylie is valid as a right (post-vowel)
|
|
|
|
* character in a Tibetan syllable. For example, in the syllable
|
|
|
|
* 'lags', 'g' is in the right character position. Valid right
|
|
|
|
* characters include g, ng, d, n, b, m, r, l, s, ', and T.
|
|
|
|
* @param s the (Wylie) string to be checked
|
|
|
|
* @return true if s is a possible right character in a Tibetan
|
|
|
|
* syllable, false if not. */
|
|
|
|
public static boolean isWylieRight(String s) {
|
2003-03-30 02:31:16 +00:00
|
|
|
return rightSet.contains(s);
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2003-03-30 02:31:16 +00:00
|
|
|
* Returns true iff this Wylie is valid as a postsuffix in a
|
2002-09-30 03:10:00 +00:00
|
|
|
* Tibetan syllable.
|
|
|
|
* @param s the string to be checked
|
2003-03-30 02:31:16 +00:00
|
|
|
* @return true if s is a possible postsuffix in a Tibetan
|
2002-09-30 03:10:00 +00:00
|
|
|
* syllable, false if not. */
|
|
|
|
public static boolean isWylieFarRight(String s) {
|
2003-03-30 02:31:16 +00:00
|
|
|
return farRightSet.contains(s);
|
|
|
|
}
|
2002-09-30 03:10:00 +00:00
|
|
|
|
2003-03-30 02:31:16 +00:00
|
|
|
/**
|
|
|
|
* Returns true iff this Wylie is valid as a head letter in a Tibetan
|
|
|
|
* syllable.
|
|
|
|
* @param s the string to be checked
|
|
|
|
* @return true if s is a possible superscribed letter in a Tibetan
|
|
|
|
* syllable, false if not. */
|
|
|
|
public static boolean isWylieTop(String s) {
|
|
|
|
return topSet.contains(s);
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts character to its Extended Wylie correspondence.
|
|
|
|
* This assumes that the passed string is a character
|
|
|
|
* in the current keyboard.
|
|
|
|
* @param s the string to be converted
|
|
|
|
* @return the Wylie character corresponding to
|
|
|
|
* s, or null if there is no such character
|
|
|
|
* @see TibetanKeyboard
|
|
|
|
*/
|
|
|
|
public static String getWylieForChar(String s) {
|
2003-01-05 05:57:44 +00:00
|
|
|
if (currentKeyboardIsExtendedWylie())
|
2002-09-30 03:10:00 +00:00
|
|
|
return s;
|
|
|
|
|
|
|
|
return keyboard.getWylieForChar(s);
|
|
|
|
}
|
|
|
|
|
2003-01-05 05:57:44 +00:00
|
|
|
/** Returns true iff the currently active keyboard is the
|
|
|
|
* built-in, extended Wylie keyboard. */
|
|
|
|
public static boolean currentKeyboardIsExtendedWylie() {
|
|
|
|
return (null == getKeyboard());
|
|
|
|
}
|
|
|
|
|
Fixes bug 624133, "Input freezes after impossible character". Try 'shsM' in
ACIP or 'ShSm' in Extended Wylie to see the new behavior.
We use a trie to store valid input sequences. In the future, we could use
the same trie as a replacement for the more inefficient HashSets we use to
store characters, vowels, and punctuation. For example, we'd use
'validInputSequences.put("K", new Pair("consonant", "k"))' when reading
in the ACIP keyboard's description of the first consonant of the Tibetan
alphabet in 'TibetanKeyboard.java'.
Note that the current trie implementation is only useful for 7- or 8-bit
transcription systems, and works best for tries with low average depth, which
describes a transcription system's trie very well. If you used arbitrary
Unicode in your keyboard, you'd need a different trie implementation.
Improved the optional keyboard input mode status messages.
2002-11-02 18:44:24 +00:00
|
|
|
/**
|
|
|
|
* Returns the current keyboard, or, if the current keyboard is the
|
|
|
|
* Extended Wylie keyboard, null. */
|
|
|
|
public static TibetanKeyboard getKeyboard() {
|
|
|
|
return keyboard;
|
|
|
|
}
|
|
|
|
|
2002-09-30 03:10:00 +00:00
|
|
|
/**
|
|
|
|
* Converts punctuation to its Extended Wylie correspondence.
|
|
|
|
* This assumes that the passed string is punctuation
|
|
|
|
* in the current keyboard.
|
|
|
|
* @param s the string to be converted
|
|
|
|
* @return the Wylie punctuation corresponding to
|
|
|
|
* s, or null if there is no such punctuation
|
|
|
|
* @see TibetanKeyboard
|
|
|
|
*/
|
|
|
|
public static String getWylieForPunc(String s) {
|
2003-01-05 05:57:44 +00:00
|
|
|
if (currentKeyboardIsExtendedWylie())
|
2002-09-30 03:10:00 +00:00
|
|
|
return s;
|
|
|
|
|
|
|
|
return keyboard.getWylieForPunc(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts vowel to its Extended Wylie correspondence.
|
|
|
|
* This assumes that the passed string is a vowel
|
|
|
|
* in the current keyboard.
|
|
|
|
* @param s the string to be converted
|
|
|
|
* @return the Wylie vowel corresponding to
|
|
|
|
* s, or null if there is no such vowel
|
|
|
|
* @see TibetanKeyboard
|
|
|
|
*/
|
|
|
|
public static String getWylieForVowel(String s) {
|
2003-01-05 05:57:44 +00:00
|
|
|
if (currentKeyboardIsExtendedWylie())
|
2002-09-30 03:10:00 +00:00
|
|
|
return s;
|
|
|
|
|
|
|
|
return keyboard.getWylieForVowel(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the DuffCode required for a vowel, if
|
|
|
|
* affixed to the given hashKey.
|
|
|
|
* @param hashKey the key for the character the
|
|
|
|
* vowel is to be affixed to
|
|
|
|
* @param vowel the vowel you want the DuffCode for
|
|
|
|
* @return the DuffCode for the vowel in the given
|
|
|
|
* context, or null if there is no such vowel in
|
|
|
|
* the context
|
|
|
|
* @see DuffCode
|
|
|
|
*/
|
|
|
|
public static DuffCode getVowel(String hashKey, int vowel) {
|
|
|
|
DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
|
|
|
|
|
|
|
|
if (null == dc)
|
|
|
|
return null;
|
|
|
|
|
|
|
|
return dc[vowel]; //either a vowel or null
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks to see if a glyph exists for this hash key.
|
|
|
|
* @param hashKey the key to be checked
|
|
|
|
* @return true if there is a glyph corresponding to
|
|
|
|
* hashKey, false if not
|
|
|
|
*/
|
|
|
|
public static boolean hasGlyph(String hashKey) {
|
|
|
|
if (tibHash.get(hashKey)==null)
|
|
|
|
return false;
|
|
|
|
else
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets a glyph for this hash key. Hash keys are not identical to Extended
|
|
|
|
* Wylie. The hash key for a Tibetan stack separates the members of the stack
|
|
|
|
* with '-', for example, 's-g-r'. In Sanskrit stacks, '+' is used, e.g. 'g+h+g+h'.
|
|
|
|
* @param hashKey the key for which you want a DuffCode
|
|
|
|
* @return the TibetanMachineWeb DuffCode value for hashKey
|
|
|
|
* @see DuffCode
|
|
|
|
*/
|
|
|
|
public static DuffCode getGlyph(String hashKey) {
|
|
|
|
DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
|
|
|
|
return dc[TMW];
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the half height character for this hash key.
|
|
|
|
* @param hashKey the key you want a half height glyph for
|
|
|
|
* @return the TibetanMachineWeb DuffCode of hashKey's
|
|
|
|
* reduced height glyph, or null if there is no such glyph
|
|
|
|
* @see DuffCode
|
|
|
|
*/
|
|
|
|
public static DuffCode getHalfHeightGlyph(String hashKey) {
|
|
|
|
DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
|
|
|
|
if (dc == null)
|
|
|
|
return null;
|
|
|
|
|
|
|
|
return dc[REDUCED_C];
|
|
|
|
}
|
|
|
|
|
|
|
|
private static DuffCode getTMtoTMW(int font, int code) {
|
|
|
|
if (code > 255-32) {
|
|
|
|
switch (code) {
|
|
|
|
case 8218-32: //sby
|
|
|
|
code = 130-32;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 8230-32: //sgr
|
|
|
|
code = 133-32;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 8225-32: //spr
|
|
|
|
code = 135-32;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 8117-32: //tshw
|
|
|
|
code = 146-32;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 8126-32: //rw
|
|
|
|
code = 149-32;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 8482-32: //grw
|
|
|
|
code = 153-32;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return TMtoTMW[font][code];
|
|
|
|
}
|
|
|
|
|
|
|
|
private static int getTMFontNumber(String name) {
|
2003-01-05 05:57:44 +00:00
|
|
|
String internedName = name.intern();
|
2002-09-30 03:10:00 +00:00
|
|
|
for (int i=1; i<tmFontNames.length; i++) {
|
2003-01-05 05:57:44 +00:00
|
|
|
if (internedName == tmFontNames[i])
|
2002-09-30 03:10:00 +00:00
|
|
|
return i;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the TibetanMachineWeb font number for this font name.
|
|
|
|
* @param name a font name
|
|
|
|
* @return between 1 and 10 if the font is one
|
|
|
|
* of the TibetanMachineWeb fonts, otherwise 0
|
|
|
|
*/
|
|
|
|
public static int getTMWFontNumber(String name) {
|
2003-01-05 05:57:44 +00:00
|
|
|
String internedName = name.intern();
|
2002-09-30 03:10:00 +00:00
|
|
|
for (int i=1; i<tmwFontNames.length; i++) {
|
2003-01-05 05:57:44 +00:00
|
|
|
// Thanks to interning, we can use == rather than .equals().
|
|
|
|
if (internedName == tmwFontNames[i])
|
2002-09-30 03:10:00 +00:00
|
|
|
return i;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the hash key associated with this glyph.
|
|
|
|
* @param font a TibetanMachineWeb font number
|
|
|
|
* @param code an ASCII character code
|
|
|
|
* @return the hashKey corresponding to the character
|
|
|
|
* at font, code
|
|
|
|
*/
|
|
|
|
public static String getHashKeyForGlyph(int font, int code) {
|
|
|
|
code = code - 32;
|
|
|
|
return toHashKey[font][code];
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the hash key associated with this glyph.
|
|
|
|
* @param dc a DuffCode denoting a TibetanMachineWeb glyph
|
|
|
|
* @return the hashKey corresponding to the character at dc
|
|
|
|
*/
|
|
|
|
public static String getHashKeyForGlyph(DuffCode dc) {
|
|
|
|
int font = dc.fontNum;
|
|
|
|
int code = dc.charNum-32;
|
|
|
|
return toHashKey[font][code];
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the Extended Wylie for a given hash key.
|
|
|
|
* The hash keys in charList and so on may include separating
|
|
|
|
* characters. For example, Wylie 'sgr' has the hash key 's-g-r'.
|
|
|
|
* This method takes a hash key and converts it its correct
|
|
|
|
* Wylie value, and therefore is useful in conversions from
|
|
|
|
* TibetanMachineWeb to Wylie.
|
|
|
|
* @param hashKey the hash key for a glyph
|
|
|
|
* @return the Wylie value of that hash key
|
|
|
|
*/
|
|
|
|
public static String wylieForGlyph(String hashKey) {
|
|
|
|
if (hashKey.indexOf(WYLIE_SANSKRIT_STACKING_KEY) > -1)
|
|
|
|
return hashKey; //because '+' remains part of Extended Wylie for Sanskrit stacks
|
|
|
|
|
|
|
|
if (hashKey.charAt(0) == '-')
|
|
|
|
return hashKey; //because must be '-i' or '-I' vowels
|
|
|
|
|
|
|
|
StringTokenizer st = new StringTokenizer(hashKey, "-");
|
|
|
|
StringBuffer sb = new StringBuffer();
|
|
|
|
|
|
|
|
while (st.hasMoreTokens())
|
|
|
|
sb.append(st.nextToken());
|
|
|
|
|
|
|
|
return sb.toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the Extended Wylie value for this glyph.
|
|
|
|
* @param font the font of the TibetanMachineWeb
|
|
|
|
* glyph you want the Wylie of
|
|
|
|
* @param code the TibetanMachineWeb glyph
|
|
|
|
* you want the Wylie of
|
|
|
|
* @return the Wylie value corresponding to the
|
|
|
|
* glyph denoted by font, code
|
|
|
|
*/
|
|
|
|
public static String getWylieForGlyph(int font, int code) {
|
|
|
|
String hashKey = getHashKeyForGlyph(font, code);
|
|
|
|
return wylieForGlyph(hashKey);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the Extended Wylie value for this glyph.
|
|
|
|
* @param dc the DuffCode of the glyph you want
|
|
|
|
* the Wylie of
|
|
|
|
* @return the Wylie value corresponding to the
|
|
|
|
* glyph denoted by dc
|
|
|
|
*/
|
|
|
|
public static String getWylieForGlyph(DuffCode dc) {
|
|
|
|
String hashKey = getHashKeyForGlyph(dc);
|
|
|
|
return wylieForGlyph(hashKey);
|
|
|
|
}
|
|
|
|
|
Fixes bug 624133, "Input freezes after impossible character". Try 'shsM' in
ACIP or 'ShSm' in Extended Wylie to see the new behavior.
We use a trie to store valid input sequences. In the future, we could use
the same trie as a replacement for the more inefficient HashSets we use to
store characters, vowels, and punctuation. For example, we'd use
'validInputSequences.put("K", new Pair("consonant", "k"))' when reading
in the ACIP keyboard's description of the first consonant of the Tibetan
alphabet in 'TibetanKeyboard.java'.
Note that the current trie implementation is only useful for 7- or 8-bit
transcription systems, and works best for tries with low average depth, which
describes a transcription system's trie very well. If you used arbitrary
Unicode in your keyboard, you'd need a different trie implementation.
Improved the optional keyboard input mode status messages.
2002-11-02 18:44:24 +00:00
|
|
|
/** This addresses bug 624133, "Input freezes after impossible
|
|
|
|
* character". Returns true iff s is a proper prefix of some
|
|
|
|
* legal input for this keyboard. In the extended Wylie
|
|
|
|
* keyboard, hasInputPrefix("S") is true because "Sh" is legal
|
|
|
|
* input. hasInputPrefix("Sh") is false because though "Sh" is
|
|
|
|
* legal input, ("Sh" + y) is not valid input for any non-empty
|
|
|
|
* String y. */
|
|
|
|
public static boolean hasInputPrefix(String s) {
|
2003-01-05 05:57:44 +00:00
|
|
|
if (!currentKeyboardIsExtendedWylie()) {
|
Fixes bug 624133, "Input freezes after impossible character". Try 'shsM' in
ACIP or 'ShSm' in Extended Wylie to see the new behavior.
We use a trie to store valid input sequences. In the future, we could use
the same trie as a replacement for the more inefficient HashSets we use to
store characters, vowels, and punctuation. For example, we'd use
'validInputSequences.put("K", new Pair("consonant", "k"))' when reading
in the ACIP keyboard's description of the first consonant of the Tibetan
alphabet in 'TibetanKeyboard.java'.
Note that the current trie implementation is only useful for 7- or 8-bit
transcription systems, and works best for tries with low average depth, which
describes a transcription system's trie very well. If you used arbitrary
Unicode in your keyboard, you'd need a different trie implementation.
Improved the optional keyboard input mode status messages.
2002-11-02 18:44:24 +00:00
|
|
|
return keyboard.hasInputPrefix(s);
|
|
|
|
} else {
|
|
|
|
return validInputSequences.hasPrefix(s);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2002-09-30 03:10:00 +00:00
|
|
|
/**
|
|
|
|
* Says whether or not this glyph involves a Sanskrit stack.
|
|
|
|
* @param font the font of a TibetanMachineWeb glyph
|
|
|
|
* @param code the ASCII value of a TibetanMachineWeb glyph
|
|
|
|
* @return true if this glyph is a Sanskrit stack,
|
|
|
|
* false if not
|
|
|
|
*/
|
|
|
|
public static boolean isSanskritStack(int font, int code) {
|
|
|
|
String val = toHashKey[font][code];
|
|
|
|
if (val.indexOf(WYLIE_SANSKRIT_STACKING_KEY) == -1)
|
|
|
|
return false;
|
|
|
|
else
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Says whether or not this glyph involves a Sanskrit stack.
|
|
|
|
* @param dc the DuffCode of a TibetanMachineWeb glyph
|
|
|
|
* @return true if this glyph is a Sanskrit stack,
|
|
|
|
* false if not
|
|
|
|
*/
|
|
|
|
public static boolean isSanskritStack(DuffCode dc) {
|
|
|
|
int font = dc.fontNum;
|
|
|
|
int code = dc.charNum-32;
|
|
|
|
|
|
|
|
if (isSanskritStack(font, code))
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Says whether or not this glyph involves a Tibetan stack.
|
|
|
|
* @param font the font of a TibetanMachineWeb glyph
|
|
|
|
* @param code the ASCII value of a TibetanMachineWeb glyph
|
|
|
|
* @return true if this glyph is a Tibetan stack,
|
|
|
|
* false if not
|
|
|
|
*/
|
|
|
|
public static boolean isStack(int font, int code) {
|
|
|
|
String val = toHashKey[font][code];
|
|
|
|
if (val.indexOf('-') < 1) //we allow '-i' and '-I' in as vowels
|
|
|
|
return false;
|
|
|
|
else
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Says whether or not this glyph involves a Tibetan stack.
|
|
|
|
* @param dc the DuffCode of a TibetanMachineWeb glyph
|
|
|
|
* @return true if this glyph is a Tibetan stack,
|
|
|
|
* false if not
|
|
|
|
*/
|
|
|
|
public static boolean isStack(DuffCode dc) {
|
|
|
|
int font = dc.fontNum;
|
|
|
|
int code = dc.charNum-32;
|
|
|
|
|
|
|
|
if (isStack(font, code))
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the hash with information about each character and stack.
|
|
|
|
* @return a hash containing a key for each
|
|
|
|
* entity defined in Wylie, whose object is the
|
|
|
|
* DuffCode for that key
|
|
|
|
*/
|
|
|
|
public static Map getTibHash() {
|
|
|
|
return tibHash;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the hash for characters that require special bindus.
|
|
|
|
* @return a hash whose keys are all vowel glyphs (DuffCodes)
|
|
|
|
* that require a special bindu, and whose objects
|
|
|
|
* are the vowel+bindu glyph (DuffCode) corresponding to each
|
|
|
|
* such vowel glyph
|
|
|
|
*/
|
|
|
|
public static Map getBinduMap() {
|
|
|
|
return binduMap;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true iff the keyboard has a disambiguating key.
|
|
|
|
* @return true if the installed keyboard has a disambiguating key,
|
|
|
|
* false if not
|
|
|
|
* @see TibetanKeyboard */
|
|
|
|
public static boolean hasDisambiguatingKey() {
|
|
|
|
return hasDisambiguatingKey;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the disambiguating key.
|
|
|
|
* @return the disambiguating key for the installed
|
|
|
|
* keyboard, or ' ' if there is no such key
|
|
|
|
* @see TibetanKeyboard
|
|
|
|
*/
|
|
|
|
public static char getDisambiguatingKey() {
|
|
|
|
return disambiguating_key;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true iff the keyboard has a Sanksrit stacking key.
|
|
|
|
* @return true if a stacking key is required to type Sanskrit stacks,
|
|
|
|
* false if not
|
|
|
|
* @see TibetanKeyboard */
|
|
|
|
public static boolean hasSanskritStackingKey() {
|
|
|
|
return hasSanskritStackingKey;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true iff the keyboard has a Tibetan stacking key.
|
|
|
|
* @return true if a stacking key is required to type Tibetan stacks,
|
|
|
|
* false if not
|
|
|
|
* @see TibetanKeyboard */
|
|
|
|
public static boolean hasTibetanStackingKey() {
|
|
|
|
return hasTibetanStackingKey;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true iff stacking is medial.
|
|
|
|
* @return true if the stacking key is medial, false if not, or if
|
|
|
|
* there is no stacking key
|
|
|
|
* @see TibetanKeyboard */
|
|
|
|
public static boolean isStackingMedial() {
|
|
|
|
return isStackingMedial;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the stacking key.
|
|
|
|
* @return the stacking key, or ' ' if there
|
|
|
|
* isn't one
|
|
|
|
* @see TibetanKeyboard
|
|
|
|
*/
|
|
|
|
public static char getStackingKey() {
|
|
|
|
return stacking_key;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true iff achen is required before vowels.
|
|
|
|
* @return true if you have to type achen first before you can get a
|
|
|
|
* vowel with achen, false if you can just type the vowel by itself (as
|
|
|
|
* in Wylie)
|
|
|
|
* @see TibetanKeyboard */
|
|
|
|
public static boolean isAChenRequiredBeforeVowel() {
|
|
|
|
return isAChenRequiredBeforeVowel;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true iff achung is treated as a consonant.
|
|
|
|
* @return true if a-chung is considered a consonant for the purposes
|
|
|
|
* of stacking, false if not (as in Wylie)
|
|
|
|
* @see TibetanKeyboard */
|
|
|
|
public static boolean isAChungConsonant() {
|
|
|
|
return isAChungConsonant;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true iff there is a key for the invisible 'a' vowel in this
|
|
|
|
* keyboard.
|
|
|
|
* @return true if the installed keyboard has a dummy a vowel, false if
|
|
|
|
* not
|
|
|
|
* @see TibetanKeyboard */
|
|
|
|
public static boolean hasAVowel() {
|
|
|
|
return hasAVowel;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the invisible 'a' vowel.
|
|
|
|
* @return the dummy 'a'-vowel for the installed
|
|
|
|
* keyboard, or "" if there is no such vowel
|
|
|
|
* @see TibetanKeyboard
|
|
|
|
*/
|
|
|
|
public static String getAVowel() {
|
|
|
|
return aVowel;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true iff this glyph is a top (superscript) vowel.
|
|
|
|
* @param a DuffCode representing a TibetanMachineWeb glyph
|
|
|
|
* @return true if the glyph is a top-hanging (superscript) vowel (i,
|
|
|
|
* u, e, o, ai, or ao) and false if not */
|
|
|
|
public static boolean isTopVowel(DuffCode dc) {
|
|
|
|
String wylie = getWylieForGlyph(dc);
|
|
|
|
if (top_vowels.contains(wylie))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
2002-10-28 04:12:49 +00:00
|
|
|
|
|
|
|
/** Returns true if and only if ch, which is an ASCII character
|
|
|
|
that you can think of as an arbitrary index into one of the
|
|
|
|
Tibetan fonts, is a character that is appropriate for ending a
|
|
|
|
line of Tibetan. <code>'-'</code>, for example, represents
|
|
|
|
the tsheg (the little dot after a syllable) in (FIXME: Edward,
|
|
|
|
is this true?) all of the TMW fonts. Thus, this would return
|
|
|
|
true for <code>'-'</code>.
|
|
|
|
|
|
|
|
Note that ch is <b>not</b> the Wylie transliteration; it is an
|
|
|
|
arbitrary character (well, not quite, since ' ', '\t', '\n' et
|
|
|
|
cetera seem to have been wisely chosen to represent Tibetan
|
|
|
|
whitespace, but pretty arbitrary). If you open up MS Word,
|
|
|
|
select TibetanMachineWeb1, and type a hyphen,
|
|
|
|
i.e. <code>'-'</code>, you'll see a tsheg appear. If you open
|
|
|
|
Jskad and type a hyphen, you won't see a tsheg.
|
|
|
|
|
|
|
|
@param ch the ASCII character "index" into the TMW font
|
|
|
|
|
|
|
|
@return true iff this is a tsheg or whitespace or the like */
|
|
|
|
public static boolean isTMWFontCharBreakable(char ch) {
|
2002-11-18 16:12:25 +00:00
|
|
|
// DLC FIXME: treat whitespace differently than you do
|
|
|
|
// punctuation. And treat "/ka nga/", Tibetan verse,
|
|
|
|
// specially in the caller of this method.
|
|
|
|
|
2002-11-02 03:46:44 +00:00
|
|
|
if (false) {
|
2002-11-02 03:33:09 +00:00
|
|
|
//<?Input:Punctuation?>
|
|
|
|
int ord = (int)ch;
|
|
|
|
|
|
|
|
// FIXME: why did 94 appear twice in tibwn.ini's punctuation section?
|
|
|
|
if (32 == ord) return true;
|
|
|
|
if (45 == ord) return true;
|
|
|
|
if (107 == ord) return true;
|
|
|
|
if (103 == ord) return true;
|
|
|
|
if (104 == ord) return true;
|
|
|
|
if (105 == ord) return true;
|
|
|
|
if (43 == ord) return true;
|
|
|
|
if (40 == ord) return true;
|
|
|
|
if (41 == ord) return true;
|
|
|
|
if (38 == ord) return true;
|
|
|
|
if (39 == ord) return true;
|
|
|
|
if (93 == ord) return true;
|
|
|
|
if (94 == ord) return true;
|
|
|
|
if (92 == ord) return true;
|
|
|
|
if (91 == ord) return true;
|
2002-11-02 03:46:44 +00:00
|
|
|
} // DLC FIXME
|
2002-10-28 04:12:49 +00:00
|
|
|
return ('-' == ch /* FIXME: this is the tsheg (i.e., the Wylie is ' '), but we have no constant for it. */
|
|
|
|
|| ' ' == ch /* FIXME: this is space (i.e., the Wylie is '_'), but we have no constant for it. */
|
|
|
|
|| '\t' == ch /* FIXME: this is some sort of whitespace */
|
|
|
|
|| '\n' == ch /* FIXME: this is some sort of whitespace */
|
2002-11-02 03:33:09 +00:00
|
|
|
|| '/' == ch /* a shad */
|
2002-10-28 04:12:49 +00:00
|
|
|
);
|
|
|
|
|
|
|
|
// FIXME: am I missing anything? tabs etc.?
|
|
|
|
}
|
2002-09-30 03:10:00 +00:00
|
|
|
}
|