A TMW->Unicode table is here. Note these issues, however:
Is the EWTS '_' to be represented as U+0020, or is it a wider space? Does TMW9.42, Dza, map to U+0F5F,U+0F39? Does TMW6.60, r+y, map to U+0F62,U+0FBB or to U+0F6A,U+0FBB? (Likewise with r+w, TMW6.61, TMW6.62, etc.) Is U+0F7E a bindu? What Unicode does TMW7.96 map to, for example? What does TMW7.91 map to? Should TMW8.97 and TMW8.98 map to swastikas elsewhere in Unicode? If so, which codepoints? Likewise with TMW9.60, a Chinese character. Does TMW7.68 map to U+0F39? Does TMW7.74, the ITHI secret sign, have a Unicode mapping? f68,fa0,f80,f72 comes close, but fa0 would be too large, wouldn't it? What Unicode does TMW9.61 map to? Is it for sequences like f40,f7c,f60,f72? Or is it for f60,f72,f7c?
This commit is contained in:
parent
b387c512e9
commit
af5b95b08d
2 changed files with 840 additions and 789 deletions
|
@ -31,6 +31,7 @@ import org.thdl.util.ThdlDebug;
|
|||
import org.thdl.util.ThdlLazyException;
|
||||
import org.thdl.util.Trie;
|
||||
import org.thdl.util.ThdlOptions;
|
||||
import org.thdl.tib.text.tshegbar.UnicodeCodepointToThdlWylie;
|
||||
|
||||
/**
|
||||
* Interfaces between Extended Wylie and the TibetanMachineWeb fonts.
|
||||
|
@ -388,18 +389,13 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
|
||||
String wylie = null;
|
||||
DuffCode[] duffCodes;
|
||||
if (ignore) {
|
||||
duffCodes = new DuffCode[TMW + 1];
|
||||
} else {
|
||||
duffCodes = new DuffCode[11];
|
||||
}
|
||||
duffCodes = new DuffCode[11];
|
||||
|
||||
int k = 0;
|
||||
|
||||
StringBuffer escapedToken = new StringBuffer("");
|
||||
ThdlDebug.verify(escapedToken.length() == 0);
|
||||
while (st.hasMoreTokens()
|
||||
&& (!ignore || (k <= 3 /* 3 from 'case 3:' */))) {
|
||||
while (st.hasMoreTokens()) {
|
||||
String val = getEscapedToken(st, escapedToken);
|
||||
|
||||
if (val.equals(DELIMITER)
|
||||
|
@ -413,9 +409,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
|
||||
switch (k) {
|
||||
case 0: //wylie key
|
||||
if (!ignore) {
|
||||
wylie = val;
|
||||
}
|
||||
wylie = val;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
|
@ -454,23 +448,50 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
case 8:
|
||||
case 9:
|
||||
if (!ignore) {
|
||||
try {
|
||||
duffCodes[k-1] = new DuffCode(val,true);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Couldn't make a DuffCode out of " + val + "; line is " + line + "; k is " + k);
|
||||
}
|
||||
}
|
||||
break;
|
||||
break;
|
||||
|
||||
case 10: //Unicode: ignore for now
|
||||
StringTokenizer uTok = new StringTokenizer(val, ",");
|
||||
while (uTok.hasMoreTokens()) {
|
||||
String subval = uTok.nextToken();
|
||||
ThdlDebug.verify(subval.length() == 4);
|
||||
try {
|
||||
int x;
|
||||
ThdlDebug.verify(((x = Integer.parseInt(subval, 16)) >= 0x0F00
|
||||
&& x <= 0x0FFF)
|
||||
|| x == 0x0020);
|
||||
} catch (NumberFormatException e) {
|
||||
ThdlDebug.verify(false);
|
||||
if (!val.equals("none")) {
|
||||
StringBuffer unicodeBuffer = new StringBuffer();
|
||||
StringTokenizer uTok = new StringTokenizer(val, ",");
|
||||
while (uTok.hasMoreTokens()) {
|
||||
String subval = uTok.nextToken();
|
||||
ThdlDebug.verify(subval.length() == 4 || subval.length() == 3);
|
||||
try {
|
||||
int x;
|
||||
ThdlDebug.verify(((x = Integer.parseInt(subval, 16)) >= 0x0F00
|
||||
&& x <= 0x0FFF)
|
||||
|| x == 0x0020);
|
||||
unicodeBuffer.append((char)x);
|
||||
} catch (NumberFormatException e) {
|
||||
ThdlDebug.verify(false);
|
||||
}
|
||||
}
|
||||
// DLC FIXME: use unicodeBuffer for a TMW->Unicode conversion.
|
||||
|
||||
// For V&V:
|
||||
|
||||
// DLC FIXME: also check for ^[90-bc] and ^.+[40-6a]
|
||||
|
||||
// StringBuffer wylie_minus_plusses_buf
|
||||
// = UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeString(unicodeBuffer.toString());
|
||||
// String wylie_minus_plusses
|
||||
// = ((wylie_minus_plusses_buf == null)
|
||||
// ? null
|
||||
// : wylie_minus_plusses_buf.toString().replaceAll("(.)\\+","$1"));
|
||||
// if (null == wylie
|
||||
// || null == wylie_minus_plusses
|
||||
// || !(wylie.replaceAll("(.)\\+","$1").equals(wylie_minus_plusses)
|
||||
// || wylie.replaceAll("(.)-","$1").equals(wylie_minus_plusses)
|
||||
// || wylie.equals(wylie_minus_plusses))) {
|
||||
// System.out.println("wylie: " + wylie + "; wylie_minus_plusses: " + wylie_minus_plusses);
|
||||
// }
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -486,9 +507,19 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
binduMap.put(duffCodes[TMW],binduCode);
|
||||
}
|
||||
break;
|
||||
case 13:
|
||||
throw new Error("tibwn.ini has only 13 columns, you tried to use a 14th column.");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (k == 10) {
|
||||
throw new Error("needed none or some unicode; line is " + line);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (k < 10) {
|
||||
throw new Error("needed none or some unicode; line is " + line);
|
||||
}
|
||||
|
||||
|
||||
if (!ignore) {
|
||||
if (null == wylie)
|
||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue