ACIP->TMW and ACIP->Unicode now allow for Unicode escapes like K\u0F84. This means that the lack of support for ACIP's backslash, '\\' (the Sanskrit virama), is mitigated because you can turn ACIP {K\} into ACIP {K\u0F84}.
Support for U+F021-U+F0FF, the Private Use Area (PUA) range that the latest EWTS uses, is not provided.
This commit is contained in:
parent
946d8cbc72
commit
dfaae4be93
6 changed files with 845 additions and 16 deletions
|
@ -154,5 +154,6 @@ public class TMW_RTF_TO_THDL_WYLIETest extends TestCase {
|
|||
helper("Test1", "--to-tibetan-machine", "TM", 0);
|
||||
helper("Test2", "--to-tibetan-machine", "TM", 0);
|
||||
helper("Test2", "--to-tibetan-machine-web", "TMW", 0);
|
||||
helper("Test3", "--acip-to-tmw", "TMW", 0);
|
||||
}
|
||||
}
|
||||
|
|
386
source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest3.rtf
Normal file
386
source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest3.rtf
Normal file
|
@ -0,0 +1,386 @@
|
|||
[# \u0F40\u0F00: ]\u0F40\u0F00
|
||||
|
||||
[# \u0F40\u0F01: ]\u0F40\u0F01
|
||||
|
||||
[# \u0F40\u0F02: ]\u0F40\u0F02
|
||||
|
||||
[# \u0F40\u0F03: ]\u0F40\u0F03
|
||||
|
||||
[# \u0F40\u0F04: ]\u0F40\u0F04
|
||||
|
||||
[# \u0F40\u0F05: ]\u0F40\u0F05
|
||||
|
||||
[# \u0F40\u0F06: ]\u0F40\u0F06
|
||||
|
||||
[# \u0F40\u0F07: ]\u0F40\u0F07
|
||||
|
||||
[# \u0F40\u0F08: ]\u0F40\u0F08
|
||||
|
||||
[# \u0F40\u0F09: ]\u0F40\u0F09
|
||||
|
||||
[# \u0F40\u0F0A: ]\u0F40\u0F0A
|
||||
|
||||
[# \u0F40\u0F0B: ]\u0F40\u0F0B
|
||||
|
||||
[# \u0F40\u0F0C: ]\u0F40\u0F0C
|
||||
|
||||
[# \u0F40\u0F0D: ]\u0F40\u0F0D
|
||||
|
||||
[# \u0F40\u0F0E: ]\u0F40\u0F0E
|
||||
|
||||
[# \u0F40\u0F0F: ]\u0F40\u0F0F
|
||||
|
||||
[# \u0F40\u0F10: ]\u0F40\u0F10
|
||||
|
||||
[# \u0F40\u0F11: ]\u0F40\u0F11
|
||||
|
||||
[# \u0F40\u0F12: ]\u0F40\u0F12
|
||||
|
||||
[# \u0F40\u0F13: ]\u0F40\u0F13
|
||||
|
||||
[# \u0F40\u0F14: ]\u0F40\u0F14
|
||||
|
||||
[# \u0F40\u0F15: ]\u0F40\u0F15
|
||||
|
||||
[# \u0F40\u0F16: ]\u0F40\u0F16
|
||||
|
||||
[# \u0F40\u0F17: ]\u0F40\u0F17
|
||||
|
||||
[# \u0F40\u0F18: ]\u0F40\u0F18
|
||||
|
||||
[# \u0F40\u0F19: ]\u0F40\u0F19
|
||||
|
||||
[# \u0F40\u0F1A: ]\u0F40\u0F1A
|
||||
|
||||
[# \u0F40\u0F1B: ]\u0F40\u0F1B
|
||||
|
||||
[# \u0F40\u0F1C: ]\u0F40\u0F1C
|
||||
|
||||
[# \u0F40\u0F1D: ]\u0F40\u0F1D
|
||||
|
||||
[# \u0F40\u0F1E: ]\u0F40\u0F1E
|
||||
|
||||
[# \u0F40\u0F1F: ]\u0F40\u0F1F
|
||||
|
||||
[# \u0F40\u0F20: ]\u0F40\u0F20
|
||||
|
||||
[# \u0F40\u0F21: ]\u0F40\u0F21
|
||||
|
||||
[# \u0F40\u0F22: ]\u0F40\u0F22
|
||||
|
||||
[# \u0F40\u0F23: ]\u0F40\u0F23
|
||||
|
||||
[# \u0F40\u0F24: ]\u0F40\u0F24
|
||||
|
||||
[# \u0F40\u0F25: ]\u0F40\u0F25
|
||||
|
||||
[# \u0F40\u0F26: ]\u0F40\u0F26
|
||||
|
||||
[# \u0F40\u0F27: ]\u0F40\u0F27
|
||||
|
||||
[# \u0F40\u0F28: ]\u0F40\u0F28
|
||||
|
||||
[# \u0F40\u0F29: ]\u0F40\u0F29
|
||||
|
||||
[# \u0F40\u0F2A: ]\u0F40\u0F2A
|
||||
|
||||
[# \u0F40\u0F2B: ]\u0F40\u0F2B
|
||||
|
||||
[# \u0F40\u0F2C: ]\u0F40\u0F2C
|
||||
|
||||
[# \u0F40\u0F2D: ]\u0F40\u0F2D
|
||||
|
||||
[# \u0F40\u0F2E: ]\u0F40\u0F2E
|
||||
|
||||
[# \u0F40\u0F2F: ]\u0F40\u0F2F
|
||||
|
||||
[# \u0F40\u0F30: ]\u0F40\u0F30
|
||||
|
||||
[# \u0F40\u0F31: ]\u0F40\u0F31
|
||||
|
||||
[# \u0F40\u0F32: ]\u0F40\u0F32
|
||||
|
||||
[# \u0F40\u0F33: ]\u0F40\u0F33
|
||||
|
||||
[# \u0F40\u0F34: ]\u0F40\u0F34
|
||||
|
||||
[# \u0F40\u0F35: ]\u0F40\u0F35
|
||||
|
||||
[# \u0F40\u0F36: ]\u0F40\u0F36
|
||||
|
||||
[# \u0F40\u0F37: ]\u0F40\u0F37
|
||||
|
||||
[# \u0F40\u0F38: ]\u0F40\u0F38
|
||||
|
||||
[# \u0F40\u0F39: ]\u0F40\u0F39
|
||||
|
||||
[# \u0F40\u0F3A: ]\u0F40\u0F3A
|
||||
|
||||
[# \u0F40\u0F3B: ]\u0F40\u0F3B
|
||||
|
||||
[# \u0F40\u0F3C: ]\u0F40\u0F3C
|
||||
|
||||
[# \u0F40\u0F3D: ]\u0F40\u0F3D
|
||||
|
||||
[# \u0F40\u0F3E: ]\u0F40\u0F3E
|
||||
|
||||
[# \u0F40\u0F3F: ]\u0F40\u0F3F
|
||||
|
||||
[# \u0F40\u0F40: ]\u0F40\u0F40
|
||||
|
||||
[# \u0F40\u0F41: ]\u0F40\u0F41
|
||||
|
||||
[# \u0F40\u0F42: ]\u0F40\u0F42
|
||||
|
||||
[# \u0F40\u0F43: ]\u0F40\u0F43
|
||||
|
||||
[# \u0F40\u0F44: ]\u0F40\u0F44
|
||||
|
||||
[# \u0F40\u0F45: ]\u0F40\u0F45
|
||||
|
||||
[# \u0F40\u0F46: ]\u0F40\u0F46
|
||||
|
||||
[# \u0F40\u0F47: ]\u0F40\u0F47
|
||||
|
||||
[# \u0F40\u0F49: ]\u0F40\u0F49
|
||||
|
||||
[# \u0F40\u0F4A: ]\u0F40\u0F4A
|
||||
|
||||
[# \u0F40\u0F4B: ]\u0F40\u0F4B
|
||||
|
||||
[# \u0F40\u0F4C: ]\u0F40\u0F4C
|
||||
|
||||
[# \u0F40\u0F4D: ]\u0F40\u0F4D
|
||||
|
||||
[# \u0F40\u0F4E: ]\u0F40\u0F4E
|
||||
|
||||
[# \u0F40\u0F4F: ]\u0F40\u0F4F
|
||||
|
||||
[# \u0F40\u0F50: ]\u0F40\u0F50
|
||||
|
||||
[# \u0F40\u0F51: ]\u0F40\u0F51
|
||||
|
||||
[# \u0F40\u0F52: ]\u0F40\u0F52
|
||||
|
||||
[# \u0F40\u0F53: ]\u0F40\u0F53
|
||||
|
||||
[# \u0F40\u0F54: ]\u0F40\u0F54
|
||||
|
||||
[# \u0F40\u0F55: ]\u0F40\u0F55
|
||||
|
||||
[# \u0F40\u0F56: ]\u0F40\u0F56
|
||||
|
||||
[# \u0F40\u0F57: ]\u0F40\u0F57
|
||||
|
||||
[# \u0F40\u0F58: ]\u0F40\u0F58
|
||||
|
||||
[# \u0F40\u0F59: ]\u0F40\u0F59
|
||||
|
||||
[# \u0F40\u0F5A: ]\u0F40\u0F5A
|
||||
|
||||
[# \u0F40\u0F5B: ]\u0F40\u0F5B
|
||||
|
||||
[# \u0F40\u0F5C: ]\u0F40\u0F5C
|
||||
|
||||
[# \u0F40\u0F5D: ]\u0F40\u0F5D
|
||||
|
||||
[# \u0F40\u0F5E: ]\u0F40\u0F5E
|
||||
|
||||
[# \u0F40\u0F5F: ]\u0F40\u0F5F
|
||||
|
||||
[# \u0F40\u0F60: ]\u0F40\u0F60
|
||||
|
||||
[# \u0F40\u0F61: ]\u0F40\u0F61
|
||||
|
||||
[# \u0F40\u0F62: ]\u0F40\u0F62
|
||||
|
||||
[# \u0F40\u0F63: ]\u0F40\u0F63
|
||||
|
||||
[# \u0F40\u0F64: ]\u0F40\u0F64
|
||||
|
||||
[# \u0F40\u0F65: ]\u0F40\u0F65
|
||||
|
||||
[# \u0F40\u0F66: ]\u0F40\u0F66
|
||||
|
||||
[# \u0F40\u0F67: ]\u0F40\u0F67
|
||||
|
||||
[# \u0F40\u0F68: ]\u0F40\u0F68
|
||||
|
||||
[# \u0F40\u0F69: ]\u0F40\u0F69
|
||||
|
||||
[# \u0F40\u0F6A: ]\u0F40\u0F6A
|
||||
|
||||
[# \u0F40\u0F71: ]\u0F40\u0F71
|
||||
|
||||
[# \u0F40\u0F72: ]\u0F40\u0F72
|
||||
|
||||
[# \u0F40\u0F73: ]\u0F40\u0F73
|
||||
|
||||
[# \u0F40\u0F74: ]\u0F40\u0F74
|
||||
|
||||
[# \u0F40\u0F75: ]\u0F40\u0F75
|
||||
|
||||
[# \u0F40\u0F76: ]\u0F40\u0F76
|
||||
|
||||
[# \u0F40\u0F77: ]\u0F40\u0F77
|
||||
|
||||
[# \u0F40\u0F78: ]\u0F40\u0F78
|
||||
|
||||
[# \u0F40\u0F79: ]\u0F40\u0F79
|
||||
|
||||
[# \u0F40\u0F7A: ]\u0F40\u0F7A
|
||||
|
||||
[# \u0F40\u0F7B: ]\u0F40\u0F7B
|
||||
|
||||
[# \u0F40\u0F7C: ]\u0F40\u0F7C
|
||||
|
||||
[# \u0F40\u0F7D: ]\u0F40\u0F7D
|
||||
|
||||
[# \u0F40\u0F7E: ]\u0F40\u0F7E
|
||||
|
||||
[# \u0F40\u0F7F: ]\u0F40\u0F7F
|
||||
|
||||
[# \u0F40\u0F80: ]\u0F40\u0F80
|
||||
|
||||
[# \u0F40\u0F81: ]\u0F40\u0F81
|
||||
|
||||
[# \u0F40\u0F82: ]\u0F40\u0F82
|
||||
|
||||
[# \u0F40\u0F83: ]\u0F40\u0F83
|
||||
|
||||
[# \u0F40\u0F84: ]\u0F40\u0F84
|
||||
|
||||
[# \u0F40\u0F85: ]\u0F40\u0F85
|
||||
|
||||
[# \u0F40\u0F86: ]\u0F40\u0F86
|
||||
|
||||
[# \u0F40\u0F87: ]\u0F40\u0F87
|
||||
|
||||
[# \u0F40\u0F88: ]\u0F40\u0F88
|
||||
|
||||
[# \u0F40\u0F89: ]\u0F40\u0F89
|
||||
|
||||
[# \u0F40\u0F8A: ]\u0F40\u0F8A
|
||||
|
||||
[# \u0F40\u0F8B: ]\u0F40\u0F8B
|
||||
|
||||
[# \u0F40\u0F90: ]\u0F40\u0F90
|
||||
|
||||
[# \u0F40\u0F91: ]\u0F40\u0F91
|
||||
|
||||
[# \u0F40\u0F92: ]\u0F40\u0F92
|
||||
|
||||
[# \u0F40\u0F93: ]\u0F40\u0F93
|
||||
|
||||
[# \u0F40\u0F94: ]\u0F40\u0F94
|
||||
|
||||
[# \u0F40\u0F95: ]\u0F40\u0F95
|
||||
|
||||
[# \u0F40\u0F96: ]\u0F40\u0F96
|
||||
|
||||
[# \u0F40\u0F97: ]\u0F40\u0F97
|
||||
|
||||
[# \u0F40\u0F99: ]\u0F40\u0F99
|
||||
|
||||
[# \u0F40\u0F9A: ]\u0F40\u0F9A
|
||||
|
||||
[# \u0F40\u0F9B: ]\u0F40\u0F9B
|
||||
|
||||
[# \u0F40\u0F9C: ]\u0F40\u0F9C
|
||||
|
||||
[# \u0F40\u0F9D: ]\u0F40\u0F9D
|
||||
|
||||
[# \u0F40\u0F9E: ]\u0F40\u0F9E
|
||||
|
||||
[# \u0F40\u0F9F: ]\u0F40\u0F9F
|
||||
|
||||
[# \u0F40\u0FA0: ]\u0F40\u0FA0
|
||||
|
||||
[# \u0F40\u0FA1: ]\u0F40\u0FA1
|
||||
|
||||
[# \u0F40\u0FA2: ]\u0F40\u0FA2
|
||||
|
||||
[# \u0F40\u0FA3: ]\u0F40\u0FA3
|
||||
|
||||
[# \u0F40\u0FA4: ]\u0F40\u0FA4
|
||||
|
||||
[# \u0F40\u0FA5: ]\u0F40\u0FA5
|
||||
|
||||
[# \u0F40\u0FA6: ]\u0F40\u0FA6
|
||||
|
||||
[# \u0F40\u0FA7: ]\u0F40\u0FA7
|
||||
|
||||
[# \u0F40\u0FA8: ]\u0F40\u0FA8
|
||||
|
||||
[# \u0F40\u0FA9: ]\u0F40\u0FA9
|
||||
|
||||
[# \u0F40\u0FAA: ]\u0F40\u0FAA
|
||||
|
||||
[# \u0F40\u0FAB: ]\u0F40\u0FAB
|
||||
|
||||
[# \u0F40\u0FAC: ]\u0F40\u0FAC
|
||||
|
||||
[# \u0F40\u0FAD: ]\u0F40\u0FAD
|
||||
|
||||
[# \u0F40\u0FAE: ]\u0F40\u0FAE
|
||||
|
||||
[# \u0F40\u0FAF: ]\u0F40\u0FAF
|
||||
|
||||
[# \u0F40\u0FB0: ]\u0F40\u0FB0
|
||||
|
||||
[# \u0F40\u0FB1: ]\u0F40\u0FB1
|
||||
|
||||
[# \u0F40\u0FB2: ]\u0F40\u0FB2
|
||||
|
||||
[# \u0F40\u0FB3: ]\u0F40\u0FB3
|
||||
|
||||
[# \u0F40\u0FB4: ]\u0F40\u0FB4
|
||||
|
||||
[# \u0F40\u0FB5: ]\u0F40\u0FB5
|
||||
|
||||
[# \u0F40\u0FB6: ]\u0F40\u0FB6
|
||||
|
||||
[# \u0F40\u0FB7: ]\u0F40\u0FB7
|
||||
|
||||
[# \u0F40\u0FB8: ]\u0F40\u0FB8
|
||||
|
||||
[# \u0F40\u0FB9: ]\u0F40\u0FB9
|
||||
|
||||
[# \u0F40\u0FBA: ]\u0F40\u0FBA
|
||||
|
||||
[# \u0F40\u0FBB: ]\u0F40\u0FBB
|
||||
|
||||
[# \u0F40\u0FBC: ]\u0F40\u0FBC
|
||||
|
||||
[# \u0F40\u0FBE: ]\u0F40\u0FBE
|
||||
|
||||
[# \u0F40\u0FBF: ]\u0F40\u0FBF
|
||||
|
||||
[# \u0F40\u0FC0: ]\u0F40\u0FC0
|
||||
|
||||
[# \u0F40\u0FC1: ]\u0F40\u0FC1
|
||||
|
||||
[# \u0F40\u0FC2: ]\u0F40\u0FC2
|
||||
|
||||
[# \u0F40\u0FC3: ]\u0F40\u0FC3
|
||||
|
||||
[# \u0F40\u0FC4: ]\u0F40\u0FC4
|
||||
|
||||
[# \u0F40\u0FC5: ]\u0F40\u0FC5
|
||||
|
||||
[# \u0F40\u0FC6: ]\u0F40\u0FC6
|
||||
|
||||
[# \u0F40\u0FC7: ]\u0F40\u0FC7
|
||||
|
||||
[# \u0F40\u0FC8: ]\u0F40\u0FC8
|
||||
|
||||
[# \u0F40\u0FC9: ]\u0F40\u0FC9
|
||||
|
||||
[# \u0F40\u0FCA: ]\u0F40\u0FCA
|
||||
|
||||
[# \u0F40\u0FCB: ]\u0F40\u0FCB
|
||||
|
||||
[# \u0F40\u0FCC: ]\u0F40\u0FCC
|
||||
|
||||
[# \u0F40\u0FCF: ]\u0F40\u0FCF
|
||||
|
|
@ -0,0 +1,391 @@
|
|||
{\rtf1\ansi
|
||||
{\fonttbl\f0\fnil Monospaced;\f1\fnil Times New Roman;\f2\fnil TibetanMachineWeb;\f3\fnil TibetanMachineWeb7;\f4\fnil TibetanMachineWeb8;\f5\fnil TibetanMachineWeb1;\f6\fnil TibetanMachineWeb9;\f7\fnil TibetanMachineWeb2;\f8\fnil TibetanMachineWeb3;\f9\fnil TibetanMachineWeb4;\f10\fnil TibetanMachineWeb5;}
|
||||
|
||||
\f1\fs36 [# \\u0F40\\u0F00: ]\f2\fs72\\?\f3 f\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F01: ]\f2\fs72\\\f3 I\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F02: ]\f2\fs72\\8v\f3 _\\\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F03: ]\f2\fs72\\8v\f3 _\f2 i\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F04: ]\f2\fs72\\\f4 &\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F05: ]\f2\fs72\\\f4 '\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F06: ]\f2\fs72\\\f4 )\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F07: ]\f2\fs72\\\f4 *\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F08: ]\f2\fs72\\h\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F09: ]\f2\fs72\\\f4 $\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F0A: ]\f2\fs72\\\f4 !\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F0B: ]\f2\fs72\\\f5 -\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F0C: ]\f2\fs72\\l\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F0D: ]\f2\fs72\\k\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F0E: ]\f2\fs72\\kk\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F0F: ]\f2\fs72\\\f4 +\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F10: ]\f2\fs72\\\f4 /\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F11: ]\f2\fs72\\g\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F12: ]\f2\fs72\\\f4 3\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F13: ]\f2\fs72\\\f4\\\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F14: ]\f2\fs72\\i\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F15: ]\f2\fs72\\\f6 "\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F16: ]\f2\fs72\\\f6 #\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F17: ]\f2\fs72\\\f6 $\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F18: ]\f2\fs72\\\f6 %\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F19: ]\f2\fs72\\\f6 &\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F1A: ]\f2\fs72\\\f4 s\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F1B: ]\f2\fs72\\\f4 u\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F1C: ]\f2\fs72\\\f4 x\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F1D: ]\f2\fs72\\\f4 t\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F1E: ]\f2\fs72\\\f4 v\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F1F: ]\f2\fs72\\\f4 w\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F20: ]\f2\fs72\\\f6 0\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F21: ]\f2\fs72\\\f6 1\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F22: ]\f2\fs72\\\f6 2\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F23: ]\f2\fs72\\\f6 3\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F24: ]\f2\fs72\\\f6 4\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F25: ]\f2\fs72\\\f6 5\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F26: ]\f2\fs72\\\f6 6\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F27: ]\f2\fs72\\\f6 7\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F28: ]\f2\fs72\\\f6 8\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F29: ]\f2\fs72\\\f6 9\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F2A: ]\f2\fs72\\\f4 J\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F2B: ]\f2\fs72\\\f4 K\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F2C: ]\f2\fs72\\\f4 L\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F2D: ]\f2\fs72\\\f4 M\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F2E: ]\f2\fs72\\\f4 N\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F2F: ]\f2\fs72\\\f4 O\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F30: ]\f2\fs72\\\f4 P\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F31: ]\f2\fs72\\\f4 Q\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F32: ]\f2\fs72\\\f4 R\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F33: ]\f2\fs72\\\f4 I\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F34: ]\f2\fs72\\j\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F35: ]\f2\fs72\\\f4 f\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F36: ]\f2\fs72\\\f4 j\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F37: ]\f2\fs72\\\f4 e\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F38: ]\f2\fs72\\\f4 g\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F39: ]\f2\fs72\\\f3 C\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F3A: ]\f2\fs72\\\f4 _\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F3B: ]\f2\fs72\\\f4 `\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F3C: ]\f2\fs72\\\f4 ]\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F3D: ]\f2\fs72\\\f4 ^\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F3E: ]\f2\fs72\\\f6 (\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F3F: ]\f2\fs72\\\f6 '\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F40: ]\f2\fs72\\\\\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F41: ]\f2\fs72\\"\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F42: ]\f2\fs72\\]\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F43: ]\f2\fs72\\\f7 O\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F44: ]\f2\fs72\\$\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F45: ]\f2\fs72\\%\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F46: ]\f2\fs72\\&\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F47: ]\f2\fs72\\'\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F49: ]\f2\fs72\\^\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F4A: ]\f2\fs72\\@\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F4B: ]\f2\fs72\\A\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F4C: ]\f2\fs72\\B\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F4D: ]\f2\fs72\\\f8 Z\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F4E: ]\f2\fs72\\C\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F4F: ]\f2\fs72\\_\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F50: ]\f2\fs72\\*\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F51: ]\f2\fs72\\`\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F52: ]\f2\fs72\\\f9 a\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F53: ]\f2\fs72\\a\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F54: ]\f2\fs72\\.\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F55: ]\f2\fs72\\/\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F56: ]\f2\fs72\\0\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F57: ]\f2\fs72\\\f10 L\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F58: ]\f2\fs72\\1\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F59: ]\f2\fs72\\2\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F5A: ]\f2\fs72\\3\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F5B: ]\f2\fs72\\4\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F5C: ]\f2\fs72\\\f8 0\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F5D: ]\f2\fs72\\5\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F5E: ]\f2\fs72\\b\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F5F: ]\f2\fs72\\7\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F60: ]\f2\fs72\\8\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F61: ]\f2\fs72\\9\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F62: ]\f2\fs72\\\f3 B\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F63: ]\f2\fs72\\;\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F64: ]\f2\fs72\\c\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F65: ]\f2\fs72\\D\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F66: ]\f2\fs72\\=\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F67: ]\f2\fs72\\d\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F68: ]\f2\fs72\\?\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F69: ]\f2\fs72\\E\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F6A: ]\f2\fs72\\:\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F71: ]\f2\fs72\\\f6 w\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F72: ]\f2\fs72\\n\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F73: ]\f2\fs72\\\f8 j\f2 m\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F74: ]\f2\fs72\\\f3 ~\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F75: ]\f2\fs72\\\f6 ~\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F76: ]\f2\fs72\\\f3 GW\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F77: ]\f2\fs72\\\f3 G\f8 j\f3 W\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F78: ]\f2\fs72\\\f6 i\f3 W\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F79: ]\f2\fs72\\\f6 i\f8 j\f3 W\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F7A: ]\f2\fs72\\|\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F7B: ]\f2\fs72\\\f3 X\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F7C: ]\f2\fs72\\~\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F7D: ]\f2\fs72\\\f3 Y\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F7E: ]\f2\fs72\\\f3 [\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F7F: ]\f2\fs72\\\f3\\\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F80: ]\f2\fs72\\\f3 W\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F81: ]\f2\fs72\\\f8 j\f3 W\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F82: ]\f2\fs72\\\f4 Y\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F83: ]\f2\fs72\\\f3 ^\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F84: ]\f2\fs72\\\f3 i\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F85: ]\f2\fs72\\\f3 ]\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F86: ]\f2\fs72\\\f4 l\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F87: ]\f2\fs72\\\f4 k\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F88: ]\f2\fs72\\\f3 H\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F89: ]\f2\fs72\\\f4 m\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F8A: ]\f2\fs72\\\f4 n\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F8B: ]\f2\fs72\\\f4 o\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F90: ]\f2\fs72\\\f6 G\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F91: ]\f2\fs72\\\f6 H\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F92: ]\f2\fs72\\\f6 I\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F93: ]\f2\fs72\\\f6 J\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F94: ]\f2\fs72\\\f6 K\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F95: ]\f2\fs72\\\f6 L\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F96: ]\f2\fs72\\\f6 M\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F97: ]\f2\fs72\\\f6 N\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F99: ]\f2\fs72\\\f6 O\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F9A: ]\f2\fs72\\\f6 P\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F9B: ]\f2\fs72\\\f6 Q\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F9C: ]\f2\fs72\\\f6 R\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F9D: ]\f2\fs72\\\f6 S\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F9E: ]\f2\fs72\\\f6 T\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0F9F: ]\f2\fs72\\\f6 U\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FA0: ]\f2\fs72\\\f6 V\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FA1: ]\f2\fs72\\\f6 W\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FA2: ]\f2\fs72\\\f6 X\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FA3: ]\f2\fs72\\\f6 Y\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FA4: ]\f2\fs72\\\f6 Z\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FA5: ]\f2\fs72\\\f6 [\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FA6: ]\f2\fs72\\\f6\\\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FA7: ]\f2\fs72\\\f6 ]\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FA8: ]\f2\fs72\\\f6 ^\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FA9: ]\f2\fs72\\\f6 _\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FAA: ]\f2\fs72\\\f6 `\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FAB: ]\f2\fs72\\\f6 a\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FAC: ]\f2\fs72\\\f6 b\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FAD: ]\f2\fs72\\\f3 E\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FAE: ]\f2\fs72\\\f6 d\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FAF: ]\f2\fs72\\\f6 e\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FB0: ]\f2\fs72\\\f6 f\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FB1: ]\f2\fs72\\\f3 F\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FB2: ]\f2\fs72\\\f3 G\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FB3: ]\f2\fs72\\\f6 i\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FB4: ]\f2\fs72\\\f6 j\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FB5: ]\f2\fs72\\\f6 k\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FB6: ]\f2\fs72\\\f6 l\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FB7: ]\f2\fs72\\\f6 m\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FB8: ]\f2\fs72\\\f6 n\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FB9: ]\f2\fs72\\\f6 o\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FBA: ]\f2\fs72\\\f6 c\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FBB: ]\f2\fs72\\\f6 g\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FBC: ]\f2\fs72\\\f6 h\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FBE: ]\f2\fs72\\\f4 h\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FBF: ]\f2\fs72\\\f4 i\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FC0: ]\f2\fs72\\\f4 S\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FC1: ]\f2\fs72\\\f4 T\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FC2: ]\f2\fs72\\\f4 U\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FC3: ]\f2\fs72\\\f4 V\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FC4: ]\f2\fs72\\\f6 >\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FC5: ]\f2\fs72\\\f6 ?\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FC6: ]\f2\fs72\\\f6 @\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FC7: ]\f2\fs72\\\f6 A\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FC8: ]\f2\fs72\\\f6 B\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FC9: ]\f2\fs72\\\f6 C\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FCA: ]\f2\fs72\\\f6 D\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FCB: ]\f2\fs72\\\f6 E\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FCC: ]\f2\fs72\\\f6 F\f1\fs36\par
|
||||
\par
|
||||
[# \\u0F40\\u0FCF: ]\f2\fs72\\\f4 y\f1\fs36\par
|
||||
\par
|
||||
\fs24\par
|
||||
}
|
|
@ -266,6 +266,8 @@ public class ACIPConverter {
|
|||
throws IOException
|
||||
{
|
||||
try {
|
||||
if (null != tdoc && (toUnicode && !toRTF))
|
||||
throw new Error("Doing both at once might work, but it's not been tested. I bet some 'continue;' statements will need to go.");
|
||||
if (toUnicode && toRTF)
|
||||
throw new Error("FIXME: support this ACIP->Unicode.rtf mode so that KA (GA) shows up in two different font sizes. See RFE 838591.");
|
||||
if (!toUnicode && !toRTF)
|
||||
|
@ -363,7 +365,7 @@ public class ACIPConverter {
|
|||
warnings.append('\n');
|
||||
}
|
||||
} else {
|
||||
if (s.isLatin(stype)) {
|
||||
if (s.isLatin()) {
|
||||
lastGuyWasNonPunct = false;
|
||||
lastGuy = null;
|
||||
String text
|
||||
|
@ -576,7 +578,7 @@ public class ACIPConverter {
|
|||
tdoc.appendRoman(tdocLocation[0], s.getText(),
|
||||
Color.BLACK);
|
||||
tdocLocation[0] += s.getText().length();
|
||||
continue;
|
||||
continue; // FIXME: this means the unicode above doesn't go into the output if null != writer && null != tdoc?
|
||||
} else {
|
||||
String wy = ACIPRules.getWylieForACIPOther(s.getText());
|
||||
if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
|
||||
|
@ -594,6 +596,24 @@ public class ACIPConverter {
|
|||
tdoc.setTibetanFontSize(regularFontSize);
|
||||
}
|
||||
continue;
|
||||
} else if (stype == TString.UNICODE_CHARACTER) {
|
||||
if (null != writer) {
|
||||
unicode = s.getText();
|
||||
}
|
||||
if (null != tdoc) {
|
||||
duff = TibetanMachineWeb.mapUnicodeToTMW(s.getText().charAt(0));
|
||||
if (null == duff) {
|
||||
hasErrors = true;
|
||||
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The Unicode escape with ordinal " + (int)s.getText().charAt(0) + " does not match up with any TibetanMachineWeb glyph.]";
|
||||
tdoc.appendRoman(tdocLocation[0],
|
||||
errorMessage,
|
||||
Color.RED);
|
||||
tdocLocation[0] += errorMessage.length();
|
||||
if (null != errors)
|
||||
errors.append(errorMessage + "\n");
|
||||
continue; // FIXME: if null != writer, we dropped some output.
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new Error("forgot a case");
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.ArrayList;
|
|||
import java.util.Stack;
|
||||
|
||||
import org.thdl.util.ThdlDebug;
|
||||
import org.thdl.util.ThdlOptions;
|
||||
|
||||
/**
|
||||
* This class is able to break up Strings of ACIP text (for example, an
|
||||
|
@ -903,11 +904,31 @@ public class ACIPTshegBarScanner {
|
|||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found an illegal, unprintable character.\n");
|
||||
} else if ('\\' == ch) {
|
||||
int x = -1;
|
||||
if (!ThdlOptions.getBooleanOption("thdl.tib.text.disallow.unicode.character.escapes.in.acip")
|
||||
&& i + 5 < sl && 'u' == s.charAt(i+1)) {
|
||||
try {
|
||||
if (!((x = Integer.parseInt(s.substring(i+2, i+6), 16)) >= 0x0000 && x <= 0xFFFF))
|
||||
x = -1;
|
||||
} catch (NumberFormatException e) {
|
||||
// Though this is unlikely to be
|
||||
// legal, we allow it through.
|
||||
// (FIXME: warn.)
|
||||
}
|
||||
}
|
||||
if (x >= 0) {
|
||||
al.add(new TString(new String(new char[] { (char)x }),
|
||||
TString.UNICODE_CHARACTER));
|
||||
i += "uXXXX".length();
|
||||
startOfString = i+1;
|
||||
break;
|
||||
} else {
|
||||
al.add(new TString("Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.",
|
||||
TString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ((numNewlines == 0) ? "" : (" or maybe " + (i-numNewlines))) + ": "
|
||||
+ "Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n");
|
||||
}
|
||||
} else {
|
||||
al.add(new TString("Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".",
|
||||
TString.ERROR));
|
||||
|
|
|
@ -19,6 +19,8 @@ Contributor(s): ______________________________________.
|
|||
package org.thdl.tib.text.ttt;
|
||||
|
||||
import org.thdl.util.ThdlOptions;
|
||||
import org.thdl.util.ThdlDebug;
|
||||
import org.thdl.tib.text.tshegbar.UnicodeUtils;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.io.*;
|
||||
|
@ -35,16 +37,19 @@ public class TString {
|
|||
private int type;
|
||||
private String text;
|
||||
|
||||
/** Returns true if and only if an TString with type type is to
|
||||
* be converted to Latin, not Tibetan, text. */
|
||||
public static boolean isLatin(int type) {
|
||||
/** Returns true if and only if an TString with type <i>type</i>
|
||||
* is to be converted to something other than Tibetan text.
|
||||
* (Chinese Unicode, Latin, etc. all qualify as non-Tibetan.) */
|
||||
public boolean isLatin() {
|
||||
return (type != TIBETAN_NON_PUNCTUATION
|
||||
&& type != TIBETAN_PUNCTUATION
|
||||
&& type != TSHEG_BAR_ADORNMENT
|
||||
&& type != START_PAREN
|
||||
&& type != END_PAREN
|
||||
&& type != START_SLASH
|
||||
&& type != END_SLASH);
|
||||
&& type != END_SLASH
|
||||
&& (type != UNICODE_CHARACTER
|
||||
|| !UnicodeUtils.isInTibetanRange(getText().charAt(0))));
|
||||
}
|
||||
|
||||
/** For ACIP [#COMMENTS] and EWTS (DLC FIXME: what are EWTS comments?) */
|
||||
|
@ -87,13 +92,15 @@ public class TString {
|
|||
public static final int WARNING = 17;
|
||||
/** For ACIP %, o, and x or EWTS (DLC FIXME: what are EWTS adornments?) */
|
||||
public static final int TSHEG_BAR_ADORNMENT = 18;
|
||||
/** For "\\uMNOP", this TString will contain the string that has
|
||||
just the sole character "\\uMNOP". */
|
||||
public static final int UNICODE_CHARACTER = 19;
|
||||
/** For things that are not legal syntax, such as a file that
|
||||
* contains just "[# HALF A COMMEN" */
|
||||
public static final int ERROR = 19;
|
||||
* contains just "[# HALF A COMMEN". THIS MUST COME LAST. */
|
||||
public static final int ERROR = 20;
|
||||
|
||||
/** Returns true if and only if this string is Latin (usually
|
||||
* English). Returns false if this string is transliteration of
|
||||
* Tibetan. */
|
||||
/** Returns the type of this string, which is one of the
|
||||
enumerated integer static final members of this class. */
|
||||
public int getType() {
|
||||
return type;
|
||||
}
|
||||
|
@ -126,6 +133,8 @@ public class TString {
|
|||
String ftext = (TIBETAN_NON_PUNCTUATION == type)
|
||||
? MidLexSubstitution.getFinalValueForTibetanNonPunctuationToken(text)
|
||||
: text;
|
||||
// FIXME: assert this
|
||||
ThdlDebug.verify(type != UNICODE_CHARACTER || text.length() == 1);
|
||||
setText(ftext);
|
||||
if ((outputAllTshegBars || outputUniqueTshegBars) && TIBETAN_NON_PUNCTUATION == type)
|
||||
outputTshegBar(ftext);
|
||||
|
@ -182,6 +191,7 @@ public class TString {
|
|||
if (type == END_PAREN) typeString = "END_PAREN";
|
||||
if (type == WARNING) typeString = "WARNING";
|
||||
if (type == TSHEG_BAR_ADORNMENT) typeString = "TSHEG_BAR_ADORNMENT";
|
||||
if (type == UNICODE_CHARACTER) typeString = "UNICODE_CHARACTER";
|
||||
if (type == ERROR) typeString = "ERROR";
|
||||
return typeString + ":{" + getText() + "}";
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue