I've added a command-line converter,
org.thdl.tib.input.TMW_RTF_TO_THDL_WYLIE. It converts RTF files consisting of TMW characters to the corresponding THDL Extended Wylie. It supports a --find-some-non-tmw mode, which allows you to ensure that no unusual characters will spoil the conversion. The converter has built-in intelligence that allows it to handle Tahoma '{', '}', and '\\' characters properly. The converter also works on mixed Roman/TMW input, but the --find-some-non-tmw and --find-all-non-tmw modes are not as useful for such input. To see usage information, invoke org.thdl.tib.input.TMW_RTF_TO_THDL_WYLIE, which resides in Jskad's jar, with no command-line options.
This commit is contained in:
parent
17ea8fdf2a
commit
e2a9720d9b
7 changed files with 345 additions and 17 deletions
|
@@ -282,8 +282,10 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
InputStreamReader isr = new InputStreamReader(url.openStream());
|
||||
BufferedReader in = new BufferedReader(isr);
|
||||
|
||||
System.out.println("Reading Tibetan Machine Web code table "
|
||||
+ fileName);
|
||||
if (ThdlOptions.getBooleanOption("thdl.verbose")) {
|
||||
System.out.println("Reading Tibetan Machine Web code table "
|
||||
+ fileName);
|
||||
}
|
||||
String line;
|
||||
boolean hashOn = false;
|
||||
boolean isSanskrit = false; //FIXME: this is never read.
|
||||
|
@@ -419,6 +421,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
}
|
||||
catch (IOException e) {
|
||||
System.out.println("file Disappeared");
|
||||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
|
||||
hasReadData = true;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue