diff --git a/source/options.txt b/source/options.txt index 9ad5cf4..95b3478 100644 --- a/source/options.txt +++ b/source/options.txt @@ -69,7 +69,8 @@ thdl.use.temp.file.directory.for.log = false # Set this to the directory in which you want the debugging output, # the .log files, placed. You cannot set this to true AND set -# thdl.use.temp.file.directory.for.log to true. +# thdl.use.temp.file.directory.for.log to true. Setting it to the +# empty string means to use the current working directory. thdl.log.directory = # Set this to true only if you are a developer and want to see what @@ -85,3 +86,12 @@ thdl.debug = false # Set this to true if you want Savant to treat all files as valid # input. Otherwise, only .savant files are acceptable. thdl.treat.all.files.as.dot.savant.files.regardless.of.extension = false + +# Do you want a developer-only option on the bottom of the Tools menu +# in Jskad? +thdl.add.developer.options.to.menu = false + +# Set this to true if you want the console and the log file to tell +# you about the messy details of how Jskad decides when and where to +# break lines for display. +thdl.log.line.breaking.algorithm = false diff --git a/source/org/thdl/tib/text/TibetanLabelView.java b/source/org/thdl/tib/text/TibetanLabelView.java index 5204c29..e6dda54 100644 --- a/source/org/thdl/tib/text/TibetanLabelView.java +++ b/source/org/thdl/tib/text/TibetanLabelView.java @@ -20,7 +20,6 @@ package org.thdl.tib.text; import javax.swing.*; import javax.swing.text.*; -import javax.swing.text.rtf.RTFEditorKit; /** A TibetanLabelView is a LabelView that has its own idea, informed * by its knowledge of Tibetan, about where a good place to break @@ -37,10 +36,13 @@ import javax.swing.text.rtf.RTFEditorKit; * * @author David Chandler */ class TibetanLabelView extends LabelView { + private boolean logging; /** Creates a new TibetanLabelView. 
*/ - public TibetanLabelView(Element e) { + public TibetanLabelView(Element e, boolean debugLog) { super(e); // FIXME: assert (e == this.getElement()) + + logging = debugLog; } public int getBreakWeight(int axis, float pos, float len) { @@ -107,31 +109,63 @@ class TibetanLabelView extends LabelView { // Grab the underlying characters: Segment seggy = this.getText(startOffset, endOffset); - // System.out.println("DLC: getGoodBreakingLocation(start=" + startOffset + ", end=" + endOffset + "\"" + new String(seggy.array, seggy.offset, seggy.count) + "\""); - - // Now look for whitespace: - // - // FIXME: does going backwards or forwards matter? - char currentChar = seggy.first(); - for (; currentChar != Segment.DONE; currentChar = seggy.next()) { + // Now look for whitespace. Going from the back is what you + // want--otherwise, your 2nd line of text will be fuller than + // your first. + char currentChar = seggy.last(); + for (; currentChar != Segment.DONE; currentChar = seggy.previous()) { // FIXME: eeek! How do we know when we're dealing with - // Tibetan and when we're not? I'm assuming it's all - // Tibetan, all the time. - if (Character.isWhitespace(currentChar) - || '-' /* FIXME: this is the TSHEG (i.e., the Wylie is ' '), but we have no constant for it. */ == currentChar - || ' ' /* FIXME: this is space (i.e., the Wylie is '_'), but we have no constant for it. */ == currentChar + // Tibetan and when we're not? This is styled text, so + // where are the attributes etc.? We should find the font + // and decide about breaking based on that. Well, we + // should if we want a clean solution and don't mind a + // little performance hit. + // + // This question only needs to be answered if you want a + // clean solution, I think, because the code below should + // work exactly the same. Here's what's up: Even though + // we aren't testing to see if we're typing Roman or + // Tibetan, a character that's good for a line break in + // one is also good in the other. 
Methinks Tony Duff was + // smart like that. + // + // To be explicit, the test below seems to work perfectly + // for both Tibetan and Roman text. (Maybe Roman text + // will break after hyphens more quickly, but hey.) + // + // That said, this is still a FIXME. But note that the + // obvious fix will slow things down. - // FIXME: am I missing anything? move this into TibetanMachineWeb, anyway. - ) + if (Character.isWhitespace(currentChar) // FIXME: is this OK for Tibetan text? Tony Duff may have made it so, but maybe not. Test! + || TibetanMachineWeb.isTMWFontCharBreakable(currentChar)) { - // System.out.println("DLC: We've got a good place to break: " + (startOffset + seggy.getIndex() - seggy.getBeginIndex() - // + 1)); - return startOffset + seggy.getIndex() - seggy.getBeginIndex() - + 1 /* FIXME: why this foo work so good? */ - ; + // The '+ 1' is because you want to break after a + // tsheg or whatnot rather than before it. + int goodPlace = (startOffset + seggy.getIndex() + - seggy.getBeginIndex() + 1); + if (logging) { + String s = new String(seggy.array, seggy.offset, seggy.count); + if (!"\n".equals(s)) { + System.out.println("TibetanLabelView: found a good break in \"" + + new String(seggy.array, seggy.offset, seggy.count) + + "\"; we should break after character " + + (seggy.getIndex() - seggy.getBeginIndex() + 1) + + " (counting begins at one)"); + } + } + return goodPlace; } } - // System.out.println("DLC: We DO NOT have any good place to break."); + + // There is no good place. Return a negative number. 
+ if (logging) { + String s = new String(seggy.array, seggy.offset, seggy.count); + if (!"\n".equals(s)) { + System.out.println("TibetanLabelView: found NO good break in \"" + + new String(seggy.array, seggy.offset, seggy.count) + + "\""); + } + } return -1; } diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java index 92cde63..01599d7 100644 --- a/source/org/thdl/tib/text/TibetanMachineWeb.java +++ b/source/org/thdl/tib/text/TibetanMachineWeb.java @@ -1117,4 +1117,33 @@ public static boolean isTopVowel(DuffCode dc) { return false; } + + /** Returns true if and only if ch, which is an ASCII character + that you can think of as an arbitrary index into one of the + Tibetan fonts, is a character that is appropriate for ending a + line of Tibetan. '-', for example, represents + the tsheg (the little dot after a syllable) in (FIXME: Edward, + is this true?) all of the TMW fonts. Thus, this would return + true for '-'. + + Note that ch is not the Wylie transliteration; it is an + arbitrary character (well, not quite, since ' ', '\t', '\n' et + cetera seem to have been wisely chosen to represent Tibetan + whitespace, but pretty arbitrary). If you open up MS Word, + select TibetanMachineWeb1, and type a hyphen, + i.e. '-', you'll see a tsheg appear. If you open + Jskad and type a hyphen, you won't see a tsheg. + + @param ch the ASCII character "index" into the TMW font + + @return true iff this is a tsheg or whitespace or the like */ + public static boolean isTMWFontCharBreakable(char ch) { + return ('-' == ch /* FIXME: this is the tsheg (i.e., the Wylie is ' '), but we have no constant for it. */ + || ' ' == ch /* FIXME: this is space (i.e., the Wylie is '_'), but we have no constant for it. */ + || '\t' == ch /* FIXME: this is some sort of whitespace */ + || '\n' == ch /* FIXME: this is some sort of whitespace */ + ); + + // FIXME: am I missing anything? ('\t' and '\n' are already covered above.) 
+ } } diff --git a/source/org/thdl/tib/text/TibetanRTFViewFactory.java b/source/org/thdl/tib/text/TibetanRTFViewFactory.java index b2009fe..ac4c991 100644 --- a/source/org/thdl/tib/text/TibetanRTFViewFactory.java +++ b/source/org/thdl/tib/text/TibetanRTFViewFactory.java @@ -20,14 +20,42 @@ package org.thdl.tib.text; import javax.swing.*; import javax.swing.text.*; -import javax.swing.text.rtf.RTFEditorKit; + +import org.thdl.util.ThdlOptions; /** A ViewFactory that is cognizant of the line-wrapping rules for * Tibetan text. That is, this class knows about the tsheg and other * Tibetan punctuation. * @author David Chandler */ class TibetanRTFViewFactory implements ViewFactory { - private TibetanRTFViewFactory() { super(); } + + /** the delegatee */ + private ViewFactory delegatee = null; + + /** true iff the Tibetan-aware views that we create should include + copious debugging output */ + private static boolean debugLog = false; + + /** true if we've already inquired about the preference and stored + it in debugLog */ + private static boolean debugLogInitialized = false; + + /** Returns true iff the Tibetan-aware views that we create should + include copious debugging output. */ + private static boolean getDebugLog() { + if (false == debugLogInitialized) { + debugLogInitialized = true; + debugLog + = ThdlOptions.getBooleanOption("thdl.log.line.breaking.algorithm"); + } + return debugLog; + } + + + /** Do not call this. */ + private TibetanRTFViewFactory() { + super(); + } /** Creates a new TibetanRTFViewFactory that delegates to vf when * unknown elements are encountered. @@ -39,23 +67,16 @@ class TibetanRTFViewFactory implements ViewFactory { delegatee = d; } - /** the delegatee */ - private ViewFactory delegatee = null; - /** Returns a View that will break correctly at Tibetan * punctuation. */ public View create(Element el) { String elName = el.getName(); if (null != elName - && elName.equals(AbstractDocument.ContentElementName)) { // FIXME: is this right? 
- return new TibetanLabelView(el); + && elName.equals(AbstractDocument.ContentElementName)) { // FIXME: is this right? what about paragraph-level stuff? + return new TibetanLabelView(el, getDebugLog()); } else { - // we don't know what to do, so delegate - View r = delegatee.create(el); - // DLC for debugging: -// System.out.println("DLC: creating a view '" + r + "'"); -// System.out.println("DLC: for element '" + el + "'"); - return r; + // We don't know what to do, so delegate: + return delegatee.create(el); } } }