Now breaks the line after the last whitespace, not the first.

I cleaned things up a bit, and I've made logging optional since I don't yet trust the code fully. A Wylie underscore at the end of a line is worth looking into further, at the very least.
2002-10-28 04:12:49 +00:00 · 2002-10-28 04:12:49 +00:00 · fd1b4dd468
commit fd1b4dd468
parent 8433369d60
4 changed files with 130 additions and 36 deletions
--- a/source/options.txt
+++ b/source/options.txt
@ -69,7 +69,8 @@ thdl.use.temp.file.directory.for.log = false
 # Set this to the directory in which you want the debugging output,
 # the .log files, placed.  You cannot set this to true AND set
-# thdl.use.temp.file.directory.for.log to true.
+# thdl.use.temp.file.directory.for.log to true.  Setting it to the
 # empty string means to use the current working directory.
 thdl.log.directory =
 # Set this to true only if you are a developer and want to see what
@ -85,3 +86,12 @@ thdl.debug = false
 # Set this to true if you want Savant to treat all files as valid
 # input.  Otherwise, only .savant files are acceptable.
 thdl.treat.all.files.as.dot.savant.files.regardless.of.extension = false
 # Do you want a developer-only option at the bottom of the Tools menu
 # in Jskad?
 thdl.add.developer.options.to.menu = false
 # Set this to true if you want the console and the log file to tell
 # you about the messy details of how Jskad decides when and where to
 # break lines for display.
 thdl.log.line.breaking.algorithm = false
--- a/source/org/thdl/tib/text/TibetanLabelView.java
+++ b/source/org/thdl/tib/text/TibetanLabelView.java
@ -20,7 +20,6 @@ package org.thdl.tib.text;
 import javax.swing.*; 
 import javax.swing.text.*;
 import javax.swing.text.rtf.RTFEditorKit;
 /** A TibetanLabelView is a LabelView that has its own idea, informed
 *  by its knowledge of Tibetan, about where a good place to break
@ -37,10 +36,13 @@ import javax.swing.text.rtf.RTFEditorKit;
 *
 *  @author David Chandler */
 class TibetanLabelView extends LabelView {
    private boolean logging;
    /** Creates a new TibetanLabelView. */
-    public TibetanLabelView(Element e) {
+    public TibetanLabelView(Element e, boolean debugLog) {
        super(e);
        // FIXME: assert (e == this.getElement())
        logging = debugLog;
    }
    public int getBreakWeight(int axis, float pos, float len) {
@ -107,31 +109,63 @@ class TibetanLabelView extends LabelView {
        // Grab the underlying characters:
        Segment seggy = this.getText(startOffset, endOffset);
-        //        System.out.println("DLC: getGoodBreakingLocation(start=" + startOffset + ", end=" + endOffset + "\"" + new String(seggy.array, seggy.offset, seggy.count) + "\"");
+        // Now look for whitespace.  Going from the back is what you
-
+        // want--otherwise, your 2nd line of text will be fuller than
-        // Now look for whitespace:
+        // your first.
-        //
+        char currentChar = seggy.last();
-        // FIXME: does going backwards or forwards matter?
+        for (; currentChar != Segment.DONE; currentChar = seggy.previous()) {
        char currentChar = seggy.first();
        for (; currentChar != Segment.DONE; currentChar = seggy.next()) {
            // FIXME: eeek!  How do we know when we're dealing with
-            // Tibetan and when we're not?  I'm assuming it's all
+            // Tibetan and when we're not?  This is styled text, so
-            // Tibetan, all the time.
+            // where are the attributes etc.?  We should find the font
-            if (Character.isWhitespace(currentChar)
+            // and decide about breaking based on that.  Well, we
-                || '-' /* FIXME: this is the TSHEG (i.e., the Wylie is ' '), but we have no constant for it. */ == currentChar
+            // should if we want a clean solution and don't mind a
-                || ' ' /* FIXME: this is space (i.e., the Wylie is '_'), but we have no constant for it. */ == currentChar
+            // little performance hit.
            //
            // This question only needs to be answered if you want a
            // clean solution, I think, because the code below should
            // work exactly the same.  Here's what's up: Even though
            // we aren't testing to see if we're typing Roman or
            // Tibetan, a character that's good for a line break in
            // one is also good in the other.  Methinks Tony Duff was
            // smart like that.
            //
            // To be explicit, the test below seems to work perfectly
            // for both Tibetan and Roman text.  (Maybe Roman text
            // will break after hyphens more quickly, but hey.)
            //
            // That said, this is still a FIXME.  But note that the
            // obvious fix will slow things down.
-                // FIXME: am I missing anything?  move this into TibetanMachineWeb, anyway.
+            if (Character.isWhitespace(currentChar) // FIXME: is this OK for Tibetan text?  Tony Duff may have made it so, but maybe not.  Test!
-                )
+                || TibetanMachineWeb.isTMWFontCharBreakable(currentChar))
                {
-                    //                    System.out.println("DLC: We've got a good place to break: " + (startOffset + seggy.getIndex() - seggy.getBeginIndex()
+                    // The '+ 1' is because you want to break after a
-                    //                        + 1));
+                    // tsheg or what not rather than before it.
-                    return startOffset + seggy.getIndex() - seggy.getBeginIndex()
+                    int goodPlace = (startOffset + seggy.getIndex()
-                        + 1 /* FIXME: why this foo work so good? */
+                                     - seggy.getBeginIndex() + 1);
-                        ;
+                    if (logging) {
                        String s = new String(seggy.array, seggy.offset, seggy.count);
                        if (!"\n".equals(s)) {
                            System.out.println("TibetanLabelView: found a good break in \""
                                               + new String(seggy.array, seggy.offset, seggy.count)
                                                   + "\"; we should break after character "
                                               + (seggy.getIndex() - seggy.getBeginIndex() + 1)
                                               + " (counting begins at one)");
                        }
                    }
                    return goodPlace;
                }
        }
-        // System.out.println("DLC: We DO NOT have any good place to break.");
+
        // There is no good place.  Return a negative number.
        if (logging) {
            String s = new String(seggy.array, seggy.offset, seggy.count);
            if (!"\n".equals(s)) {
                System.out.println("TibetanLabelView: found NO good break in \""
                                   + new String(seggy.array, seggy.offset, seggy.count)
                                       + "\"");
            }
        }
        return -1;
    }
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@ -1117,4 +1117,33 @@ public static boolean isTopVowel(DuffCode dc) {
 	return false;
 }
    /** Decides whether a line of Tibetan text may end right after
        the character ch.

        <p>Here ch is <b>not</b> the Wylie transliteration.  It is the
        ASCII character that serves as an index into one of the
        TibetanMachineWeb (TMW) fonts.  For example,
        <code>'-'</code> stands for the tsheg, the little dot after a
        syllable, in (FIXME: Edward, is this true?) all of the TMW
        fonts: open MS Word, select TibetanMachineWeb1, and type a
        hyphen, and a tsheg appears; type a hyphen in Jskad and no
        tsheg appears.  The mapping is not wholly arbitrary, since
        ' ', '\t', '\n' et cetera seem to have been chosen to
        represent Tibetan whitespace.

        @param ch the ASCII character "index" into the TMW font
        @return true iff ch is <code>'-'</code>, <code>' '</code>,
        a tab, or a newline, i.e. a tsheg or whitespace or the
        like */
    public static boolean isTMWFontCharBreakable(char ch) {
        // FIXME: we have no named constants for any of these.
        switch (ch) {
        case '-':   // the tsheg (i.e., the Wylie is ' ')
        case ' ':   // space (i.e., the Wylie is '_')
        case '\t':  // some sort of whitespace
        case '\n':  // some sort of whitespace
            return true;
        default:
            // FIXME: am I missing anything?  other whitespace etc.?
            return false;
        }
    }
 }
--- a/source/org/thdl/tib/text/TibetanRTFViewFactory.java
+++ b/source/org/thdl/tib/text/TibetanRTFViewFactory.java
@ -20,14 +20,42 @@ package org.thdl.tib.text;
 import javax.swing.*; 
 import javax.swing.text.*;
-import javax.swing.text.rtf.RTFEditorKit;
+
 import org.thdl.util.ThdlOptions;
 /** A ViewFactory that is cognizant of the line-wrapping rules for
 *  Tibetan text.  That is, this class knows about the tsheg and other
 *  Tibetan punctuation.
 *  @author David Chandler */
 class TibetanRTFViewFactory implements ViewFactory {
-    private TibetanRTFViewFactory() { super(); }
+
    /** the delegatee */
    private ViewFactory delegatee = null;
    /** true iff the Tibetan-aware views that we create should include
        copious debugging output */
    private static boolean debugLog = false;
    /** true if we've already inquired about the preference and stored
        it in debugLog */
    private static boolean debugLogInitialized = false;
    /** Reports whether the Tibetan-aware views that this factory
        creates should emit copious debugging output.  The
        <code>thdl.log.line.breaking.algorithm</code> option is read
        on the first call only; later calls return the stored
        value.
        @return the stored value of the option */
    private static boolean getDebugLog() {
        if (debugLogInitialized) {
            return debugLog;
        }
        // Mark as initialized before the lookup, so the option is
        // never asked for a second time.
        debugLogInitialized = true;
        debugLog
            = ThdlOptions.getBooleanOption("thdl.log.line.breaking.algorithm");
        return debugLog;
    }
    /** Do not call this. */
    private TibetanRTFViewFactory() {
        super();
    }
    /** Creates a new TibetanRTFViewFactory that delegates to vf when
     *  unknown elements are encountered.
@ -39,23 +67,16 @@ class TibetanRTFViewFactory implements ViewFactory {
        delegatee = d;
    }
    /** the delegatee */
    private ViewFactory delegatee = null;
    /** Returns a View that will break correctly at Tibetan
     *  punctuation. */
    public View create(Element el) {
        String elName = el.getName();
        if (null != elName
-            && elName.equals(AbstractDocument.ContentElementName)) { // FIXME: is this right?
+            && elName.equals(AbstractDocument.ContentElementName)) { // FIXME: is this right?  what about paragraph-level stuff?
-            return new TibetanLabelView(el);
+            return new TibetanLabelView(el, getDebugLog());
        } else {
-            // we don't know what to do, so delegate
+            // We don't know what to do, so delegate:
-            View r = delegatee.create(el);
+            return delegatee.create(el);
            // DLC for debugging:
 //             System.out.println("DLC: creating a view '" + r + "'");
 //             System.out.println("DLC:   for element   '" + el + "'");
            return r;
        }
    }
 }