Now breaks the line after the last whitespace, not the first.
I cleaned things up a bit, and I've made logging optional since I don't yet trust the code fully. A Wylie underscore at the end of a line is worth looking into further, at the very least.
This commit is contained in:
		
							parent
							
								
									8433369d60
								
							
						
					
					
						commit
						fd1b4dd468
					
				
					 4 changed files with 130 additions and 36 deletions
				
			
		|  | @ -69,7 +69,8 @@ thdl.use.temp.file.directory.for.log = false | |||
| 
 | ||||
| # Set this to the directory in which you want the debugging output, | ||||
| # the .log files, placed.  You cannot set this option AND set | ||||
| # thdl.use.temp.file.directory.for.log to true. | ||||
| # thdl.use.temp.file.directory.for.log to true.  Setting it to the | ||||
| # empty string means to use the current working directory. | ||||
| thdl.log.directory = | ||||
| 
 | ||||
| # Set this to true only if you are a developer and want to see what | ||||
|  | @ -85,3 +86,12 @@ thdl.debug = false | |||
| # Set this to true if you want Savant to treat all files as valid | ||||
| # input.  Otherwise, only .savant files are acceptable. | ||||
| thdl.treat.all.files.as.dot.savant.files.regardless.of.extension = false | ||||
| 
 | ||||
| # Do you want a developer-only option at the bottom of the Tools menu | ||||
| # in Jskad? | ||||
| thdl.add.developer.options.to.menu = false | ||||
| 
 | ||||
| # Set this to true if you want the console and the log file to tell | ||||
| # you about the messy details of how Jskad decides when and where to | ||||
| # break lines for display. | ||||
| thdl.log.line.breaking.algorithm = false | ||||
|  |  | |||
|  | @ -20,7 +20,6 @@ package org.thdl.tib.text; | |||
| 
 | ||||
| import javax.swing.*;  | ||||
| import javax.swing.text.*; | ||||
| import javax.swing.text.rtf.RTFEditorKit; | ||||
| 
 | ||||
| /** A TibetanLabelView is a LabelView that has its own idea, informed | ||||
|  *  by its knowledge of Tibetan, about where a good place to break | ||||
|  | @ -37,10 +36,13 @@ import javax.swing.text.rtf.RTFEditorKit; | |||
|  * | ||||
|  *  @author David Chandler */ | ||||
| class TibetanLabelView extends LabelView { | ||||
|     private boolean logging; | ||||
|     /** Creates a new TibetanLabelView. */ | ||||
|     public TibetanLabelView(Element e) { | ||||
|     public TibetanLabelView(Element e, boolean debugLog) { | ||||
|         super(e); | ||||
|         // FIXME: assert (e == this.getElement()) | ||||
| 
 | ||||
|         logging = debugLog; | ||||
|     } | ||||
| 
 | ||||
|     public int getBreakWeight(int axis, float pos, float len) { | ||||
|  | @ -107,31 +109,63 @@ class TibetanLabelView extends LabelView { | |||
|         // Grab the underlying characters: | ||||
|         Segment seggy = this.getText(startOffset, endOffset); | ||||
| 
 | ||||
|         //        System.out.println("DLC: getGoodBreakingLocation(start=" + startOffset + ", end=" + endOffset + "\"" + new String(seggy.array, seggy.offset, seggy.count) + "\""); | ||||
| 
 | ||||
|         // Now look for whitespace: | ||||
|         // | ||||
|         // FIXME: does going backwards or forwards matter? | ||||
|         char currentChar = seggy.first(); | ||||
|         for (; currentChar != Segment.DONE; currentChar = seggy.next()) { | ||||
|         // Now look for whitespace.  Going from the back is what you | ||||
|         // want--otherwise, your 2nd line of text will be fuller than | ||||
|         // your first. | ||||
|         char currentChar = seggy.last(); | ||||
|         for (; currentChar != Segment.DONE; currentChar = seggy.previous()) { | ||||
|             // FIXME: eeek!  How do we know when we're dealing with | ||||
|             // Tibetan and when we're not?  I'm assuming it's all | ||||
|             // Tibetan, all the time. | ||||
|             if (Character.isWhitespace(currentChar) | ||||
|                 || '-' /* FIXME: this is the TSHEG (i.e., the Wylie is ' '), but we have no constant for it. */ == currentChar | ||||
|                 || ' ' /* FIXME: this is space (i.e., the Wylie is '_'), but we have no constant for it. */ == currentChar | ||||
|             // Tibetan and when we're not?  This is styled text, so | ||||
|             // where are the attributes etc.?  We should find the font | ||||
|             // and decide about breaking based on that.  Well, we | ||||
|             // should if we want a clean solution and don't mind a | ||||
|             // little performance hit. | ||||
|             // | ||||
|             // This question only needs to be answered if you want a | ||||
|             // clean solution, I think, because the code below should | ||||
|             // work exactly the same.  Here's what's up: Even though | ||||
|             // we aren't testing to see if we're typing Roman or | ||||
|             // Tibetan, a character that's good for a line break in | ||||
|             // one is also good in the other.  Methinks Tony Duff was | ||||
|             // smart like that. | ||||
|             // | ||||
|             // To be explicit, the test below seems to work perfectly | ||||
|             // for both Tibetan and Roman text.  (Maybe Roman text | ||||
|             // will break after hyphens more quickly, but hey.) | ||||
|             // | ||||
|             // That said, this is still a FIXME.  But note that the | ||||
|             // obvious fix will slow things down. | ||||
| 
 | ||||
|                 // FIXME: am I missing anything?  move this into TibetanMachineWeb, anyway. | ||||
|                 ) | ||||
|             if (Character.isWhitespace(currentChar) // FIXME: is this OK for Tibetan text?  Tony Duff may have made it so, but maybe not.  Test! | ||||
|                 || TibetanMachineWeb.isTMWFontCharBreakable(currentChar)) | ||||
|                 { | ||||
|                     //                    System.out.println("DLC: We've got a good place to break: " + (startOffset + seggy.getIndex() - seggy.getBeginIndex() | ||||
|                     //                        + 1)); | ||||
|                     return startOffset + seggy.getIndex() - seggy.getBeginIndex() | ||||
|                         + 1 /* FIXME: why this foo work so good? */ | ||||
|                         ; | ||||
|                     // The '+ 1' is because you want to break after a | ||||
|                     // tsheg or what not rather than before it. | ||||
|                     int goodPlace = (startOffset + seggy.getIndex() | ||||
|                                      - seggy.getBeginIndex() + 1); | ||||
|                     if (logging) { | ||||
|                         String s = new String(seggy.array, seggy.offset, seggy.count); | ||||
|                         if (!"\n".equals(s)) { | ||||
|                             System.out.println("TibetanLabelView: found a good break in \"" | ||||
|                                                + new String(seggy.array, seggy.offset, seggy.count) | ||||
|                                                    + "\"; we should break after character " | ||||
|                                                + (seggy.getIndex() - seggy.getBeginIndex() + 1) | ||||
|                                                + " (counting begins at one)"); | ||||
|                         } | ||||
|                     } | ||||
|                     return goodPlace; | ||||
|                 } | ||||
|         } | ||||
|         // System.out.println("DLC: We DO NOT have any good place to break."); | ||||
| 
 | ||||
|         // There is no good place.  Return a negative number. | ||||
|         if (logging) { | ||||
|             String s = new String(seggy.array, seggy.offset, seggy.count); | ||||
|             if (!"\n".equals(s)) { | ||||
|                 System.out.println("TibetanLabelView: found NO good break in \"" | ||||
|                                    + new String(seggy.array, seggy.offset, seggy.count) | ||||
|                                        + "\""); | ||||
|             } | ||||
|         } | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
|  | @ -1117,4 +1117,33 @@ public static boolean isTopVowel(DuffCode dc) { | |||
| 
 | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
|     /** Returns true if and only if ch, which is an ASCII character | ||||
|         that you can think of as an arbitrary index into one of the | ||||
|         Tibetan fonts, is a character that is appropriate for ending a | ||||
|         line of Tibetan.  <code>'-'</code>, for example, represents | ||||
|         the tsheg (the little dot after a syllable) in (FIXME: Edward, | ||||
|         is this true?) all of the TMW fonts.  Thus, this would return | ||||
|         true for <code>'-'</code>. | ||||
| 
 | ||||
|         Concretely, the only characters accepted are <code>'-'</code>, | ||||
|         <code>' '</code>, <code>'\t'</code>, and <code>'\n'</code>; | ||||
|         every other character yields false. | ||||
| 
 | ||||
|         Note that ch is <b>not</b> the Wylie transliteration; it is an | ||||
|         arbitrary character (well, not quite, since ' ', '\t', '\n' et | ||||
|         cetera seem to have been wisely chosen to represent Tibetan | ||||
|         whitespace, but pretty arbitrary).  If you open up MS Word, | ||||
|         select TibetanMachineWeb1, and type a hyphen, | ||||
|         i.e. <code>'-'</code>, you'll see a tsheg appear.  If you open | ||||
|         Jskad and type a hyphen, you won't see a tsheg. | ||||
|                      | ||||
|         @param ch the ASCII character "index" into the TMW font | ||||
| 
 | ||||
|         @return true iff ch is <code>'-'</code> (the tsheg), | ||||
|         <code>' '</code>, <code>'\t'</code>, or <code>'\n'</code> */ | ||||
|     public static boolean isTMWFontCharBreakable(char ch) { | ||||
|         return ('-' == ch /* FIXME: this is the tsheg (i.e., the Wylie is ' '), but we have no constant for it. */ | ||||
|                 || ' ' == ch /* FIXME: this is space (i.e., the Wylie is '_'), but we have no constant for it. */ | ||||
|                 || '\t' == ch /* FIXME: this is some sort of whitespace */ | ||||
|                 || '\n' == ch /* FIXME: this is some sort of whitespace */ | ||||
|                 ); | ||||
| 
 | ||||
|         // FIXME: am I missing anything?  Tab and newline are handled above; other whitespace such as '\r' is not. | ||||
|     } | ||||
| } | ||||
|  |  | |||
|  | @ -20,14 +20,42 @@ package org.thdl.tib.text; | |||
| 
 | ||||
| import javax.swing.*;  | ||||
| import javax.swing.text.*; | ||||
| import javax.swing.text.rtf.RTFEditorKit; | ||||
| 
 | ||||
| import org.thdl.util.ThdlOptions; | ||||
| 
 | ||||
| /** A ViewFactory that is cognizant of the line-wrapping rules for | ||||
|  *  Tibetan text.  That is, this class knows about the tsheg and other | ||||
|  *  Tibetan punctuation. | ||||
|  *  @author David Chandler */ | ||||
| class TibetanRTFViewFactory implements ViewFactory { | ||||
|     private TibetanRTFViewFactory() { super(); } | ||||
| 
 | ||||
|     /** the delegatee */ | ||||
|     private ViewFactory delegatee = null; | ||||
| 
 | ||||
|     /** true iff the Tibetan-aware views that we create should include | ||||
|         copious debugging output */ | ||||
|     private static boolean debugLog = false; | ||||
| 
 | ||||
|     /** true if we've already inquired about the preference and stored | ||||
|         it in debugLog */ | ||||
|     private static boolean debugLogInitialized = false; | ||||
| 
 | ||||
|     /** Returns true iff the Tibetan-aware views that we create should | ||||
|         include copious debugging output.  The answer comes from the | ||||
|         boolean option <code>thdl.log.line.breaking.algorithm</code>, | ||||
|         which is looked up through ThdlOptions on the first call only; | ||||
|         the result is stored in debugLog and returned unchanged by | ||||
|         every later call.  No synchronization is performed here. | ||||
| 
 | ||||
|         @return the stored value of the line-breaking logging option */ | ||||
|     private static boolean getDebugLog() { | ||||
|         if (false == debugLogInitialized) { /* first call: look the preference up once */ | ||||
|             debugLogInitialized = true; /* set before the lookup, so a lookup that throws is not retried */ | ||||
|             debugLog | ||||
|                 = ThdlOptions.getBooleanOption("thdl.log.line.breaking.algorithm"); | ||||
|         } | ||||
|         return debugLog; | ||||
|     } | ||||
|      | ||||
|      | ||||
|     /** Do not call this. */ | ||||
|     private TibetanRTFViewFactory() { | ||||
|         super(); | ||||
|     } | ||||
| 
 | ||||
|     /** Creates a new TibetanRTFViewFactory that delegates to vf when | ||||
|      *  unknown elements are encountered. | ||||
|  | @ -39,23 +67,16 @@ class TibetanRTFViewFactory implements ViewFactory { | |||
|         delegatee = d; | ||||
|     } | ||||
|      | ||||
|     /** the delegatee */ | ||||
|     private ViewFactory delegatee = null; | ||||
| 
 | ||||
|     /** Returns a View that will break correctly at Tibetan | ||||
|      *  punctuation. */ | ||||
|     public View create(Element el) { | ||||
|         String elName = el.getName(); | ||||
|         if (null != elName | ||||
|             && elName.equals(AbstractDocument.ContentElementName)) { // FIXME: is this right? | ||||
|             return new TibetanLabelView(el); | ||||
|             && elName.equals(AbstractDocument.ContentElementName)) { // FIXME: is this right?  what about paragraph-level stuff? | ||||
|             return new TibetanLabelView(el, getDebugLog()); | ||||
|         } else { | ||||
|             // we don't know what to do, so delegate | ||||
|             View r = delegatee.create(el); | ||||
|             // DLC for debugging: | ||||
| //             System.out.println("DLC: creating a view '" + r + "'"); | ||||
| //             System.out.println("DLC:   for element   '" + el + "'"); | ||||
|             return r; | ||||
|             // We don't know what to do, so delegate: | ||||
|             return delegatee.create(el); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue