Line breaking now happens after the last whitespace character in the text, not the first.
I cleaned things up a bit, and I've made logging optional since I don't yet trust the code fully. A Wylie underscore at the end of a line is worth looking into further, at the very least.
This commit is contained in:
parent
8433369d60
commit
fd1b4dd468
4 changed files with 130 additions and 36 deletions
|
@ -69,7 +69,8 @@ thdl.use.temp.file.directory.for.log = false
|
||||||
|
|
||||||
# Set this to the directory in which you want the debugging output,
|
# Set this to the directory in which you want the debugging output,
|
||||||
# the .log files, placed. You cannot set this to a directory AND set
|
# the .log files, placed. You cannot set this to a directory AND set
|
||||||
# thdl.use.temp.file.directory.for.log to true.
|
# thdl.use.temp.file.directory.for.log to true. Setting it to the
|
||||||
|
# empty string means to use the current working directory.
|
||||||
thdl.log.directory =
|
thdl.log.directory =
|
||||||
|
|
||||||
# Set this to true only if you are a developer and want to see what
|
# Set this to true only if you are a developer and want to see what
|
||||||
|
@ -85,3 +86,12 @@ thdl.debug = false
|
||||||
# Set this to true if you want Savant to treat all files as valid
|
# Set this to true if you want Savant to treat all files as valid
|
||||||
# input. Otherwise, only .savant files are acceptable.
|
# input. Otherwise, only .savant files are acceptable.
|
||||||
thdl.treat.all.files.as.dot.savant.files.regardless.of.extension = false
|
thdl.treat.all.files.as.dot.savant.files.regardless.of.extension = false
|
||||||
|
|
||||||
|
# Do you want a developer-only option on the bottom of the Tools menu
|
||||||
|
# in Jskad?
|
||||||
|
thdl.add.developer.options.to.menu = false
|
||||||
|
|
||||||
|
# Set this to true if you want the console and the log file to tell
|
||||||
|
# you about the messy details of how Jskad decides when and where to
|
||||||
|
# break lines for display.
|
||||||
|
thdl.log.line.breaking.algorithm = false
|
||||||
|
|
|
@ -20,7 +20,6 @@ package org.thdl.tib.text;
|
||||||
|
|
||||||
import javax.swing.*;
|
import javax.swing.*;
|
||||||
import javax.swing.text.*;
|
import javax.swing.text.*;
|
||||||
import javax.swing.text.rtf.RTFEditorKit;
|
|
||||||
|
|
||||||
/** A TibetanLabelView is a LabelView that has its own idea, informed
|
/** A TibetanLabelView is a LabelView that has its own idea, informed
|
||||||
* by its knowledge of Tibetan, about where a good place to break
|
* by its knowledge of Tibetan, about where a good place to break
|
||||||
|
@ -37,10 +36,13 @@ import javax.swing.text.rtf.RTFEditorKit;
|
||||||
*
|
*
|
||||||
* @author David Chandler */
|
* @author David Chandler */
|
||||||
class TibetanLabelView extends LabelView {
|
class TibetanLabelView extends LabelView {
|
||||||
|
private boolean logging;
|
||||||
/** Creates a new TibetanLabelView. */
|
/** Creates a new TibetanLabelView. */
|
||||||
public TibetanLabelView(Element e) {
|
public TibetanLabelView(Element e, boolean debugLog) {
|
||||||
super(e);
|
super(e);
|
||||||
// FIXME: assert (e == this.getElement())
|
// FIXME: assert (e == this.getElement())
|
||||||
|
|
||||||
|
logging = debugLog;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getBreakWeight(int axis, float pos, float len) {
|
public int getBreakWeight(int axis, float pos, float len) {
|
||||||
|
@ -107,31 +109,63 @@ class TibetanLabelView extends LabelView {
|
||||||
// Grab the underlying characters:
|
// Grab the underlying characters:
|
||||||
Segment seggy = this.getText(startOffset, endOffset);
|
Segment seggy = this.getText(startOffset, endOffset);
|
||||||
|
|
||||||
// System.out.println("DLC: getGoodBreakingLocation(start=" + startOffset + ", end=" + endOffset + "\"" + new String(seggy.array, seggy.offset, seggy.count) + "\"");
|
// Now look for whitespace. Going from the back is what you
|
||||||
|
// want--otherwise, your 2nd line of text will be fuller than
|
||||||
// Now look for whitespace:
|
// your first.
|
||||||
//
|
char currentChar = seggy.last();
|
||||||
// FIXME: does going backwards or forwards matter?
|
for (; currentChar != Segment.DONE; currentChar = seggy.previous()) {
|
||||||
char currentChar = seggy.first();
|
|
||||||
for (; currentChar != Segment.DONE; currentChar = seggy.next()) {
|
|
||||||
// FIXME: eeek! How do we know when we're dealing with
|
// FIXME: eeek! How do we know when we're dealing with
|
||||||
// Tibetan and when we're not? I'm assuming it's all
|
// Tibetan and when we're not? This is styled text, so
|
||||||
// Tibetan, all the time.
|
// where are the attributes etc.? We should find the font
|
||||||
if (Character.isWhitespace(currentChar)
|
// and decide about breaking based on that. Well, we
|
||||||
|| '-' /* FIXME: this is the TSHEG (i.e., the Wylie is ' '), but we have no constant for it. */ == currentChar
|
// should if we want a clean solution and don't mind a
|
||||||
|| ' ' /* FIXME: this is space (i.e., the Wylie is '_'), but we have no constant for it. */ == currentChar
|
// little performance hit.
|
||||||
|
//
|
||||||
|
// This question only needs to be answered if you want a
|
||||||
|
// clean solution, I think, because the code below should
|
||||||
|
// work exactly the same. Here's what's up: Even though
|
||||||
|
// we aren't testing to see if we're typing Roman or
|
||||||
|
// Tibetan, a character that's good for a line break in
|
||||||
|
// one is also good in the other. Methinks Tony Duff was
|
||||||
|
// smart like that.
|
||||||
|
//
|
||||||
|
// To be explicit, the test below seems to work perfectly
|
||||||
|
// for both Tibetan and Roman text. (Maybe Roman text
|
||||||
|
// will break after hyphens more quickly, but hey.)
|
||||||
|
//
|
||||||
|
// That said, this is still a FIXME. But note that the
|
||||||
|
// obvious fix will slow things down.
|
||||||
|
|
||||||
// FIXME: am I missing anything? move this into TibetanMachineWeb, anyway.
|
if (Character.isWhitespace(currentChar) // FIXME: is this OK for Tibetan text? Tony Duff may have made it so, but maybe not. Test!
|
||||||
)
|
|| TibetanMachineWeb.isTMWFontCharBreakable(currentChar))
|
||||||
{
|
{
|
||||||
// System.out.println("DLC: We've got a good place to break: " + (startOffset + seggy.getIndex() - seggy.getBeginIndex()
|
// The '+ 1' is because you want to break after a
|
||||||
// + 1));
|
// tsheg or what not rather than before it.
|
||||||
return startOffset + seggy.getIndex() - seggy.getBeginIndex()
|
int goodPlace = (startOffset + seggy.getIndex()
|
||||||
+ 1 /* FIXME: why this foo work so good? */
|
- seggy.getBeginIndex() + 1);
|
||||||
;
|
if (logging) {
|
||||||
|
String s = new String(seggy.array, seggy.offset, seggy.count);
|
||||||
|
if (!"\n".equals(s)) {
|
||||||
|
System.out.println("TibetanLabelView: found a good break in \""
|
||||||
|
+ new String(seggy.array, seggy.offset, seggy.count)
|
||||||
|
+ "\"; we should break after character "
|
||||||
|
+ (seggy.getIndex() - seggy.getBeginIndex() + 1)
|
||||||
|
+ " (counting begins at one)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return goodPlace;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// System.out.println("DLC: We DO NOT have any good place to break.");
|
|
||||||
|
// There is no good place. Return a negative number.
|
||||||
|
if (logging) {
|
||||||
|
String s = new String(seggy.array, seggy.offset, seggy.count);
|
||||||
|
if (!"\n".equals(s)) {
|
||||||
|
System.out.println("TibetanLabelView: found NO good break in \""
|
||||||
|
+ new String(seggy.array, seggy.offset, seggy.count)
|
||||||
|
+ "\"");
|
||||||
|
}
|
||||||
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1117,4 +1117,33 @@ public static boolean isTopVowel(DuffCode dc) {
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns true if and only if ch, which is an ASCII character
|
||||||
|
that you can think of as an arbitrary index into one of the
|
||||||
|
Tibetan fonts, is a character that is appropriate for ending a
|
||||||
|
line of Tibetan. <code>'-'</code>, for example, represents
|
||||||
|
the tsheg (the little dot after a syllable) in (FIXME: Edward,
|
||||||
|
is this true?) all of the TMW fonts. Thus, this would return
|
||||||
|
true for <code>'-'</code>.
|
||||||
|
|
||||||
|
Note that ch is <b>not</b> the Wylie transliteration; it is an
|
||||||
|
arbitrary character (well, not quite, since ' ', '\t', '\n' et
|
||||||
|
cetera seem to have been wisely chosen to represent Tibetan
|
||||||
|
whitespace, but pretty arbitrary). If you open up MS Word,
|
||||||
|
select TibetanMachineWeb1, and type a hyphen,
|
||||||
|
i.e. <code>'-'</code>, you'll see a tsheg appear. If you open
|
||||||
|
Jskad and type a hyphen, you won't see a tsheg.
|
||||||
|
|
||||||
|
@param ch the ASCII character "index" into the TMW font
|
||||||
|
|
||||||
|
@return true iff this is a tsheg or whitespace or the like */
|
||||||
|
public static boolean isTMWFontCharBreakable(char ch) {
|
||||||
|
return ('-' == ch /* FIXME: this is the tsheg (i.e., the Wylie is ' '), but we have no constant for it. */
|
||||||
|
|| ' ' == ch /* FIXME: this is space (i.e., the Wylie is '_'), but we have no constant for it. */
|
||||||
|
|| '\t' == ch /* FIXME: this is some sort of whitespace */
|
||||||
|
|| '\n' == ch /* FIXME: this is some sort of whitespace */
|
||||||
|
);
|
||||||
|
|
||||||
|
// FIXME: am I missing anything? tabs etc.?
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,14 +20,42 @@ package org.thdl.tib.text;
|
||||||
|
|
||||||
import javax.swing.*;
|
import javax.swing.*;
|
||||||
import javax.swing.text.*;
|
import javax.swing.text.*;
|
||||||
import javax.swing.text.rtf.RTFEditorKit;
|
|
||||||
|
import org.thdl.util.ThdlOptions;
|
||||||
|
|
||||||
/** A ViewFactory that is cognizant of the line-wrapping rules for
|
/** A ViewFactory that is cognizant of the line-wrapping rules for
|
||||||
* Tibetan text. That is, this class knows about the tsheg and other
|
* Tibetan text. That is, this class knows about the tsheg and other
|
||||||
* Tibetan punctuation.
|
* Tibetan punctuation.
|
||||||
* @author David Chandler */
|
* @author David Chandler */
|
||||||
class TibetanRTFViewFactory implements ViewFactory {
|
class TibetanRTFViewFactory implements ViewFactory {
|
||||||
private TibetanRTFViewFactory() { super(); }
|
|
||||||
|
/** the delegatee */
|
||||||
|
private ViewFactory delegatee = null;
|
||||||
|
|
||||||
|
/** true iff the Tibetan-aware views that we create should include
|
||||||
|
copious debugging output */
|
||||||
|
private static boolean debugLog = false;
|
||||||
|
|
||||||
|
/** true if we've already inquired about the preference and stored
|
||||||
|
it in debugLog */
|
||||||
|
private static boolean debugLogInitialized = false;
|
||||||
|
|
||||||
|
/** Returns true iff the Tibetan-aware views that we create should
|
||||||
|
include copious debugging output. */
|
||||||
|
private static boolean getDebugLog() {
|
||||||
|
if (false == debugLogInitialized) {
|
||||||
|
debugLogInitialized = true;
|
||||||
|
debugLog
|
||||||
|
= ThdlOptions.getBooleanOption("thdl.log.line.breaking.algorithm");
|
||||||
|
}
|
||||||
|
return debugLog;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Do not call this. */
|
||||||
|
private TibetanRTFViewFactory() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
/** Creates a new TibetanRTFViewFactory that delegates to vf when
|
/** Creates a new TibetanRTFViewFactory that delegates to vf when
|
||||||
* unknown elements are encountered.
|
* unknown elements are encountered.
|
||||||
|
@ -39,23 +67,16 @@ class TibetanRTFViewFactory implements ViewFactory {
|
||||||
delegatee = d;
|
delegatee = d;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** the delegatee */
|
|
||||||
private ViewFactory delegatee = null;
|
|
||||||
|
|
||||||
/** Returns a View that will break correctly at Tibetan
|
/** Returns a View that will break correctly at Tibetan
|
||||||
* punctuation. */
|
* punctuation. */
|
||||||
public View create(Element el) {
|
public View create(Element el) {
|
||||||
String elName = el.getName();
|
String elName = el.getName();
|
||||||
if (null != elName
|
if (null != elName
|
||||||
&& elName.equals(AbstractDocument.ContentElementName)) { // FIXME: is this right?
|
&& elName.equals(AbstractDocument.ContentElementName)) { // FIXME: is this right? what about paragraph-level stuff?
|
||||||
return new TibetanLabelView(el);
|
return new TibetanLabelView(el, getDebugLog());
|
||||||
} else {
|
} else {
|
||||||
// we don't know what to do, so delegate
|
// We don't know what to do, so delegate:
|
||||||
View r = delegatee.create(el);
|
return delegatee.create(el);
|
||||||
// DLC for debugging:
|
|
||||||
// System.out.println("DLC: creating a view '" + r + "'");
|
|
||||||
// System.out.println("DLC: for element '" + el + "'");
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue