From b6d8fd89f98c301f4a7f45956f4af47bb066f25f Mon Sep 17 00:00:00 2001
From: dchandler <dchandler>
Date: Mon, 23 Jun 2003 01:24:02 +0000
Subject: [PATCH] When errors in (all but TMW->Wylie and Wylie->TMW) conversion
 occur, the troublesome glyphs are now put at the beginning of the document
 AFTER AN ACHEN.  This makes a glyph like \tmw7095 visible atop the achen.

Major fix to the handling of paragraphs in conversion; we were (for
whatever reason) dropping paragraphs before.
---
 source/org/thdl/tib/text/TibetanDocument.java | 69 ++++++++++++-------
 1 file changed, 46 insertions(+), 23 deletions(-)
diff --git a/source/org/thdl/tib/text/TibetanDocument.java b/source/org/thdl/tib/text/TibetanDocument.java
index b1ef4ff..8ea555d 100644
--- a/source/org/thdl/tib/text/TibetanDocument.java
+++ b/source/org/thdl/tib/text/TibetanDocument.java
@@ -700,7 +700,6 @@ public class TibetanDocument extends DefaultStyledDocument {
         if (end < 0) {
             end = getLength();
         }
-        Element[] paragraphs = getParagraphs(begin, end);
         try {
             finalEndPos = createPosition(end);
         } catch (BadLocationException e) {
@@ -708,25 +707,35 @@ public class TibetanDocument extends DefaultStyledDocument {
         }
 
         ConversionErrorHelper ceh = new ConversionErrorHelper();
-        int pl = paragraphs.length;
-        for (int i = 0; i < pl; i++) {
-            int p_end = paragraphs[i].getEndOffset();
-            if (i + 1 == paragraphs.length)
+        int pl = 0;
+        pl = getParagraphs(begin, end).length;
+        boolean warn = false;
+        int lastTimeWeExamined = -1; // must differ from ceh.lastOffsetExamined's initial value (0), or the loop below never runs
+        boolean noMore = false;
+        while (!noMore
+               && lastTimeWeExamined != ceh.lastOffsetExamined) {
+            lastTimeWeExamined = ceh.lastOffsetExamined;
+            Element thisParagraph
+                = getParagraphElement(lastTimeWeExamined + 1);
+            int p_end = thisParagraph.getEndOffset();
+            if (p_end >= finalEndPos.getOffset()) {
+                noMore = true;
                 ceh.doErrorWrapup = true;
-            convertHelperHelper(paragraphs[i].getStartOffset(),
+            }
+            convertHelperHelper(thisParagraph.getStartOffset(),
                                 ((finalEndPos.getOffset() < p_end)
                                  ? finalEndPos.getOffset()
                                  : p_end),
                                 toTM, toUnicode, errors, ceh);
-
-            // Now that we've changed paragraph i, recalculate
-            // paragraphs.  (PERFORMANCE FIXME: this is O(N*N), and we
-            // could make it O(N) by calculating just one paragraph at
-            // a time.)
-            paragraphs = getParagraphs(begin, finalEndPos.getOffset());
-            if (paragraphs.length != pl)
-                throw new Error("Conversion failed: the number of paragraphs changed, indicating that formatting was lost.");
         }
+        if (!ceh.errorReturn
+            && pl != getParagraphs(begin, finalEndPos.getOffset()).length) {
+            System.err.println("Conversion WARNING: the number of paragraphs changed from "
+                               + pl + " to " + getParagraphs(begin, end).length
+                               + ", indicating that formatting may have been lost.");
+            ThdlDebug.noteIffyCode();
+        }
+
         return ceh.errorReturn;
     }
 
@@ -762,7 +771,7 @@ public class TibetanDocument extends DefaultStyledDocument {
         // than TMW.33 -- that's because each of the ten TMW fonts has
         // the same glyph at position 32 (space) and the same glyph at
         // position 45 (tsheg).  Note that we're building up a big
-        // ArrayList; we're trading space for time.
+        // StringBuffer; we're trading space for time.
         try {
             int replacementStartIndex = begin;
             StringBuffer replacementQueue = new StringBuffer();
@@ -770,7 +779,6 @@ public class TibetanDocument extends DefaultStyledDocument {
             int replacementFontSize = -1;
 
             int i = begin;
-            HashMap problemGlyphsTable = new HashMap();
             Position endPos = createPosition(end);
             DuffData[] equivalent = new DuffData[1];
             equivalent[0] = new DuffData();
@@ -871,8 +879,8 @@ public class TibetanDocument extends DefaultStyledDocument {
                         ceh.errorReturn = true;
                         CharacterInAGivenFont cgf
                             = new CharacterInAGivenFont(getText(i,1), fontName);
-                        if (!problemGlyphsTable.containsKey(cgf)) {
-                            problemGlyphsTable.put(cgf, "yes this character appears once");
+                        if (!ceh.problemGlyphsTable.containsKey(cgf)) {
+                            ceh.problemGlyphsTable.put(cgf, "yes this character appears once");
                             if (null != errors) {
                                 String err
                                     = (toUnicode
@@ -889,7 +897,13 @@ public class TibetanDocument extends DefaultStyledDocument {
                                 }
 
                                 // Now also put this problem glyph at
-                                // the beginning of the document:
+                                // the beginning of the document,
+                                // after an 'a' character (i.e.,
+                                // \tm0062 or \tmw0063):
+                                equivalent[0].setData((toUnicode || toTM) ? (char)63 : (char)62, 1);
+                                insertDuff(72, ceh.errorGlyphLocation++,
+                                           equivalent, toUnicode || toTM);
+                                ++i;
                                 equivalent[0].setData(getText(i,1), fontNum);
                                 insertDuff(72, ceh.errorGlyphLocation++,
                                            equivalent, toUnicode || toTM);
@@ -933,6 +947,7 @@ public class TibetanDocument extends DefaultStyledDocument {
                                  !toTM);
                 }
             }
+            ceh.lastOffsetExamined = endPos.getOffset() - 1;
 
             if (!ThdlOptions.getBooleanOption("thdl.leave.bad.tm.tmw.conversions.in.place")) {
                 // Remove all characters other than the oddballs:
@@ -962,10 +977,14 @@ public class TibetanDocument extends DefaultStyledDocument {
         while (pos <= end) {
             Element pe = getParagraphElement(pos);
             v.add(pe);
-            if (pe.getEndOffset() == pos)
-                pos = pe.getEndOffset() + 1;
-            else
-                pos = pe.getEndOffset();
+            int peo = pe.getEndOffset();
+            if (peo == pos) {
+                // Avoids an infinite loop I've run into:
+                if (getParagraphElement(peo + 1).getEndOffset() > pos)
+                    pos = peo + 1;
+                break;
+            } else
+                pos = peo;
         }
         return (Element[])v.toArray(arrayType);
     }
@@ -979,9 +998,13 @@ class ConversionErrorHelper {
      *  error glyphs yet exist */
     int errorGlyphLocation;
     boolean doErrorWrapup;
+    int lastOffsetExamined;
+    HashMap problemGlyphsTable;
     ConversionErrorHelper() {
         errorReturn = false;
         errorGlyphLocation = 0;
         doErrorWrapup = false;
+        lastOffsetExamined = 0;
+        problemGlyphsTable = new HashMap();
     }
 }