Added the initial version of Tibbibl, which Nathaniel Garson of UVa

e-mailed to me. Tibbibl is an editor for XML-based bibliographies of Tibetan texts. All I did was change the package from org.thdl.xml to org.thdl.tib.bibl and add boilerplate; no changes to Than's code were made. Tibbibl features a diacritic input tool which Jskad might want to swipe.
2003-02-01 05:08:02 +00:00 · 2003-02-01 05:08:02 +00:00 · 72ee4fc7d2
commit 72ee4fc7d2
parent 32a08c06c3
28 changed files with 10248 additions and 3 deletions
--- a/source/org/thdl/tib/bibl/TitleParser.java
+++ b/source/org/thdl/tib/bibl/TitleParser.java
@ -0,0 +1,352 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2001-2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.bibl;
+
+import java.util.*;
+import org.jdom.*;
+import org.jdom.input.*;
+import org.jdom.output.*;
+
+/**
+*
+* This class provides the mechanism to "parse" a Tibbibl title tag and understand its contents. Some title tags
+* will contain AP children that represent variant readings. TitleParser can among other things supply the text
+* of the main readings in the title, weeding out the alternative one.
+*/
+
+public class TitleParser implements TibConstants
+{
+	Object item;
+	String stringItem, outText, titleText;
+	org.jdom.Element title, foreign, contentItem;
+	org.jdom.Text textItem;
+	Vector parts, styleElements, toRemove;
+
+	private void clean()
+	{
+		/**
+		* <p>
+		* This method cleans the title element of consequetive Text elements so
+		* that the {@link #isAppInsertable} method will not mistakenly think that
+		* an app cannot be inserted since it crosses a part boundary. The two consequetive
+		* jdom.org.Text elements will be combined and the second one removed.
+		* </p>
+		*/
+		org.jdom.Text lastText, textElement;
+		Iterator it = title.getContent().iterator();
+		Vector toRemove = new Vector();
+		lastText = null;
+		while(it.hasNext())
+		{
+			Object item = it.next();
+			if(item instanceof org.jdom.Text) {
+				if(lastText == null) {
+					lastText = (org.jdom.Text)item;
+				} else {
+					String lastTextString = lastText.getText();
+					textElement = (org.jdom.Text)item;
+					String thisTextString = textElement.getText();
+					if(!lastTextString.endsWith(" ") &&
+						!thisTextString.startsWith(" "))
+					{
+						thisTextString += " ";
+					}
+					lastText.setText(lastTextString + thisTextString);
+					toRemove.add(item);
+				}
+			} else {
+				lastText = null;
+			}
+		}
+		for(it = toRemove.iterator(); it.hasNext();)
+		{
+			title.getContent().remove(it.next());
+		}
+	}
+
+	private void parse()
+	{
+		/**
+		* <p>
+		* This method parses the title by finding its constituent parts and creating
+		* a {@link TitleParser.TitlePart} out of them and adding each TitlePart to
+		* the vector {@link #parts}. The title parts contain the text for display, its
+		* beginning and end indexes, and the corresponding JDOM part, whether it be
+		* an element, text, or string.
+		* </p>
+		*/
+		parts = new Vector();
+		outText = new String();
+		Iterator it = title.getContent().iterator();
+		TitlePart prevPart = null;
+
+		while(it.hasNext())
+		{
+			boolean isApp = false;
+			item = it.next();
+			String stringItem = new String();
+			if(item instanceof org.jdom.Element) {
+				contentItem = (org.jdom.Element)item;
+				if(contentItem.getName().equals(AP)) {
+					stringItem = cleanString(contentItem.getChild(LM).getText());
+					isApp = true;
+				} else if(contentItem.getName().equals(FOREIGN)) {
+					foreign = contentItem;
+					continue;
+				} else {
+					stringItem = cleanString(contentItem.getText());
+					System.out.println("Found an inner element that's not ap or foreign: " + contentItem.toString());
+				}
+			} else if(item instanceof org.jdom.Text) {
+				textItem = (org.jdom.Text)item;
+				stringItem = cleanString(textItem.getText());
+			} else if(item instanceof String) {
+				stringItem = cleanString((String)item);
+			} else {
+				System.out.println("A kind of title content found that is not recognized!\n" +
+						item.toString());
+				continue;
+			}
+
+			if(!outText.endsWith(" ") && !stringItem.startsWith(" ")) {
+				if(isApp) {
+					TitlePart tpart = (TitlePart)parts.lastElement();
+					tpart.addSpace();
+					outText += " ";
+				} else {
+					stringItem = " " + stringItem;
+				}
+			}
+			int start = outText.length()+1;
+			outText += stringItem;
+			parts.add(new TitlePart(stringItem,start,item,isApp));
+			prevPart = (TitlePart)parts.lastElement();
+		}
+
+		titleText = cleanString(outText);
+	}
+
+	private void setStyles()
+	{
+		ElementStyle es;
+		styleElements = new Vector();
+
+		Iterator it = parts.iterator();
+		while(it.hasNext())
+		{
+			es = null;
+			TitlePart tpart = (TitlePart)it.next();
+			String text = tpart.text;
+			if(tpart.isApp) {
+				org.jdom.Element e = (org.jdom.Element)tpart.associatedItem;
+				es = new ElementStyle(text,TextPane.VARIANT,e);
+			} else {
+				es = new ElementStyle(text,TextPane.REG,title);
+			}
+			styleElements.add(es);
+		}
+		if(styleElements.size()>0) {
+			try {
+				es = (ElementStyle)styleElements.firstElement();
+				if(es.getText().equals(" ")) {styleElements.remove(es);}
+				es = (ElementStyle)styleElements.lastElement();
+				if(es.getText().equals(" ")) {styleElements.remove(es);}
+			} catch (NoSuchElementException nsee) {}
+		}
+	}
+
+	public void setTitle(org.jdom.Element ttl)
+	{
+		title = ttl;
+		parse();
+		setStyles();
+	}
+
+	public org.jdom.Element getTitle()
+	{
+		return title;
+	}
+
+	public String getTitleText()
+	{
+		return titleText;
+	}
+
+	public org.jdom.Element getTranslation()
+	{
+		return foreign;
+	}
+
+	public Vector getStyles()
+	{
+		if(styleElements.size()==0) {
+			styleElements.add(new ElementStyle("",TextPane.REG,title));
+		}
+		return styleElements;
+	}
+
+/**
+* <p>
+* This method checks the parts of the title to see if an app (or apparatus) element is insertable. The
+* main thing it checks for is that the range of text does not span over an already existing app.
+* </p>
+*
+* @param startInsert the insertion point within the display of the titles text relative to the beginning of the line.
+*
+* @param txt the selected text which is to be marked up with the app. This is necessary to make sure that
+* the text does not fall completely within an already existing app.
+*
+* @return boolean - whether or not this is an acceptable place to insert an app element.
+*/
+	public boolean isAppInsertable(int startInsert, String txt)
+	{
+
+		if(parts == null) {return false;}
+		Iterator it = parts.iterator();
+		while(it.hasNext())
+		{
+			TitlePart tpart = (TitlePart)it.next();
+
+			int tpStart = tpart.start;
+			int tpEnd = tpart.end;
+			if(startInsert>=tpStart && startInsert<tpEnd) {
+				if(tpart.isApp) {
+					return false;
+				}
+				if(tpart.text.indexOf(txt)>-1 || tpart.text.equals(txt)) {
+					return true;
+				} else {
+					TitlePart oldtp = tpart;
+					String backText = tpart.text;
+					tpart = (TitlePart)it.next();
+					if(tpart.isApp) {return false;}
+					backText += tpart.text;
+					if(backText.indexOf(txt)>-1) {
+						System.out.println("Found a text run that crosses boundaries of a non-app elements!\n");
+						System.out.println("Firs element: " + oldtp.associatedItem.toString());
+						System.out.println("Second element: " + tpart.associatedItem.toString());
+					}
+				}
+			}
+		}
+		return false;
+	}
+
+	public Object getItemAt(int ind)
+	{
+		if(parts == null) {return null;}
+		Iterator it = parts.iterator();
+		while(it.hasNext())
+		{
+			TitlePart tpart = (TitlePart)it.next();
+			int tpStart = tpart.start;
+			int tpEnd = tpart.end;
+			if(ind>=tpStart && ind<tpEnd) {
+				return tpart.associatedItem;
+			}
+		}
+		return null;
+	}
+
+	public String getTextAt(int ind)
+	{
+		if(parts == null) {return null;}
+		Iterator it = parts.iterator();
+		while(it.hasNext())
+		{
+			TitlePart tpart = (TitlePart)it.next();
+			int tpStart = tpart.start;
+			int tpEnd = tpart.end;
+			if(ind>=tpStart && ind<tpEnd) {
+				return tpart.text;
+			}
+		}
+		return null;
+	}
+
+	private String cleanString(String text)
+	{
+		if(text != null) {
+			text = text.replace('\n',' ');
+		} else {
+			text = "";
+			return text;
+		}
+
+		StringTokenizer parts = new StringTokenizer(text," ");
+		text = new String();
+		while(parts.hasMoreTokens()) {
+			String word = parts.nextToken();
+			if(word != null && !word.equals(" ")) {
+				text += word;
+				if(parts.hasMoreTokens()) { text += " "; }
+			}
+		}
+		return text;
+	}
+
+	public TitleParser(org.jdom.Element ttl)
+	{
+		setTitle(ttl);
+		clean();
+		parse();
+		setStyles();
+	}
+
+	protected class TitlePart
+	{
+		int start, end;
+		String text;
+		Object associatedItem;
+		boolean isApp;
+		boolean isText;
+
+		public void addSpace()
+		{
+			text = text + " ";
+			end = start + text.length();
+		}
+
+		public String toString()
+		{
+			String out = "\nText: " + text;
+			out += "\nStart: " + start;
+			out += "\nEnd: " + end;
+			out += "\nisApp: " + isApp;
+			out += "\nisText: " + isText;
+			out += "\nAssociatedItem: " + associatedItem.toString();
+			out += "\n---------end of item --------\n\n";
+			return out;
+		}
+
+		public TitlePart(String txt, int ind, Object ai, boolean ia)
+		{
+			text = txt;
+			start = ind;
+			end = start + text.length()-1;
+			associatedItem = ai;
+			isApp = ia;
+			if(associatedItem instanceof org.jdom.Text || associatedItem instanceof String) {
+				isText = true;
+			} else {
+				isText = false;
+			}
+		}
+	}
+}