Added the initial version of Tibbibl, which Nathaniel Garson of UVa
e-mailed to me. Tibbibl is an editor for XML-based bibliographies of Tibetan texts. All I did was change the package from org.thdl.xml to org.thdl.tib.bibl and add boilerplate; no changes to Than's code were made. Tibbibl features a diacritic input tool which Jskad might want to swipe.
This commit is contained in:
parent
32a08c06c3
commit
72ee4fc7d2
28 changed files with 10248 additions and 3 deletions
352
source/org/thdl/tib/bibl/TitleParser.java
Normal file
352
source/org/thdl/tib/bibl/TitleParser.java
Normal file
|
@ -0,0 +1,352 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2001-2003 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.bibl;
|
||||
|
||||
import java.util.*;
|
||||
import org.jdom.*;
|
||||
import org.jdom.input.*;
|
||||
import org.jdom.output.*;
|
||||
|
||||
/**
|
||||
*
|
||||
* This class provides the mechanism to "parse" a Tibbibl title tag and understand its contents. Some title tags
|
||||
* will contain AP children that represent variant readings. TitleParser can among other things supply the text
|
||||
* of the main readings in the title, weeding out the alternative one.
|
||||
*/
|
||||
|
||||
public class TitleParser implements TibConstants
|
||||
{
|
||||
Object item;
|
||||
String stringItem, outText, titleText;
|
||||
org.jdom.Element title, foreign, contentItem;
|
||||
org.jdom.Text textItem;
|
||||
Vector parts, styleElements, toRemove;
|
||||
|
||||
private void clean()
|
||||
{
|
||||
/**
|
||||
* <p>
|
||||
* This method cleans the title element of consequetive Text elements so
|
||||
* that the {@link #isAppInsertable} method will not mistakenly think that
|
||||
* an app cannot be inserted since it crosses a part boundary. The two consequetive
|
||||
* jdom.org.Text elements will be combined and the second one removed.
|
||||
* </p>
|
||||
*/
|
||||
org.jdom.Text lastText, textElement;
|
||||
Iterator it = title.getContent().iterator();
|
||||
Vector toRemove = new Vector();
|
||||
lastText = null;
|
||||
while(it.hasNext())
|
||||
{
|
||||
Object item = it.next();
|
||||
if(item instanceof org.jdom.Text) {
|
||||
if(lastText == null) {
|
||||
lastText = (org.jdom.Text)item;
|
||||
} else {
|
||||
String lastTextString = lastText.getText();
|
||||
textElement = (org.jdom.Text)item;
|
||||
String thisTextString = textElement.getText();
|
||||
if(!lastTextString.endsWith(" ") &&
|
||||
!thisTextString.startsWith(" "))
|
||||
{
|
||||
thisTextString += " ";
|
||||
}
|
||||
lastText.setText(lastTextString + thisTextString);
|
||||
toRemove.add(item);
|
||||
}
|
||||
} else {
|
||||
lastText = null;
|
||||
}
|
||||
}
|
||||
for(it = toRemove.iterator(); it.hasNext();)
|
||||
{
|
||||
title.getContent().remove(it.next());
|
||||
}
|
||||
}
|
||||
|
||||
private void parse()
|
||||
{
|
||||
/**
|
||||
* <p>
|
||||
* This method parses the title by finding its constituent parts and creating
|
||||
* a {@link TitleParser.TitlePart} out of them and adding each TitlePart to
|
||||
* the vector {@link #parts}. The title parts contain the text for display, its
|
||||
* beginning and end indexes, and the corresponding JDOM part, whether it be
|
||||
* an element, text, or string.
|
||||
* </p>
|
||||
*/
|
||||
parts = new Vector();
|
||||
outText = new String();
|
||||
Iterator it = title.getContent().iterator();
|
||||
TitlePart prevPart = null;
|
||||
|
||||
while(it.hasNext())
|
||||
{
|
||||
boolean isApp = false;
|
||||
item = it.next();
|
||||
String stringItem = new String();
|
||||
if(item instanceof org.jdom.Element) {
|
||||
contentItem = (org.jdom.Element)item;
|
||||
if(contentItem.getName().equals(AP)) {
|
||||
stringItem = cleanString(contentItem.getChild(LM).getText());
|
||||
isApp = true;
|
||||
} else if(contentItem.getName().equals(FOREIGN)) {
|
||||
foreign = contentItem;
|
||||
continue;
|
||||
} else {
|
||||
stringItem = cleanString(contentItem.getText());
|
||||
System.out.println("Found an inner element that's not ap or foreign: " + contentItem.toString());
|
||||
}
|
||||
} else if(item instanceof org.jdom.Text) {
|
||||
textItem = (org.jdom.Text)item;
|
||||
stringItem = cleanString(textItem.getText());
|
||||
} else if(item instanceof String) {
|
||||
stringItem = cleanString((String)item);
|
||||
} else {
|
||||
System.out.println("A kind of title content found that is not recognized!\n" +
|
||||
item.toString());
|
||||
continue;
|
||||
}
|
||||
|
||||
if(!outText.endsWith(" ") && !stringItem.startsWith(" ")) {
|
||||
if(isApp) {
|
||||
TitlePart tpart = (TitlePart)parts.lastElement();
|
||||
tpart.addSpace();
|
||||
outText += " ";
|
||||
} else {
|
||||
stringItem = " " + stringItem;
|
||||
}
|
||||
}
|
||||
int start = outText.length()+1;
|
||||
outText += stringItem;
|
||||
parts.add(new TitlePart(stringItem,start,item,isApp));
|
||||
prevPart = (TitlePart)parts.lastElement();
|
||||
}
|
||||
|
||||
titleText = cleanString(outText);
|
||||
}
|
||||
|
||||
private void setStyles()
|
||||
{
|
||||
ElementStyle es;
|
||||
styleElements = new Vector();
|
||||
|
||||
Iterator it = parts.iterator();
|
||||
while(it.hasNext())
|
||||
{
|
||||
es = null;
|
||||
TitlePart tpart = (TitlePart)it.next();
|
||||
String text = tpart.text;
|
||||
if(tpart.isApp) {
|
||||
org.jdom.Element e = (org.jdom.Element)tpart.associatedItem;
|
||||
es = new ElementStyle(text,TextPane.VARIANT,e);
|
||||
} else {
|
||||
es = new ElementStyle(text,TextPane.REG,title);
|
||||
}
|
||||
styleElements.add(es);
|
||||
}
|
||||
if(styleElements.size()>0) {
|
||||
try {
|
||||
es = (ElementStyle)styleElements.firstElement();
|
||||
if(es.getText().equals(" ")) {styleElements.remove(es);}
|
||||
es = (ElementStyle)styleElements.lastElement();
|
||||
if(es.getText().equals(" ")) {styleElements.remove(es);}
|
||||
} catch (NoSuchElementException nsee) {}
|
||||
}
|
||||
}
|
||||
|
||||
public void setTitle(org.jdom.Element ttl)
|
||||
{
|
||||
title = ttl;
|
||||
parse();
|
||||
setStyles();
|
||||
}
|
||||
|
||||
public org.jdom.Element getTitle()
|
||||
{
|
||||
return title;
|
||||
}
|
||||
|
||||
public String getTitleText()
|
||||
{
|
||||
return titleText;
|
||||
}
|
||||
|
||||
public org.jdom.Element getTranslation()
|
||||
{
|
||||
return foreign;
|
||||
}
|
||||
|
||||
public Vector getStyles()
|
||||
{
|
||||
if(styleElements.size()==0) {
|
||||
styleElements.add(new ElementStyle("",TextPane.REG,title));
|
||||
}
|
||||
return styleElements;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* This method checks the parts of the title to see if an app (or apparatus) element is insertable. The
|
||||
* main thing it checks for is that the range of text does not span over an already existing app.
|
||||
* </p>
|
||||
*
|
||||
* @param startInsert the insertion point within the display of the titles text relative to the beginning of the line.
|
||||
*
|
||||
* @param txt the selected text which is to be marked up with the app. This is necessary to make sure that
|
||||
* the text does not fall completely within an already existing app.
|
||||
*
|
||||
* @return boolean - whether or not this is an acceptable place to insert an app element.
|
||||
*/
|
||||
public boolean isAppInsertable(int startInsert, String txt)
|
||||
{
|
||||
|
||||
if(parts == null) {return false;}
|
||||
Iterator it = parts.iterator();
|
||||
while(it.hasNext())
|
||||
{
|
||||
TitlePart tpart = (TitlePart)it.next();
|
||||
|
||||
int tpStart = tpart.start;
|
||||
int tpEnd = tpart.end;
|
||||
if(startInsert>=tpStart && startInsert<tpEnd) {
|
||||
if(tpart.isApp) {
|
||||
return false;
|
||||
}
|
||||
if(tpart.text.indexOf(txt)>-1 || tpart.text.equals(txt)) {
|
||||
return true;
|
||||
} else {
|
||||
TitlePart oldtp = tpart;
|
||||
String backText = tpart.text;
|
||||
tpart = (TitlePart)it.next();
|
||||
if(tpart.isApp) {return false;}
|
||||
backText += tpart.text;
|
||||
if(backText.indexOf(txt)>-1) {
|
||||
System.out.println("Found a text run that crosses boundaries of a non-app elements!\n");
|
||||
System.out.println("Firs element: " + oldtp.associatedItem.toString());
|
||||
System.out.println("Second element: " + tpart.associatedItem.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public Object getItemAt(int ind)
|
||||
{
|
||||
if(parts == null) {return null;}
|
||||
Iterator it = parts.iterator();
|
||||
while(it.hasNext())
|
||||
{
|
||||
TitlePart tpart = (TitlePart)it.next();
|
||||
int tpStart = tpart.start;
|
||||
int tpEnd = tpart.end;
|
||||
if(ind>=tpStart && ind<tpEnd) {
|
||||
return tpart.associatedItem;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getTextAt(int ind)
|
||||
{
|
||||
if(parts == null) {return null;}
|
||||
Iterator it = parts.iterator();
|
||||
while(it.hasNext())
|
||||
{
|
||||
TitlePart tpart = (TitlePart)it.next();
|
||||
int tpStart = tpart.start;
|
||||
int tpEnd = tpart.end;
|
||||
if(ind>=tpStart && ind<tpEnd) {
|
||||
return tpart.text;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private String cleanString(String text)
|
||||
{
|
||||
if(text != null) {
|
||||
text = text.replace('\n',' ');
|
||||
} else {
|
||||
text = "";
|
||||
return text;
|
||||
}
|
||||
|
||||
StringTokenizer parts = new StringTokenizer(text," ");
|
||||
text = new String();
|
||||
while(parts.hasMoreTokens()) {
|
||||
String word = parts.nextToken();
|
||||
if(word != null && !word.equals(" ")) {
|
||||
text += word;
|
||||
if(parts.hasMoreTokens()) { text += " "; }
|
||||
}
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
public TitleParser(org.jdom.Element ttl)
|
||||
{
|
||||
setTitle(ttl);
|
||||
clean();
|
||||
parse();
|
||||
setStyles();
|
||||
}
|
||||
|
||||
protected class TitlePart
|
||||
{
|
||||
int start, end;
|
||||
String text;
|
||||
Object associatedItem;
|
||||
boolean isApp;
|
||||
boolean isText;
|
||||
|
||||
public void addSpace()
|
||||
{
|
||||
text = text + " ";
|
||||
end = start + text.length();
|
||||
}
|
||||
|
||||
public String toString()
|
||||
{
|
||||
String out = "\nText: " + text;
|
||||
out += "\nStart: " + start;
|
||||
out += "\nEnd: " + end;
|
||||
out += "\nisApp: " + isApp;
|
||||
out += "\nisText: " + isText;
|
||||
out += "\nAssociatedItem: " + associatedItem.toString();
|
||||
out += "\n---------end of item --------\n\n";
|
||||
return out;
|
||||
}
|
||||
|
||||
public TitlePart(String txt, int ind, Object ai, boolean ia)
|
||||
{
|
||||
text = txt;
|
||||
start = ind;
|
||||
end = start + text.length()-1;
|
||||
associatedItem = ai;
|
||||
isApp = ia;
|
||||
if(associatedItem instanceof org.jdom.Text || associatedItem instanceof String) {
|
||||
isText = true;
|
||||
} else {
|
||||
isText = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue