From afd3a95a216774cdca31c5ac63e2a33c55fdc526 Mon Sep 17 00:00:00 2001 From: amontano Date: Fri, 13 Aug 2004 04:47:35 +0000 Subject: [PATCH] Updated the dictionary structure to allow grouping of dictionaries, this is the first step to try to clean up a bit the massive repetitions in dictionaries. --- source/org/thdl/tib/scanner/AcipToTab.java | 343 ++++++++++++++++++ .../thdl/tib/scanner/AppletScannerFilter.java | 16 +- .../thdl/tib/scanner/BinaryFileGenerator.java | 262 +++++++++++-- .../thdl/tib/scanner/BitDictionarySource.java | 171 +++++++++ .../tib/scanner/ByteDictionarySource.java | 307 ++++++++++++++++ .../tib/scanner/CachedSyllableListTree.java | 23 +- source/org/thdl/tib/scanner/Definitions.java | 68 ++-- .../thdl/tib/scanner/DictionarySource.java | 147 ++------ .../tib/scanner/DictionaryTableModel.java | 2 +- .../tib/scanner/FileSyllableListTree.java | 134 +++++-- .../thdl/tib/scanner/LocalTibetanScanner.java | 6 +- source/org/thdl/tib/scanner/Manipulate.java | 72 ++++ .../tib/scanner/MemorySyllableListTree.java | 8 + .../thdl/tib/scanner/OnLineScannerFilter.java | 98 ++--- .../thdl/tib/scanner/RemoteScannerFilter.java | 5 +- .../tib/scanner/RemoteTibetanScanner.java | 8 +- source/org/thdl/tib/scanner/ScannerPanel.java | 6 +- .../thdl/tib/scanner/SimpleScannerPanel.java | 56 ++- .../thdl/tib/scanner/SyllableListTree.java | 2 + .../org/thdl/tib/scanner/TibetanScanner.java | 72 +++- .../thdl/tib/scanner/WindowScannerFilter.java | 14 +- source/org/thdl/tib/scanner/Word.java | 10 + 22 files changed, 1533 insertions(+), 297 deletions(-) create mode 100644 source/org/thdl/tib/scanner/AcipToTab.java create mode 100644 source/org/thdl/tib/scanner/BitDictionarySource.java create mode 100644 source/org/thdl/tib/scanner/ByteDictionarySource.java diff --git a/source/org/thdl/tib/scanner/AcipToTab.java b/source/org/thdl/tib/scanner/AcipToTab.java new file mode 100644 index 0000000..9f9130f --- /dev/null +++ b/source/org/thdl/tib/scanner/AcipToTab.java @@ -0,0 +1,343 @@ +package org.thdl.tib.scanner; + +import java.net.*; +import java.io.*; + +class AcipToTab +{ + private BufferedReader in; + private PrintWriter out; + private String currentDefiniendum, currentDefinition; + + public AcipToTab(BufferedReader in, PrintWriter out) + { + this.in = in; + this.out = out; + } + + public void add() + { + out.println(currentDefiniendum + '\t' + currentDefinition); + } + + public static void main (String[] args) throws Exception + { + PrintWriter out; + BufferedReader in=null; + boolean file=false; + + switch (args.length) + { + case 0: out = new PrintWriter(System.out); + in = new BufferedReader(new InputStreamReader(System.in)); + break; + case 1: out = new PrintWriter(System.out); + file = true; + break; + default: out = new PrintWriter(new FileOutputStream(args[1])); + file = true; + } + + if (file) + { + if (args[0].indexOf("http://") >= 0) + in = new BufferedReader(new InputStreamReader(new BufferedInputStream((new URL(args[0])).openStream()))); + else + in = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]))); + } + + new AcipToTab(in, out).run(); + } + + public void run() throws Exception + { + final short newDefiniendum=1, halfDefiniendum=2, definition=3; + short status=newDefiniendum; + int marker, len, marker2, n=0, total=0, currentPage=0, currentLine=1, pos; + char ch; + String entrada="", currentLetter="", temp="", lastDefiniendum="", lastWeirdDefiniendum=""; + boolean markerNotFound; + currentDefiniendum=""; + currentDefinition=""; + outAHere: + while (true) + { + entrada=in.readLine(); + if (entrada==null) break; + currentLine++; + + entrada = entrada.trim(); + len = entrada.length(); + if (len<=0) continue; + + // get page number + if (entrada.charAt(0)=='@') + { + marker = 1; + while(marker0) + { + currentPage=Integer.parseInt(temp); + if (currentPage==3141) + { + System.out.println("Hello!"); + } + } + if (marker0) n++; + lastDefiniendum=currentDefiniendum; + currentDefiniendum=""; + currentDefinition=""; + } + + marker=marker2=1; + markerNotFound=true; + + while (marker < len) + { + ch = entrada.charAt(marker); + switch(ch) + { + case '/': + markerNotFound=false; + marker2=marker+1; + break; + case '(': case '<': + markerNotFound=false; + marker2=marker; + break; + case 'g': // verify "g " + if (marker+10 && Manipulate.isVowel(entrada.charAt(pos-1)) && (markerNotFound || entrada.substring(0,pos+1).length() < entrada.substring(0, marker).trim().length())) + { + // out.println(currentPage + ": " + entrada); + n++; + }*/ + + /* either this is a definiendum that consists of several lines or + it is part of the last definition. */ + if (markerNotFound) + { + /* assume that the definiendum goes on to the next line. */ + currentDefiniendum = currentDefiniendum + " "; + status=halfDefiniendum; + } + else + { + // total++; + + currentDefiniendum = currentDefiniendum + entrada.substring(0,marker).trim(); + currentDefinition = "[" + currentPage + "] " + entrada.substring(marker2).trim(); + + status=definition; + + while (true) + { + entrada=in.readLine(); + + if (entrada==null) + { + // add here + add(); + + // if (new TibetanString(lastDefiniendum).compareTo(new TibetanString(currentDefiniendum))>0) n++; + break outAHere; + } + + currentLine++; + entrada = entrada.trim(); + + if (entrada.equals("")) break; + else + { + currentDefinition = currentDefinition + " " + entrada; + } + } + + } + } + else // last line did not start with the current letter, it must still be part of the definition + { + currentDefinition = currentDefinition + " " + entrada; + while (true) + { + entrada=in.readLine(); + + if (entrada==null) + { + // add here + add(); + + // if (new TibetanString(lastDefiniendum).compareTo(new TibetanString(currentDefiniendum))>0) n++; + break outAHere; + } + + currentLine++; + entrada = entrada.trim(); + + if (entrada.equals("")) break; + { + currentDefinition = currentDefinition + " " + entrada; + } + } + } + + } else // if first character was not a letter, it must still be part of definition + { + currentDefinition = currentDefinition + " " + entrada; + while (true) + { + entrada=in.readLine(); + + if (entrada==null) + { + // add here + add(); + + break outAHere; + } + + currentLine++; + entrada = entrada.trim(); + + if (entrada.equals("")) break; + else + { + currentDefinition = currentDefinition + " " + entrada; + } + } + } + } +// out.println(n + " / " + total); + out.flush(); + } +} \ No newline at end of file diff --git a/source/org/thdl/tib/scanner/AppletScannerFilter.java b/source/org/thdl/tib/scanner/AppletScannerFilter.java index a6dff66..6342e53 100644 --- a/source/org/thdl/tib/scanner/AppletScannerFilter.java +++ b/source/org/thdl/tib/scanner/AppletScannerFilter.java @@ -70,16 +70,13 @@ public class AppletScannerFilter extends JApplet implements ActionListener, Focu } diagAbout = null; - + // sp = new SimpleScannerPanel(url); - sp = new DuffScannerPanel(url); - + sp = new DuffScannerPanel(url); sp.addFocusListener(this); - setContentPane(sp); // setup the menu. Almost identical to WindowScannerFilter, but using swing. - JMenuBar mb = new JMenuBar(); mnuEdit = new JMenu ("Edit"); mnuCut = new JMenuItem("Cut"); @@ -107,13 +104,14 @@ public class AppletScannerFilter extends JApplet implements ActionListener, Focu mnuClear.addActionListener(this); mb.add(mnuEdit); - JMenu m = new JMenu("View"); + JMenu m; + + /* m = new JMenu("View"); tibScript = new JCheckBoxMenuItem("Tibetan Script", true); m.add(tibScript); tibScript.addItemListener(this); - mb.add(m); + mb.add(m);*/ - //JMenuItem aboutItem = new JMenuItem("About..."); aboutItem.addActionListener(this); @@ -128,7 +126,7 @@ public class AppletScannerFilter extends JApplet implements ActionListener, Focu SymComponent aSymComponent = new SymComponent(); this.addComponentListener(aSymComponent); //}} - + fakeFrame = new Frame(); if (!ThdlOptions.getBooleanOption(AboutDialog.windowAboutOption)) { diff --git a/source/org/thdl/tib/scanner/BinaryFileGenerator.java b/source/org/thdl/tib/scanner/BinaryFileGenerator.java index c147499..30aad8a 100644 --- a/source/org/thdl/tib/scanner/BinaryFileGenerator.java +++ b/source/org/thdl/tib/scanner/BinaryFileGenerator.java @@ -150,6 +150,8 @@ myglossary_uma.txt in the transliteration format explained above.
*/ public class BinaryFileGenerator extends SimplifiedLinkedList { + private static final int versionNumber = 3; + private long posHijos; private String sil, def[]; public final static int delimiterGeneric=0; @@ -158,7 +160,7 @@ public class BinaryFileGenerator extends SimplifiedLinkedList /** Number of dictionary. If 0, partial word (no definition). */ - private DictionarySource sourceDef; + private ByteDictionarySource sourceDef; public static RandomAccessFile wordRaf; private static RandomAccessFile defRaf; @@ -181,14 +183,16 @@ public class BinaryFileGenerator extends SimplifiedLinkedList { super(); int marker = sil.indexOf(" "); - this.sourceDef = new DictionarySource(); + + // fix for updates + this.sourceDef = new ByteDictionarySource(); if (marker<0) { this.sil = sil; this.def = new String[1]; this.def[0] = def; - this.sourceDef.add(numDef); + this.sourceDef.addNewDef(numDef); } else { @@ -208,9 +212,7 @@ public class BinaryFileGenerator extends SimplifiedLinkedList { final short newDefiniendum=1, halfDefiniendum=2, definition=3; short status=newDefiniendum; - int marker, len, marker2; -// int n=0; - int currentPage=0, currentLine=1; + int marker, len, marker2, currentPage=0, currentLine=1; char ch; BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(archivo))); String entrada="", s1="", s2="", currentLetter="", temp="", lastWeirdDefiniendum="", alternateWords[]; @@ -495,6 +497,11 @@ public class BinaryFileGenerator extends SimplifiedLinkedList s2 = Manipulate.deleteQuotes(entrada.substring(marker+delimiter.length()).trim()); if (!s2.equals("")) { + if (currentLine%5000==0) + { + System.out.println("Adding " + s1 + "..."); + System.out.flush(); + } marker2 = s1.indexOf(';'); if (marker2>0) { @@ -564,32 +571,234 @@ public class BinaryFileGenerator extends SimplifiedLinkedList } } } + + private void reGroup (int n) + { + int i, pos, posEnd; + + for (i=0; i=def[n].length()) + { + pos = def[i].indexOf(def[n]); + + // if it is the same String exactly + if (pos==0 && def[i].length()==def[n].length()) + { + if (i0 && !Character.isLetter(def[i].charAt(pos-1)))) && (posEnd==def[i].length() || !Character.isLetter(def[i].charAt(posEnd)))) + { + if(sourceDef.getDef(i).contains(sourceDef.getDef(n))) + { + def = Manipulate.deleteString(def, n); + sourceDef.deleteDef(n); + return; + } + + // else + sourceDef.addDictToDef(sourceDef.getDef(i), n); + + do + { + def[i] = Manipulate.replace(def[i], pos, posEnd, "*"); + pos = def[i].indexOf(def[n]); + posEnd = pos + def[n].length(); + } while ((pos==0 || (pos>0 && !Character.isLetter(def[i].charAt(pos-1)))) && (posEnd==def[i].length() || !Character.isLetter(def[i].charAt(posEnd)))); + + if (i0 && !Character.isLetter(def[n].charAt(pos-1)))) && (posEnd==def[n].length() || !Character.isLetter(def[n].charAt(posEnd)))) + { + if (sourceDef.getDef(n).contains(sourceDef.getDef(i))) + { + def = Manipulate.deleteString(def, i); + sourceDef.deleteDef(i); + i--; + continue; + } + + sourceDef.addDictToDef(sourceDef.getDef(n), i); + + do + { + def[n] = Manipulate.replace(def[n], pos, posEnd, "*"); + pos = def[n].indexOf(def[i]); + posEnd = pos + def[i].length(); + } while ((pos==0 || (pos>0 && !Character.isLetter(def[n].charAt(pos-1)))) && (posEnd==def[n].length() || !Character.isLetter(def[n].charAt(posEnd)))); + + i=-1; // start over + continue; + } + } + + // deal with repetition of dictionaries + + if (sourceDef.getDef(i).equals(sourceDef.getDef(n))) + { + if (i=def.length()) + { + pos = this.def[i].indexOf(def); + posEnd = pos + def.length(); + if ((pos==0 || (pos>0 && !Character.isLetter(this.def[i].charAt(pos-1)))) && (posEnd==this.def[i].length() || !Character.isLetter(this.def[i].charAt(posEnd)))) + { + if (!sourceDef.isDictInDef(numDef, i)) + { + if (this.def[i].length()>def.length()) + { + //temp = Manipulate.deleteSubstring(this.def[i], pos, posEnd); + temp = this.def[i]; + do + { + temp = Manipulate.replace(temp, pos, posEnd, "*"); + pos = temp.indexOf(def); + posEnd = pos + def.length(); + } while ((pos==0 || (pos>0 && !Character.isLetter(temp.charAt(pos-1)))) && (posEnd==temp.length() || !Character.isLetter(temp.charAt(posEnd)))); + + this.def[i] = def; + this.def = Manipulate.addString(this.def, temp, i+1); + sourceDef.dubDef(i); + sourceDef.addDictToDef(numDef, i); + + reGroup(i); + if (i+10 && !Character.isLetter(def.charAt(pos-1)))) && (posEnd==def.length() || !Character.isLetter(def.charAt(posEnd)))) + { + if (sourceDef.isDictInDefAlone(numDef, i)) + { + this.def[i] = def; + reGroup(i); + } + else + { + sourceDef.addDictToDef(numDef, i); + do + { + //def = Manipulate.deleteSubstring(def, pos, posEnd); + def = Manipulate.replace(def, pos, posEnd, "*"); + pos = def.indexOf(this.def[i]); + posEnd = pos + this.def[i].length(); + } while ((pos==0 || (pos>0 && !Character.isLetter(def.charAt(pos-1)))) && (posEnd==def.length() || !Character.isLetter(def.charAt(posEnd)))); + } + changed = true; + } + } + i++; + } + } while (changed); + + if (notAlreadyThere) + { + // check if it is a duplicate for the same dictionary. + i = sourceDef.containsAlone(numDef); + if (i>-1) + { + this.def[i] = this.def[i] + ". " + def; + reGroup(i); + } + else + { + this.def = Manipulate.addString(this.def, def, this.def.length); + sourceDef.addNewDef(numDef); + reGroup(this.def.length-1); } - newDef[i] = def; - this.def = newDef; - sourceDef.add(numDef); } } } @@ -617,8 +826,8 @@ public class BinaryFileGenerator extends SimplifiedLinkedList { try { - wordRaf.writeInt((int)defRaf.getFilePointer()); - defRaf.writeUTF(def[i]); + wordRaf.writeInt((int)defRaf.getFilePointer()); + defRaf.writeUTF(def[i]); } catch (Exception e) { @@ -681,6 +890,12 @@ public class BinaryFileGenerator extends SimplifiedLinkedList print(); wordRaf.writeInt((int)posHijos); + // write version marker + wordRaf.writeShort(-1); + wordRaf.writeByte(-1); + + // write version number + wordRaf.writeByte(versionNumber); } public static void main(String args[]) throws Exception @@ -754,11 +969,14 @@ public class BinaryFileGenerator extends SimplifiedLinkedList { delimiterType=delimiterDash; } + System.out.println("\nProcessing " + args[i] + "..."); sl.addFile(args[i] + ".txt", delimiterType, delimiter, n); n++; i++; } } } + System.out.println("Writing to file " + args[a] + "..."); + System.out.flush(); sl.generateDatabase(args[a]); } } diff --git a/source/org/thdl/tib/scanner/BitDictionarySource.java b/source/org/thdl/tib/scanner/BitDictionarySource.java new file mode 100644 index 0000000..7725d33 --- /dev/null +++ b/source/org/thdl/tib/scanner/BitDictionarySource.java @@ -0,0 +1,171 @@ +/* +The contents of this file are subject to the AMP Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License on the AMP web site +(http://www.tibet.iteso.mx/Guatemala/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is Andres Montano Pellegrini. Portions +created by Andres Montano Pellegrini are Copyright 2001 Andres Montano +Pellegrini. All Rights Reserved. + +Contributor(s): ______________________________________. +*/ +package org.thdl.tib.scanner; + +import java.io.*; + +/** Specifies a subset of dictionaries among a set of + dictionaries. Supports a maximum of 30 dictionaries. + + @author Andrés Montano Pellegrini +*/ +public class BitDictionarySource extends DictionarySource +{ + private int dicts; + + /** Last bit of word; 1 if there are more brothers.*/ + private static final int lastBit=1073741824; + private static final int allDicts=lastBit-1; + + public BitDictionarySource() + { + dicts = 0; + } + + public BitDictionarySource(int dicts) + { + this.dicts = dicts; + } + + public boolean equals(Object obj) + { + BitDictionarySource objB = (BitDictionarySource) obj; + return (this.getDicts()==objB.getDicts()); + } + + /** Returns an instance of DictionarySource with all dictionaries selected */ + public BitDictionarySource getAllDictionaries() + { + BitDictionarySource ds = new BitDictionarySource(); + ds.setDicts(allDicts); + return ds; + } + + /** Marks all dictionaries as selected */ + public void setAllDictionaries() + { + dicts = allDicts; + } + + /** Assumes dicts is an array of bits, and selects the dictionaries marked by + each bit. */ + public void setDicts(int dicts) + { + this.dicts = dicts; + } + + /** Returns an array of bits representing the selected dictionaries. */ + public int getDicts() + { + return dicts; + } + + private int getBits(int n) + { + return 1 << n; + } + + public boolean contains(int dict) + { + return (dicts & getBits(dict))>0; + } + + public boolean contains(BitDictionarySource dicts) + { + return this.intersection(dicts).equals(dicts); + } + + /** Marks the dictionary "dict" as selected */ + public void add(int dict) + { + dicts|= getBits(dict); + } + + public void add(BitDictionarySource dicts) + { + this.dicts|= dicts.dicts; + } + + /** Write to file using BinaryFileGenerator */ + public void print(boolean hasNext, DataOutput raf) throws IOException + { + int numDict; + if (hasNext) numDict = lastBit | dicts; + else numDict = dicts; + raf.writeInt(numDict); + } + + public void read(DataInput raf) throws IOException + { + setDicts(raf.readInt()); + } + + public boolean hasBrothers() + { + return (dicts & lastBit)>0; + } + + public int countDefs() + { + int n, source; + for (n=0, source = dicts & allDicts; source>0; source>>=1) + if (source%2==1) n++; + return n; + } + + public DictionarySource intersection(DictionarySource dsO) + { + BitDictionarySource ds = new BitDictionarySource(), dsOB = (BitDictionarySource) dsO; + ds.setDicts(this.dicts & dsOB.dicts); + return ds; + } + + /** Returns an array containing the indexes for the available dictionaries. Use this + method when you know exactly how many dictionaries there are! */ + public int[] untangleDefs(int n) + { + int arr[], i, pos, source; + arr = new int[n]; + for (i=0, pos=0, source=dicts & allDicts; pos>=1) + if (source%2==1) + arr[pos++]=i; + return arr; + } + + /** Returns an array containing the indexes for the available dictionaries.*/ + public int[] untangleDefs() + { + return untangleDefs(countDefs()); + } + + public boolean isEmpty() + { + return (dicts & allDicts)<=0; + } + + public void reset() + { + dicts = 0; + } + public String getTag(int i) + { + int source[] = this.untangleDefs(); + if (defTags==null) return Integer.toString(source[i]+1); + return defTags[source[i]]; + } +} \ No newline at end of file diff --git a/source/org/thdl/tib/scanner/ByteDictionarySource.java b/source/org/thdl/tib/scanner/ByteDictionarySource.java new file mode 100644 index 0000000..94fc31d --- /dev/null +++ b/source/org/thdl/tib/scanner/ByteDictionarySource.java @@ -0,0 +1,307 @@ +/* +The contents of this file are subject to the AMP Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License on the AMP web site +(http://www.tibet.iteso.mx/Guatemala/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is Andres Montano Pellegrini. Portions +created by Andres Montano Pellegrini are Copyright 2001 Andres Montano +Pellegrini. All Rights Reserved. + +Contributor(s): ______________________________________. +*/ +package org.thdl.tib.scanner; + +import java.io.*; + +/** Specifies a subset of dictionaries among a set of + dictionaries. Supports a maximum of 30 dictionaries. + Unlike @BitDictionarySource, it provides the infrastructure + to group definitions from various dictionaries. + + @author Andrés Montano Pellegrini +*/ +public class ByteDictionarySource extends DictionarySource +{ + //private BitDictionarySource dicts[]; + private BitDictionarySource dicts[]; + private boolean hasBrother; + + /** Last bit of word; 1 if there are more brothers.*/ + private static final int lastBit = 64; + private static final int allDicts=lastBit-1; + + public ByteDictionarySource() + { + dicts = null; + hasBrother = false; + } + + public ByteDictionarySource(BitDictionarySource dicts[], boolean hasBrother) + { + this.dicts = dicts; + this.hasBrother = hasBrother; + } + + public void insertDef(BitDictionarySource newDef, int n) + { + int i; + BitDictionarySource newDicts[] = new BitDictionarySource[dicts.length+1]; + + for (i=0; i0) + { + hasBrother = true; + n = n & allDicts; + } + else hasBrother = false; + + if (n==0) + { + dicts = null; + return; + } + + dicts = new BitDictionarySource[n]; + + for (i=0; i< dicts.length; i++) + { + dicts[i] = new BitDictionarySource(); + do + { + n = (int) raf.readByte(); + dicts[i].add(n & allDicts); + } while((n & lastBit)>0); + } + } + + public boolean hasBrothers() + { + return this.hasBrother; + } + + public boolean contains(int dict) + { + int i; + + if (dicts==null) return false; + + for (i=0; i0; - } - - public void add(int dict) - { - dicts|= getBits(dict); - } - - /** Write to file using BinaryFileGenerator */ - public void print(boolean hasNext, DataOutput raf) throws IOException - { - int numDict; - if (hasNext) numDict = lastBit | dicts; - else numDict = dicts; - raf.writeInt(numDict); - } - - public static DictionarySource read(DataInput raf) throws IOException - { - DictionarySource ds = new DictionarySource(); - ds.setDicts(raf.readInt()); - return ds; - } - - public boolean hasBrothers() - { - return (dicts & lastBit)>0; - } - - public int countDefs() - { - int n, source; - for (n=0, source = dicts & allDicts; source>0; source>>=1) - if (source%2==1) n++; - return n; - } - - public DictionarySource intersection(DictionarySource dsO) - { - DictionarySource ds = new DictionarySource(); - ds.setDicts(this.dicts & dsO.dicts); - return ds; - } - - public int[] untangleDefs(int n) - { - int arr[], i, pos, source; - arr = new int[n]; - for (i=0, pos=0, source=dicts & allDicts; pos>=1) - if (source%2==1) - arr[pos++]=i; - return arr; - } - - public int[] untangleDefs() - { - return untangleDefs(countDefs()); - } - - public boolean isEmpty() - { - return (dicts & allDicts)<=0; - } - - public void reset() - { - dicts = 0; - } -} + /** Writes the dictionary information to a random access file. */ + public abstract void print(boolean hasNext, DataOutput raf) throws IOException; + + /** Reads the dictionary information from a random access file, according + to the way it was written with @print. */ + public abstract void read(DataInput raf) throws IOException; + + /** Returns the number of definitions available. */ + public abstract int countDefs(); + + /** Returns true if the node has brothers. This is used by @FileSyllableListTree. */ + public abstract boolean hasBrothers(); + + /** Returns true if dict is a selected dictionary. */ + public abstract boolean contains(int dict); +} \ No newline at end of file diff --git a/source/org/thdl/tib/scanner/DictionaryTableModel.java b/source/org/thdl/tib/scanner/DictionaryTableModel.java index 18f2b6f..12e1596 100644 --- a/source/org/thdl/tib/scanner/DictionaryTableModel.java +++ b/source/org/thdl/tib/scanner/DictionaryTableModel.java @@ -64,7 +64,7 @@ public class DictionaryTableModel extends AbstractTableModel case 0: if (tibetanActivated) return arrayTibetan[row]; else return array[row].getWylie(); - case 1: return array[row].getDef(); + case 1: return array[row].getDefPreview(); default: return array[row].toString(); } } diff --git a/source/org/thdl/tib/scanner/FileSyllableListTree.java b/source/org/thdl/tib/scanner/FileSyllableListTree.java index 5b72071..98013ef 100644 --- a/source/org/thdl/tib/scanner/FileSyllableListTree.java +++ b/source/org/thdl/tib/scanner/FileSyllableListTree.java @@ -38,20 +38,24 @@ public class FileSyllableListTree implements SyllableListTree private long def[]; private long posLista; private DictionarySource defSource; - public static DictionarySource defSourcesWanted; + public static BitDictionarySource defSourcesWanted; public static RandomAccessFile wordRaf=null; private static RandomAccessFile defRaf=null; + public static int versionNumber; /** Creates the root */ public FileSyllableListTree(String archivo, int defSourcesWanted) throws Exception { sil = null; def = null; - this.defSource = new DictionarySource(); - openFiles(archivo); - posLista = wordRaf.length() - 4; - wordRaf.seek(posLista); - posLista = (long)wordRaf.readInt(); + defSource = null; + + this.openFiles(archivo); + posLista = this.wordRaf.getFilePointer(); + + /* if versionNumber is 2 use BitDictionarySource + else use ByteDictionarySource. */ + this.defSourcesWanted.setDicts(defSourcesWanted); } /** Used to create each node (except the root) @@ -73,12 +77,61 @@ public class FileSyllableListTree implements SyllableListTree { return defSource; } + + public BitDictionarySource getDictionarySourcesWanted() + { + return this.defSourcesWanted; + } public static void openFiles(String archivo) throws Exception { + long fileSize; + int pos; + wordRaf = new RandomAccessFile(archivo + ".wrd", "r"); defRaf = new RandomAccessFile(archivo + ".def", "r"); - defSourcesWanted = DictionarySource.getAllDictionaries(); + + fileSize = wordRaf.length(); + wordRaf.seek(fileSize-4L); + pos = wordRaf.readInt(); + + if (pos >> 8 == -1) + { + versionNumber = pos & 255; + + // for now, only version 2 & 3 should be expected + if (versionNumber != 3) versionNumber=2; + wordRaf.seek(fileSize-8L); + pos = wordRaf.readInt(); + } + else + { + // Updates the dictionary for backward compatibility. + try + { + wordRaf.close(); + wordRaf = new RandomAccessFile(archivo + ".wrd", "rw"); + wordRaf.seek(fileSize); + wordRaf.writeShort(-1); + wordRaf.writeByte(-1); + + // Because it didn't have a version number, must be version 2. + versionNumber = 2; + wordRaf.writeByte(versionNumber); + wordRaf.close(); + wordRaf = new RandomAccessFile(archivo + ".wrd", "r"); + } + catch (Exception e) + { + // dictionary is stored on a non-writable media. Do nothing. + } + } + + /* if versionNumber is 2 use BitDictionarySource else use + ByteDictionarySource. */ + defSourcesWanted = new BitDictionarySource(); + + wordRaf.seek(pos); } public String getDef() @@ -89,28 +142,54 @@ public class FileSyllableListTree implements SyllableListTree public Definitions getDefs() { if (def==null) return null; - DictionarySource defSourceAvail = defSource.intersection(defSourcesWanted); - - int defsAvail[] = defSourceAvail.untangleDefs(), defsFound[] = defSource.untangleDefs(def.length); - - - String defs[] = new String[defsAvail.length]; + DictionarySource defSourceAvail = defSource.intersection(defSourcesWanted); + String defs[]; int i, n=0; - try + + if (versionNumber==2) { - for (i=0; i" + dictionaries[i] + " (" + Definitions.defTags[i] + ")   "); + out.print(">" + dictionaries[i] + " (" + DictionarySource.defTags[i] + ")   "); else - out.print(">" + Definitions.defTags[i] + "   "); + out.print(">" + DictionarySource.defTags[i] + "   "); // out.println(" + ""); } // out.println(" "); } - else ds = DictionarySource.getAllDictionaries(); + // fix for updates + else ds = new BitDictionarySource().getAllDictionaries(); // out.println(""); out.println("

"); out.println(""); @@ -300,36 +300,40 @@ public class OnLineScannerFilter extends HttpServlet init = fin+1; } */ + scanner.clearTokens(); scanner.scanBody(in); scanner.finishUp(); - words = scanner.getTokenArray(); - printText(pw, words, tibetan); - printAllDefs(pw, words, tibetan); + printText(pw, tibetan); + printAllDefs(pw, tibetan); scanner.clearTokens(); } } - public void printText(PrintWriter pw, Object words[], boolean tibetan) + public void printText(PrintWriter pw, boolean tibetan) { - Token token; + Token words[] = scanner.getTokenArray(); Word word; char pm; int i; + if (words==null) return; + pw.print("

"); for (i=0; i < words.length; i++) { - token = (Token) words[i]; - if (token instanceof Word) + + if (words[i] instanceof Word) { - word = (Word) token; - pw.print(word.getLink()); + word = (Word) words[i]; + if (word.getDefs().getDictionarySource()!=null) + pw.print(word.getLink()); + else pw.print(word.getWylie() + " "); } else { - if (token instanceof PunctuationMark) + if (words[i] instanceof PunctuationMark) { - pm = token.toString().charAt(0); + pm = words[i].toString().charAt(0); switch (pm) { case '\n': @@ -352,53 +356,49 @@ public class OnLineScannerFilter extends HttpServlet pw.println("

"); } - public void printAllDefs(PrintWriter pw, Object words[], boolean tibetan) + public void printAllDefs(PrintWriter pw, boolean tibetan) { - SimplifiedLinkedList temp = new SimplifiedLinkedList(); - int i; - Word word; + int i, j; + Word words[]; Definitions defs; - - for (i=words.length-1; i >= 0; i--) - { - if (words[i] instanceof Word) - { - if (!temp.contains(words[i])) - { - temp.addLast(words[i]); - } - } - } - - SimplifiedListIterator li = temp.listIterator(); String tag; + DictionarySource ds; + + words = scanner.getWordArray(false); + + if (words == null) return; + pw.println("
"); - while (li.hasNext()) + + for (j=0; j"); - tag = defs.getTag(0); - if (tag!=null) - { - pw.println(" "); - pw.println(" "); - pw.println(" "); - } + tag = ds.getTag(0); + // else tag = null; + /*if (tag!=null) + {*/ + pw.println(" "); + pw.println(" "); + pw.println(" "); + /*} else { - pw.println(" "); + pw.println(" "); pw.println(" "); - } + }*/ pw.println(" "); for (i=1; i"); - tag = defs.getTag(i); + if (ds!=null) tag = ds.getTag(i); + else tag = null; if (tag!=null) { - pw.println(" "); - pw.println(" "); + pw.println(" "); + pw.println(" "); } else pw.println(" "); pw.println(" "); diff --git a/source/org/thdl/tib/scanner/RemoteScannerFilter.java b/source/org/thdl/tib/scanner/RemoteScannerFilter.java index 8824f32..b5fa9f3 100644 --- a/source/org/thdl/tib/scanner/RemoteScannerFilter.java +++ b/source/org/thdl/tib/scanner/RemoteScannerFilter.java @@ -35,7 +35,7 @@ import javax.servlet.http.*; public class RemoteScannerFilter extends GenericServlet { private TibetanScanner scanner; - private DictionarySource ds; + private BitDictionarySource ds; public RemoteScannerFilter() throws Exception { @@ -53,6 +53,7 @@ public class RemoteScannerFilter extends GenericServlet PrintWriter out = res.getWriter(); int i; String linea, dicts = req.getParameter("dicts"), dicDescrip[]; + if (dicts!=null) { if (dicts.equals("names")) @@ -66,7 +67,7 @@ public class RemoteScannerFilter extends GenericServlet for (i=0; ilenPreview) preview = preview.substring(0,lenPreview); listDefs.add(preview); } diff --git a/source/org/thdl/tib/scanner/SyllableListTree.java b/source/org/thdl/tib/scanner/SyllableListTree.java index 15101c6..bf2ac9b 100644 --- a/source/org/thdl/tib/scanner/SyllableListTree.java +++ b/source/org/thdl/tib/scanner/SyllableListTree.java @@ -39,5 +39,7 @@ public interface SyllableListTree public Definitions getDefs(); public boolean hasDef(); public SyllableListTree lookUp(String silStr); + public DictionarySource getDictionarySource(); + public BitDictionarySource getDictionarySourcesWanted(); } \ No newline at end of file diff --git a/source/org/thdl/tib/scanner/TibetanScanner.java b/source/org/thdl/tib/scanner/TibetanScanner.java index f447118..a75395b 100644 --- a/source/org/thdl/tib/scanner/TibetanScanner.java +++ b/source/org/thdl/tib/scanner/TibetanScanner.java @@ -25,7 +25,7 @@ import org.thdl.util.*; */ public abstract class TibetanScanner { - public static final String version = "The Tibetan to English Translation Tool, version 2.2.2 compiled on " + ThdlVersion.getTimeOfCompilation() + ". "; + public static final String version = "The Tibetan to English Translation Tool, version 3.0.0 compiled on " + ThdlVersion.getTimeOfCompilation() + ". "; public static final String copyrightUnicode="Copyright " + '\u00A9' + " 2000-2004 by Andr" + '\u00E9' + "s Montano Pellegrini, all rights reserved."; public static final String copyrightASCII="Copyright 2000-2004 by Andres Montano Pellegrini, all rights reserved."; public static final String copyrightHTML="
" + version + "Copyright © 2000-2004 by Andrés Montano Pellegrini.
All rights reserved.
"; @@ -293,6 +293,7 @@ public abstract class TibetanScanner public Token[] getTokenArray() { int n=wordList.size(); + if (n==0) return null; Token token[] = new Token[n]; SimplifiedListIterator li = wordList.listIterator(); while(li.hasNext()) @@ -306,35 +307,76 @@ public abstract class TibetanScanner } public Word[] getWordArray() + { + return getWordArray(true); + } + + public Word[] getWordArray(boolean includeRepeated) { Token token; - Word array[]; + Word array[], word; int n=0; SimplifiedListIterator li = wordList.listIterator(); - while(li.hasNext()) - if (li.next() instanceof Word) n++; - - if (n==0) return null; - - array = new Word[n]; - n--; - li = wordList.listIterator(); + SimplifiedLinkedList ll2, ll = new SimplifiedLinkedList(); + while(li.hasNext()) { token = (Token) li.next(); - if (token instanceof Word) - { - array[n] = (Word) token; - n--; + + if (token instanceof Word) + { + ll.addLast(token); } } + if (includeRepeated) + { + n = ll.size(); + + if (n==0) return null; + + array = new Word[n]; + li = ll.listIterator(); + + n=0; + while (li.hasNext()) + { + array[n++] = (Word) li.next(); + } + } + else + { + ll2 = new SimplifiedLinkedList(); + li = ll.listIterator(); + + while(li.hasNext()) + { + word = (Word) li.next(); + + if (!ll2.contains(word)) ll2.addLast(word); + } + + n = ll2.size(); + + if (n==0) return null; + + array = new Word[n]; + li = ll2.listIterator(); + + while (li.hasNext()) + { + array[--n] = (Word) li.next(); + } + } + + + return array; } public abstract void scanLine(String linea); public abstract void scanBody(String linea); public abstract void finishUp(); - public abstract DictionarySource getDictionarySource(); + public abstract BitDictionarySource getDictionarySource(); public abstract String[] getDictionaryDescriptions(); } diff --git a/source/org/thdl/tib/scanner/WindowScannerFilter.java b/source/org/thdl/tib/scanner/WindowScannerFilter.java index b7ff1b7..4a20817 100644 --- a/source/org/thdl/tib/scanner/WindowScannerFilter.java +++ b/source/org/thdl/tib/scanner/WindowScannerFilter.java @@ -189,10 +189,12 @@ public class WindowScannerFilter implements WindowListener, FocusListener, Actio mnuDicts = new CheckboxMenuItem("Dictionaries", false); m.add(mnuDicts); mnuDicts.addItemListener(this); + mb.add(m); } - else + + m = new Menu("Help"); + if (!pocketpc) { - m = new Menu("Help"); for (int i = 0; i < DuffScannerPanel.keybdMgr.size(); i++) { final JskadKeyboard kbd = DuffScannerPanel.keybdMgr.elementAt(i); @@ -217,10 +219,10 @@ public class WindowScannerFilter implements WindowListener, FocusListener, Actio } } m.add("-"); - mnuAbout = new MenuItem("About..."); - m.add(mnuAbout); - mnuAbout.addActionListener(this); - } + } + mnuAbout = new MenuItem("About..."); + m.add(mnuAbout); + mnuAbout.addActionListener(this); mb.add(m); // disable menus diff --git a/source/org/thdl/tib/scanner/Word.java b/source/org/thdl/tib/scanner/Word.java index b8821d1..c1384d1 100644 --- a/source/org/thdl/tib/scanner/Word.java +++ b/source/org/thdl/tib/scanner/Word.java @@ -76,6 +76,16 @@ public class Word extends Token public String getDef() { return def.toString(); + } + + public String getDefPreview() + { + return def.getPreview(); + } + + public String getWordDefPreview() + { + return super.token + " - " + getDefPreview(); } public Definitions getDefs()
"+ word.getBookmark(tibetan) +""+ tag +"" + defs.def[0] + ""+ words[j].getBookmark(tibetan) +""+ tag +"" + defs.def[0] + ""+ word.getBookmark(tibetan) +""+ words[j].getBookmark(tibetan) +"" + defs.def[0] + "
"+ tag +"" + defs.def[i] + ""+ tag +"" + defs.def[i] + "" + defs.def[i] + "