2002-10-03 19:28:09 +00:00
|
|
|
/*
|
|
|
|
The contents of this file are subject to the AMP Open Community License
|
|
|
|
Version 1.0 (the "License"); you may not use this file except in compliance
|
|
|
|
with the License. You may obtain a copy of the License on the AMP web site
|
|
|
|
(http://www.tibet.iteso.mx/Guatemala/).
|
|
|
|
|
|
|
|
Software distributed under the License is distributed on an "AS IS" basis,
|
|
|
|
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
|
|
|
License for the specific terms governing rights and limitations under the
|
|
|
|
License.
|
|
|
|
|
|
|
|
The Initial Developer of this software is Andres Montano Pellegrini. Portions
|
|
|
|
created by Andres Montano Pellegrini are Copyright 2001 Andres Montano
|
|
|
|
Pellegrini. All Rights Reserved.
|
|
|
|
|
|
|
|
Contributor(s): ______________________________________.
|
|
|
|
*/
|
|
|
|
package org.thdl.tib.scanner;
|
|
|
|
|
|
|
|
import java.io.*;
|
2005-02-06 23:19:44 +00:00
|
|
|
import org.thdl.util.*;
|
2002-10-03 19:28:09 +00:00
|
|
|
|
2002-10-06 18:23:27 +00:00
|
|
|
/** Searches the words directly in a file. This is not the preferred
    implementation because the search is too slow; the preferred
    implementation is the {@link CachedSyllableListTree}.

    <p>The words must be stored in a binary file tree structure format.
    This can be done using the {@link BinaryFileGenerator}.</p>

    @author Andrés Montano Pellegrini
    @see TibetanScanner
    @see CachedSyllableListTree
    @see BinaryFileGenerator
*/
|
|
|
|
|
|
|
|
public class FileSyllableListTree implements SyllableListTree
|
|
|
|
{
|
2005-02-06 23:19:44 +00:00
|
|
|
protected String sil;
|
2002-10-03 19:28:09 +00:00
|
|
|
private long def[];
|
2005-02-06 23:19:44 +00:00
|
|
|
protected long posLista;
|
|
|
|
protected DictionarySource defSource;
|
2004-08-13 04:47:35 +00:00
|
|
|
public static BitDictionarySource defSourcesWanted;
|
2002-10-03 19:28:09 +00:00
|
|
|
public static RandomAccessFile wordRaf=null;
|
|
|
|
private static RandomAccessFile defRaf=null;
|
2004-08-13 04:47:35 +00:00
|
|
|
public static int versionNumber;
|
2002-10-03 19:28:09 +00:00
|
|
|
|
2005-02-06 23:19:44 +00:00
|
|
|
/** Creates the root node. Opens the dictionary files and records the
    position the word file is left at, which is where look-ups on the
    root start reading.

    @param archivo path of the dictionary files, without extension
    @throws Exception if the dictionary files cannot be opened or read
*/
public FileSyllableListTree(String archivo) throws Exception
{
    // The root carries no syllable and no definitions of its own.
    this.sil = null;
    this.def = null;
    this.defSource = null;

    // openFiles finishes by seeking the word file, so the file pointer
    // read right afterwards is the root's list position.
    openFiles(archivo);
    this.posLista = wordRaf.getFilePointer();
}
|
|
|
|
|
|
|
|
/** Creates any node other than the root from values already read from
    the word file.

    @param sil       syllable held by the node
    @param def       positions of the node's definitions in the definition
                     file, or <code>null</code> if it has none
    @param defSource dictionary sources that go with <code>def</code>
    @param posLista  position in the word file that look-ups on this node
                     start reading from
*/
protected FileSyllableListTree(String sil, long []def, DictionarySource defSource, long posLista)
{
    this.posLista = posLista;
    this.defSource = defSource;
    this.def = def;
    this.sil = sil;
}
|
|
|
|
|
|
|
|
public String toString()
|
|
|
|
{
|
|
|
|
return sil;
|
|
|
|
}
|
|
|
|
|
|
|
|
public DictionarySource getDictionarySource()
|
|
|
|
{
|
|
|
|
return defSource;
|
|
|
|
}
|
2004-08-13 04:47:35 +00:00
|
|
|
|
|
|
|
public BitDictionarySource getDictionarySourcesWanted()
|
|
|
|
{
|
|
|
|
return this.defSourcesWanted;
|
|
|
|
}
|
2005-01-23 00:57:02 +00:00
|
|
|
|
2002-10-03 19:28:09 +00:00
|
|
|
public static void openFiles(String archivo) throws Exception
|
2005-01-23 00:57:02 +00:00
|
|
|
{
|
|
|
|
openFiles(archivo, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
public static void closeFiles()
|
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
wordRaf.close();
|
|
|
|
defRaf.close();
|
|
|
|
}
|
|
|
|
catch (Exception e)
|
|
|
|
{
|
|
|
|
e.printStackTrace();
|
|
|
|
}
|
|
|
|
}
|
2005-02-06 23:19:44 +00:00
|
|
|
|
|
|
|
/** Initiates all static variables, it is called by the constructor of the root
|
|
|
|
FileSyllableListTree (in the case of a pure file tree) or by the
|
|
|
|
constructor of CachedSyllableListTree in the case of the root being loaded
|
|
|
|
into memory.
|
|
|
|
*/
|
2005-01-23 00:57:02 +00:00
|
|
|
public static void openFiles(String archivo, boolean backwardCompatible) throws Exception
|
2002-10-03 19:28:09 +00:00
|
|
|
{
|
2004-08-13 04:47:35 +00:00
|
|
|
long fileSize;
|
|
|
|
int pos;
|
|
|
|
|
2002-10-03 19:28:09 +00:00
|
|
|
wordRaf = new RandomAccessFile(archivo + ".wrd", "r");
|
|
|
|
defRaf = new RandomAccessFile(archivo + ".def", "r");
|
2004-08-13 04:47:35 +00:00
|
|
|
|
|
|
|
fileSize = wordRaf.length();
|
|
|
|
wordRaf.seek(fileSize-4L);
|
|
|
|
pos = wordRaf.readInt();
|
|
|
|
|
|
|
|
if (pos >> 8 == -1)
|
|
|
|
{
|
|
|
|
versionNumber = pos & 255;
|
|
|
|
|
|
|
|
// for now, only version 2 & 3 should be expected
|
|
|
|
if (versionNumber != 3) versionNumber=2;
|
|
|
|
wordRaf.seek(fileSize-8L);
|
|
|
|
pos = wordRaf.readInt();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// Updates the dictionary for backward compatibility.
|
|
|
|
try
|
|
|
|
{
|
2005-01-23 00:57:02 +00:00
|
|
|
if (backwardCompatible)
|
|
|
|
{
|
|
|
|
wordRaf.close();
|
|
|
|
wordRaf = new RandomAccessFile(archivo + ".wrd", "rw");
|
|
|
|
wordRaf.seek(fileSize);
|
|
|
|
wordRaf.writeShort(-1);
|
|
|
|
wordRaf.writeByte(-1);
|
|
|
|
|
|
|
|
// Because it didn't have a version number, must be version 2.
|
|
|
|
versionNumber = 2;
|
|
|
|
wordRaf.writeByte(versionNumber);
|
|
|
|
wordRaf.close();
|
|
|
|
wordRaf = new RandomAccessFile(archivo + ".wrd", "r");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// something is wrong
|
|
|
|
ScannerLogger sl = new ScannerLogger();
|
|
|
|
sl.writeLog("Crash\tFileSyllableListTree\t" + "size: " + fileSize + "; bytes: " + Integer.toHexString(pos));
|
|
|
|
|
|
|
|
// try to open again, but not corrupting the file
|
|
|
|
wordRaf = new RandomAccessFile(archivo + ".wrd", "r");
|
|
|
|
|
|
|
|
fileSize = wordRaf.length();
|
|
|
|
wordRaf.seek(fileSize-8L);
|
|
|
|
pos = wordRaf.readInt();
|
|
|
|
versionNumber = 3;
|
|
|
|
}
|
2004-08-13 04:47:35 +00:00
|
|
|
}
|
|
|
|
catch (Exception e)
|
|
|
|
{
|
|
|
|
// dictionary is stored on a non-writable media. Do nothing.
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-02-06 23:19:44 +00:00
|
|
|
defSourcesWanted = BitDictionarySource.getAllDictionaries();
|
2004-08-13 04:47:35 +00:00
|
|
|
|
|
|
|
wordRaf.seek(pos);
|
2002-10-03 19:28:09 +00:00
|
|
|
}
|
2005-02-06 23:19:44 +00:00
|
|
|
|
|
|
|
public static String[] getDictionaryDescriptions(String archivo)
|
|
|
|
{
|
|
|
|
int n;
|
|
|
|
try
|
|
|
|
{
|
|
|
|
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(archivo + ".dic")));
|
|
|
|
SimplifiedLinkedList ll1 = new SimplifiedLinkedList(), ll2 = new SimplifiedLinkedList();
|
|
|
|
String s;
|
|
|
|
while ((s=br.readLine())!=null)
|
|
|
|
{
|
|
|
|
n = s.indexOf(",");
|
|
|
|
if (n < 0)
|
|
|
|
{
|
|
|
|
ll1.addLast(null);
|
|
|
|
ll2.addLast(s);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ll1.addLast(s.substring(0,n).trim());
|
|
|
|
ll2.addLast(s.substring(n+1).trim());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DictionarySource.setTags(ll2.toStringArray());
|
|
|
|
return ll1.toStringArray();
|
|
|
|
}
|
|
|
|
catch (Exception e)
|
|
|
|
{
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
}
|
2002-10-03 19:28:09 +00:00
|
|
|
|
|
|
|
public String getDef()
|
|
|
|
{
|
|
|
|
return getDefs().toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
public Definitions getDefs()
|
|
|
|
{
|
|
|
|
if (def==null) return null;
|
2004-08-13 04:47:35 +00:00
|
|
|
DictionarySource defSourceAvail = defSource.intersection(defSourcesWanted);
|
|
|
|
String defs[];
|
2002-10-03 19:28:09 +00:00
|
|
|
int i, n=0;
|
2004-08-13 04:47:35 +00:00
|
|
|
|
|
|
|
if (versionNumber==2)
|
2002-10-03 19:28:09 +00:00
|
|
|
{
|
2004-08-13 04:47:35 +00:00
|
|
|
int defsAvail[] = ((BitDictionarySource) defSourceAvail).untangleDefs(), defsFound[] = ((BitDictionarySource) defSource).untangleDefs(def.length);
|
|
|
|
|
|
|
|
defs = new String[defsAvail.length];
|
|
|
|
try
|
|
|
|
{
|
|
|
|
for (i=0; i<defsAvail.length; i++)
|
|
|
|
{
|
|
|
|
while(defsAvail[i]!=defsFound[n]) n++;
|
|
|
|
defRaf.seek(def[n]);
|
|
|
|
defs[i] = defRaf.readUTF();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
catch (Exception e)
|
|
|
|
{
|
|
|
|
System.out.println(e);
|
|
|
|
return null;
|
|
|
|
}
|
2002-10-03 19:28:09 +00:00
|
|
|
}
|
2004-08-13 04:47:35 +00:00
|
|
|
else
|
2002-10-03 19:28:09 +00:00
|
|
|
{
|
2004-08-13 04:47:35 +00:00
|
|
|
ByteDictionarySource defSourceAvailBy = (ByteDictionarySource) defSourceAvail;
|
|
|
|
defs = new String [defSourceAvailBy.countDefs()];
|
|
|
|
|
|
|
|
try
|
|
|
|
{
|
|
|
|
for (i=0; i < def.length; i++)
|
|
|
|
{
|
|
|
|
if (!defSourceAvailBy.isEmpty(i))
|
|
|
|
{
|
|
|
|
defRaf.seek(def[i]);
|
|
|
|
defs[n] = defRaf.readUTF();
|
|
|
|
n++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
catch (Exception e)
|
|
|
|
{
|
|
|
|
System.out.println(e);
|
|
|
|
return null;
|
|
|
|
}
|
2002-10-03 19:28:09 +00:00
|
|
|
}
|
2004-08-13 04:47:35 +00:00
|
|
|
return new Definitions(defs, defSourceAvail);
|
2002-10-03 19:28:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
public boolean hasDef()
|
|
|
|
{
|
|
|
|
if (def==null) return false;
|
|
|
|
DictionarySource defSourceAvail = defSource.intersection(defSourcesWanted);
|
|
|
|
return !defSourceAvail.isEmpty();
|
|
|
|
}
|
|
|
|
|
|
|
|
public SyllableListTree lookUp(String silStr)
|
|
|
|
{
|
|
|
|
String sil;
|
|
|
|
long pos, defSource[];
|
|
|
|
DictionarySource sourceDef;
|
2004-08-13 04:47:35 +00:00
|
|
|
|
2002-10-03 19:28:09 +00:00
|
|
|
int i;
|
|
|
|
|
2005-02-06 23:19:44 +00:00
|
|
|
if (silStr==null || posLista==-1) return null;
|
2002-10-03 19:28:09 +00:00
|
|
|
try
|
|
|
|
{
|
|
|
|
wordRaf.seek(posLista);
|
|
|
|
do
|
|
|
|
{
|
|
|
|
pos = (long) wordRaf.readInt();
|
|
|
|
sil = wordRaf.readUTF();
|
2004-08-13 04:47:35 +00:00
|
|
|
|
|
|
|
if (versionNumber==2) sourceDef = new BitDictionarySource();
|
|
|
|
else sourceDef = new ByteDictionarySource();
|
|
|
|
sourceDef.read(wordRaf);
|
|
|
|
|
2002-10-03 19:28:09 +00:00
|
|
|
if (sourceDef.isEmpty()) defSource = null;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
defSource = new long[sourceDef.countDefs()];
|
|
|
|
for (i=0; i<defSource.length; i++)
|
|
|
|
{
|
|
|
|
defSource[i] = (long) wordRaf.readInt();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sil.compareTo(silStr)>0)
|
|
|
|
return null;
|
|
|
|
if (sil.equals(silStr))
|
|
|
|
return new FileSyllableListTree(sil, defSource, sourceDef, pos);
|
|
|
|
|
|
|
|
}while(sourceDef.hasBrothers());
|
|
|
|
}
|
|
|
|
catch (Exception e)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
return null;
|
2005-02-06 23:19:44 +00:00
|
|
|
}
|
2002-10-06 18:23:27 +00:00
|
|
|
}
|