Added Lucene & Solr libraries as well as initial Tibetan language processing code + new build file

This commit is contained in:
eg3p 2007-05-14 11:40:24 +00:00
parent 3cd1f09087
commit 030f279e28
14 changed files with 282 additions and 0 deletions

View file

@ -0,0 +1,32 @@
package org.thdl.lucene;
import org.apache.lucene.analysis.*;
import java.text.DecimalFormat;
import java.io.*;
/**
 * Token filter that rewrites every token whose text parses as an {@code int}
 * into a fixed-width, zero-padded decimal string, and tags the rewritten
 * token with the {@link #NUMBER_TYPE} type. Tokens that do not parse as an
 * {@code int} are passed through untouched.
 *
 * <p>Padding is done with plain ASCII digits and without any shared mutable
 * formatter, so {@link #pad(int)} may be called from several threads at once
 * and produces the same text regardless of the JVM default locale.
 */
public class NumberPadder extends TokenFilter {
    /** Token type assigned to every token that was rewritten as a padded number. */
    public static final String NUMBER_TYPE = "Number";

    /** Width, in digits, of a padded number; ten digits hold any {@code int} magnitude. */
    private static final int PAD_WIDTH = 10;

    /**
     * Formats {@code n} as a decimal string left-padded with zeros to ten digits.
     * A negative value keeps a leading {@code '-'} in front of the ten digits
     * (for example {@code -5} becomes {@code "-0000000005"}), so negative values
     * do not sort numerically when compared as strings.
     *
     * @param n the value to format
     * @return the zero-padded decimal representation of {@code n}
     */
    public static String pad(int n) {
        // Widen before taking the absolute value: Math.abs(Integer.MIN_VALUE) overflows as an int.
        String digits = Long.toString(Math.abs((long) n));
        StringBuffer padded = new StringBuffer(PAD_WIDTH + 1);
        if (n < 0) {
            padded.append('-');
        }
        for (int i = digits.length(); i < PAD_WIDTH; i++) {
            padded.append('0');
        }
        padded.append(digits);
        return padded.toString();
    }

    /**
     * Wraps {@code input} so that its integer tokens come out zero-padded.
     *
     * @param input the token stream to filter
     */
    public NumberPadder(TokenStream input) {
        super(input);
    }

    /**
     * Returns the next token of the wrapped stream, replacing it with a padded
     * {@link #NUMBER_TYPE} token when its text parses as an {@code int}. The
     * replacement keeps the original offsets and position increment.
     *
     * @return the next (possibly rewritten) token, or {@code null} at end of stream
     * @throws IOException if the wrapped stream fails
     */
    public Token next() throws IOException {
        Token token = input.next();
        if (token == null) {
            return null;
        }
        String text = token.termText();
        if (!mayBeInteger(text)) {
            // Cheap rejection of ordinary words; avoids raising an exception per token.
            return token;
        }
        try {
            int value = Integer.parseInt(text);
            Token replace = new Token(pad(value), token.startOffset(), token.endOffset(), NUMBER_TYPE);
            replace.setPositionIncrement(token.getPositionIncrement());
            return replace;
        } catch (NumberFormatException nfe) {
            // Looked numeric but is not a valid int (e.g. "12a" or out of range): keep it as is.
            return token;
        }
    }

    /**
     * Tells whether {@code text} could possibly be accepted by
     * {@link Integer#parseInt(String)}: it must be non-empty and start with a
     * sign or a decimal digit. A {@code false} result means parsing would fail.
     */
    private static boolean mayBeInteger(String text) {
        if (text == null || text.length() == 0) {
            return false;
        }
        char first = text.charAt(0);
        return first == '-' || first == '+' || Character.digit(first, 10) >= 0;
    }
}