Changed converters from Unicode non-breaking tsheg to Unicode non-breaking Wylie space.

2009-02-20 23:11:17 +00:00 · 2009-02-20 23:11:17 +00:00 · 835e74c0cd
commit 835e74c0cd
parent ffb32b3207
7 changed files with 56 additions and 23 deletions
--- a/source/org/thdl/tib/scanner/BasicTibetanTranscriptionConverter.java
+++ b/source/org/thdl/tib/scanner/BasicTibetanTranscriptionConverter.java
@ -46,7 +46,6 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
 	private static final int WYLIE_TO_ACIP=2;
 	private static final int UNICODE_TO_WYLIE=3;
 	private static final int WYLIE_TO_UNICODE=4;
-	private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095};
 	
 	/** Converts from the Acip transliteration scheme to EWTS.*/
 	public static String acipToWylie(String acip)
@ -253,19 +252,7 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
 		nuevaPalabra = Manipulate.fixWazur(nuevaPalabra);
 		return nuevaPalabra;*/
 	}
-	
-	private static int getTibetanUnicodeStart(String unicode, int pos)
-	{
-		for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)>=TIBETAN_UNICODE_RANGE[0] && unicode.codePointAt(pos)<=TIBETAN_UNICODE_RANGE[1]) return pos;
-		return -1;
-	}
-	
-	private static int getTibetanUnicodeEnd(String unicode, int pos)
-	{
-		for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)<TIBETAN_UNICODE_RANGE[0] || unicode.codePointAt(pos)>TIBETAN_UNICODE_RANGE[1]) return pos;
-		return pos;
-	}
-    
+	    
 	/** Converts Tibetan Unicode to EWTS. */
    public static String unicodeToWylie(String unicode)
    {
@ -274,9 +261,9 @@ public class BasicTibetanTranscriptionConverter implements FontConverterConstant
    	TibetanDocument tibDoc;
    	StringBuffer errors;
    	int posStart=0, posEnd;
-    	while((posStart = getTibetanUnicodeStart(unicode, posStart))>=0)
+    	while((posStart = Manipulate.getTibetanUnicodeStart(unicode, posStart))>=0)
    	{
-    		posEnd = getTibetanUnicodeEnd(unicode, posStart+1);
+    		posEnd = Manipulate.getTibetanUnicodeEnd(unicode, posStart+1);
    		startString = unicode.substring(0, posStart);
    		tibetanString = unicode.substring(posStart, posEnd);
    		endString = unicode.substring(posEnd);
--- a/source/org/thdl/tib/scanner/Manipulate.java
+++ b/source/org/thdl/tib/scanner/Manipulate.java
@ -28,6 +28,7 @@ public class Manipulate
 	private static String bracketMarks = "<>(){}[]";
 	private static String endOfSyllableMarks = " _\t";
 	private static String allStopMarkers = endOfSyllableMarks + endOfParagraphMarks + bracketMarks;
+	/** Inclusive code-point bounds {first, last} = {0x0F00, 0x0FFF} tested by getTibetanUnicodeStart and getTibetanUnicodeEnd. */
+	private static final int TIBETAN_UNICODE_RANGE[] = {3840, 4095};

 	/* public static String[] parseFields (String s, char delimiter)
 	{
@ -204,6 +205,18 @@ public class Manipulate
 		return ch>=0xF00 && ch<=0xFFF;
 	}
 	
+	/**
+	 * Tells whether ch lies in one of two inclusive code-point ranges:
+	 * 0x0F40..0x0FBC or 0x0F00..0x0F03.
+	 * NOTE(review): which Tibetan characters these ranges are meant to cover
+	 * is not stated here; confirm against the Unicode Tibetan chart.
+	 *
+	 * @param ch the character to test.
+	 * @return true if ch falls in either range, false otherwise.
+	 */
+	public static boolean isTibetanUnicodeLetter(char ch)
+	{
+		
+		// && binds tighter than ||, so this reads (F40..FBC) or (F00..F03).
+		return ch>=0xF40 && ch<=0xFBC || ch>=0xF00 && ch<=0xF03;
+	}
+	
+	/**
+	 * Tells whether ch lies in the inclusive code-point range 0x0F20..0x0F33.
+	 * NOTE(review): in the Unicode Tibetan chart this span holds the digits
+	 * followed by the half digits; confirm that including the latter is intended.
+	 *
+	 * @param ch the character to test.
+	 * @return true if ch falls in the range, false otherwise.
+	 */
+	public static boolean isTibetanUnicodeDigit(char ch)
+	{
+		
+		return ch>=0xF20 && ch<=0xF33;
+	}
+	
 	public static boolean guessIfUnicode(String line)
 	{
 	    char ch;
@ -415,4 +428,36 @@ public class Manipulate
    	}
    	return ncr.toString();
    }
+    
+    /**
+     * Resolves backslash escapes in s and returns the result as a new string.
+     * <ul>
+     * <li>A backslash followed by the letter u and four more characters is replaced
+     * by the char whose value is those four characters parsed in radix 16.</li>
+     * <li>A backslash followed by any other character yields that character alone
+     * (so a doubled backslash yields a single backslash).</li>
+     * <li>A backslash that is the very last character of s is kept unchanged.</li>
+     * </ul>
+     *
+     * @param s the text to unescape.
+     * @return the text with the escapes resolved.
+     * @throws StringIndexOutOfBoundsException if fewer than four characters follow
+     *         a backslash-u sequence (raised by substring).
+     * @throws NumberFormatException if those four characters cannot be parsed by
+     *         Integer.parseInt in radix 16.
+     */
+    public static String unescape(String s) {
+    	int i=0,len=s.length();
+    	char c;
+    	StringBuffer sb = new StringBuffer(len);
+    	while (i<len) {
+    		c = s.charAt(i++);
+    		if (c=='\\') {
+    			// Only look at the escaped character if the backslash is not the last char.
+    			if (i<len) {
+    				c = s.charAt(i++);
+    				if (c=='u') {
+    					// No bounds or format check here: malformed input throws (see @throws).
+    					c = (char) Integer.parseInt(s.substring(i,i+4),16);
+    					i += 4;
+    				} // add other cases here as desired...
+    			}} // fall through: \ escapes itself, quotes any character but u
+    		sb.append(c);
+    	}
+    	return sb.toString();
+    }
+    
+	/**
+	 * Finds the first index at or after pos whose code point lies inside
+	 * TIBETAN_UNICODE_RANGE (both bounds inclusive).
+	 *
+	 * @param unicode the text to scan.
+	 * @param pos the index at which the scan begins.
+	 * @return the index found, or -1 if the end of the string is reached first.
+	 */
+	public static int getTibetanUnicodeStart(String unicode, int pos)
+	{
+		// Advances one char at a time and tests the code point read at each index.
+		for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)>=TIBETAN_UNICODE_RANGE[0] && unicode.codePointAt(pos)<=TIBETAN_UNICODE_RANGE[1]) return pos;
+		return -1;
+	}
+	
+	/**
+	 * Finds the first index at or after pos whose code point lies outside
+	 * TIBETAN_UNICODE_RANGE.
+	 *
+	 * @param unicode the text to scan.
+	 * @param pos the index at which the scan begins.
+	 * @return the index found; if the scan runs off the end, the string length;
+	 *         if pos is already at or past the end, pos unchanged.
+	 */
+	public static int getTibetanUnicodeEnd(String unicode, int pos)
+	{
+		// Advances one char at a time and tests the code point read at each index.
+		for(; pos < unicode.length(); pos++ ) if(unicode.codePointAt(pos)<TIBETAN_UNICODE_RANGE[0] || unicode.codePointAt(pos)>TIBETAN_UNICODE_RANGE[1]) return pos;
+		return pos;
+	}
+
 }